PageRenderTime 47ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/library/PEAR/XML/Beautifier/Tokenizer.php

https://github.com/vinnivinsachi/Vincent-DR
PHP | 372 lines | 188 code | 33 blank | 151 comment | 22 complexity | ce7e636f4bacc6763238f4cefc9cf219 MD5 | raw file
  1. <?PHP
  2. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2002 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 2.0 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/2_02.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Authors: Stephan Schmidt <schst@php.net> |
  17. // +----------------------------------------------------------------------+
  18. /**
  19. * XML/Beautifier/Tokenizer.php
  20. *
  21. * @category XML
  22. * @package XML_Beautifier
  23. * @author Stephan Schmidt <schst@php.net>
  24. * @todo tokenize DTD
  25. * @todo check for xml:space attribute
  26. */
  27. /**
  28. * XML_Parser is needed to parse the document
  29. */
  30. require_once 'XML/Parser.php';
  31. /**
  32. * Tokenizer for XML_Beautifier
  33. *
  34. * This class breaks an XML document into separate tokens
  35. * that will be rendered by an XML_Beautifier renderer.
  36. *
  37. * @category XML
  38. * @package XML_Beautifier
  39. * @author Stephan Schmidt <schst@php.net>
  40. */
  41. class XML_Beautifier_Tokenizer extends XML_Parser {
  42. /**
  43. * current depth
  44. * @var integer
  45. * @access private
  46. */
  47. var $_depth = 0;
  48. /**
  49. * stack for all found elements
  50. * @var array
  51. * @access private
  52. */
  53. var $_struct = array();
  54. /**
  55. * current parsing mode
  56. * @var string
  57. * @access private
  58. */
  59. var $_mode = "xml";
  60. /**
  61. * Tokenize a document
  62. *
  63. * @param string $document filename or XML document
  64. * @param boolean $isFile flag to indicate whether the first parameter is a file
  65. */
  66. function tokenize( $document, $isFile = true )
  67. {
  68. $this->folding = false;
  69. $this->XML_Parser();
  70. $this->_resetVars();
  71. if( $isFile === true ) {
  72. $this->setInputFile($document);
  73. $result = $this->parse();
  74. }
  75. else {
  76. $result = $this->parseString($document);
  77. }
  78. if ($this->isError($result)) {
  79. return $result;
  80. }
  81. return $this->_struct;
  82. }
  83. /**
  84. * Start element handler for XML parser
  85. *
  86. * @access protected
  87. * @param object $parser XML parser object
  88. * @param string $element XML element
  89. * @param array $attribs attributes of XML tag
  90. * @return void
  91. */
  92. function startHandler($parser, $element, $attribs)
  93. {
  94. $struct = array(
  95. "type" => XML_BEAUTIFIER_ELEMENT,
  96. "tagname" => $element,
  97. "attribs" => $attribs,
  98. "contains" => XML_BEAUTIFIER_EMPTY,
  99. "depth" => $this->_depth++,
  100. "children" => array()
  101. );
  102. array_push($this->_struct,$struct);
  103. }
  104. /**
  105. * End element handler for XML parser
  106. *
  107. * @access protected
  108. * @param object XML parser object
  109. * @param string
  110. * @return void
  111. */
  112. function endHandler($parser, $element)
  113. {
  114. $struct = array_pop($this->_struct);
  115. if ($struct["depth"] > 0) {
  116. $parent = array_pop($this->_struct);
  117. array_push($parent["children"], $struct);
  118. $parent["contains"] = $parent["contains"] | XML_BEAUTIFIER_ELEMENT;
  119. array_push($this->_struct, $parent);
  120. } else {
  121. array_push($this->_struct, $struct);
  122. }
  123. $this->_depth--;
  124. }
  125. /**
  126. * Handler for character data
  127. *
  128. * @access protected
  129. * @param object XML parser object
  130. * @param string CDATA
  131. * @return void
  132. */
  133. function cdataHandler($parser, $cdata)
  134. {
  135. if ((string)$cdata === '') {
  136. return true;
  137. }
  138. $struct = array(
  139. "type" => XML_BEAUTIFIER_CDATA,
  140. "data" => $cdata,
  141. "depth" => $this->_depth
  142. );
  143. $this->_appendToParent($struct);
  144. }
  145. /**
  146. * Handler for processing instructions
  147. *
  148. * @access protected
  149. * @param object XML parser object
  150. * @param string target
  151. * @param string data
  152. * @return void
  153. */
  154. function piHandler($parser, $target, $data)
  155. {
  156. $struct = array(
  157. "type" => XML_BEAUTIFIER_PI,
  158. "target" => $target,
  159. "data" => $data,
  160. "depth" => $this->_depth
  161. );
  162. $this->_appendToParent($struct);
  163. }
  164. /**
  165. * Handler for external entities
  166. *
  167. * @access protected
  168. * @param object XML parser object
  169. * @param string target
  170. * @param string data
  171. * @return void
  172. */
  173. function entityrefHandler($parser, $open_entity_names, $base, $system_id, $public_id)
  174. {
  175. $struct = array(
  176. "type" => XML_BEAUTIFIER_ENTITY,
  177. "name" => $open_entity_names,
  178. "depth" => $this->_depth
  179. );
  180. $this->_appendToParent($struct);
  181. return true;
  182. }
  183. /**
  184. * Handler for all other stuff
  185. *
  186. * @access protected
  187. * @param object XML parser object
  188. * @param string data
  189. * @return void
  190. */
  191. function defaultHandler($parser, $data)
  192. {
  193. switch ($this->_mode) {
  194. case "xml":
  195. $this->_handleXMLDefault($data);
  196. break;
  197. case "doctype":
  198. $this->_handleDoctype($data);
  199. break;
  200. }
  201. }
  202. /**
  203. * handler for all data inside the doctype declaration
  204. *
  205. * @access private
  206. * @param string data
  207. * @todo improve doctype parsing to split the declaration into seperate tokens
  208. */
  209. function _handleDoctype($data)
  210. {
  211. if (eregi(">", $data)) {
  212. $last = $this->_getLastToken();
  213. if ($last["data"] == "]" ) {
  214. $this->_mode = "xml";
  215. }
  216. }
  217. $struct = array(
  218. "type" => XML_BEAUTIFIER_DT_DECLARATION,
  219. "data" => $data,
  220. "depth" => $this->_depth
  221. );
  222. $this->_appendToParent($struct);
  223. }
  224. /**
  225. * handler for all default XML data
  226. *
  227. * @access private
  228. * @param string data
  229. */
  230. function _handleXMLDefault($data)
  231. {
  232. /*
  233. * handle comment
  234. */
  235. if (strncmp("<!--", $data, 4) == 0) {
  236. $regs = array();
  237. eregi("<!--(.+)-->", $data, $regs);
  238. $comment = trim($regs[1]);
  239. $struct = array(
  240. "type" => XML_BEAUTIFIER_COMMENT,
  241. "data" => $comment,
  242. "depth" => $this->_depth
  243. );
  244. /*
  245. * handle XML declaration
  246. */
  247. } elseif (strncmp("<?", $data, 2) == 0) {
  248. preg_match_all('/([a-zA-Z_]+)="((?:\\\.|[^"\\\])*)"/', $data, $match);
  249. $cnt = count($match[1]);
  250. $attribs = array();
  251. for ($i = 0; $i < $cnt; $i++) {
  252. $attribs[$match[1][$i]] = $match[2][$i];
  253. }
  254. if (!isset($attribs["version"])) {
  255. $attribs["version"] = "1.0";
  256. }
  257. if (!isset($attribs["encoding"])) {
  258. $attribs["encoding"] = "UTF-8";
  259. }
  260. if (!isset($attribs["standalone"])) {
  261. $attribs["standalone"] = true;
  262. }
  263. $struct = array(
  264. "type" => XML_BEAUTIFIER_XML_DECLARATION,
  265. "version" => $attribs["version"],
  266. "encoding" => $attribs["encoding"],
  267. "standalone" => $attribs["standalone"],
  268. "depth" => $this->_depth
  269. );
  270. } elseif (eregi("^<!DOCTYPE", $data)) {
  271. $this->_mode = "doctype";
  272. $struct = array(
  273. "type" => XML_BEAUTIFIER_DT_DECLARATION,
  274. "data" => $data,
  275. "depth" => $this->_depth
  276. );
  277. } else {
  278. /*
  279. * handle all other data
  280. */
  281. $struct = array(
  282. "type" => XML_BEAUTIFIER_DEFAULT,
  283. "data" => $data,
  284. "depth" => $this->_depth
  285. );
  286. }
  287. $this->_appendToParent($struct);
  288. return true;
  289. }
  290. /**
  291. * append a struct to the last struct on the stack
  292. *
  293. * @access private
  294. * @param array $struct structure to append
  295. */
  296. function _appendToParent($struct)
  297. {
  298. if ($this->_depth > 0) {
  299. $parent = array_pop($this->_struct);
  300. array_push($parent["children"], $struct);
  301. $parent["contains"] = $parent["contains"] | $struct["type"];
  302. array_push($this->_struct, $parent);
  303. return true;
  304. }
  305. array_push($this->_struct, $struct);
  306. }
  307. /**
  308. * get the last token
  309. *
  310. * @access private
  311. * @return array
  312. */
  313. function _getLastToken()
  314. {
  315. $parent = array_pop($this->_struct);
  316. if (isset($parent["children"]) && is_array($parent["children"])) {
  317. $last = array_pop($parent["children"]);
  318. array_push($parent["children"], $last);
  319. } else {
  320. $last = $parent;
  321. }
  322. array_push($this->_struct, $parent);
  323. return $last;
  324. }
  325. /**
  326. * reset all used object properties
  327. *
  328. * This method is called before parsing a new document
  329. *
  330. * @access private
  331. */
  332. function _resetVars()
  333. {
  334. $this->_depth = 0;
  335. $this->_struct = array();
  336. $this->_mode = "xml";
  337. }
  338. }
  339. ?>