PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/phpFlickr/xml_saxy_parser.php

http://github.com/gabrys/wikidot
PHP | 1136 lines | 949 code | 42 blank | 145 comment | 38 complexity | 4a073b44ce59dd66407e0fc2e1789604 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * SAXY is a non-validating, but lightweight and fast SAX parser for PHP, modelled on the Expat parser
  4. * @package saxy-xmlparser
  5. * @subpackage saxy-xmlparser-main
  6. * @version 1.0
  7. * @copyright (C) 2004 John Heinstein. All rights reserved
  8. * @license http://www.gnu.org/copyleft/lesser.html LGPL License
  9. * @author John Heinstein <johnkarl@nbnet.nb.ca>
  10. * @link http://www.engageinteractive.com/saxy/ SAXY Home Page
  11. * SAXY is Free Software
  12. *
  13. * This version was modified by Dan Coulter to bring the base and parser files into
  14. * the same file for the purpose of including it in his project. Visit the SAXY
  15. * Home page listed above to download the full version of this class along with
  16. * documentation.
  17. **/
  /** directory this file lives in (with trailing slash); callers may pre-define it to override */
  if (!defined('SAXY_INCLUDE_PATH')) {
      define('SAXY_INCLUDE_PATH', (dirname(__FILE__) . "/"));
  }
  /** current version of SAXY */
  define('SAXY_VERSION', '1.0');
  /**
   * namespace name bound to the reserved "xml" prefix.
   * Namespace names are compared character for character, so "XML" must be upper case.
   */
  define('SAXY_XML_NAMESPACE', 'http://www.w3.org/XML/1998/namespace');

  /** saxy parse state, before prolog is encountered */
  define('SAXY_STATE_PROLOG_NONE', 0);
  /** saxy parse state, in processing instruction */
  define('SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION', 1);
  /** saxy parse state, an exclamation mark has been encountered */
  define('SAXY_STATE_PROLOG_EXCLAMATION', 2);
  /** saxy parse state, in DTD */
  define('SAXY_STATE_PROLOG_DTD', 3);
  /** saxy parse state, an inline DTD */
  define('SAXY_STATE_PROLOG_INLINEDTD', 4);
  /** saxy parse state, a comment */
  define('SAXY_STATE_PROLOG_COMMENT', 5);
  /** saxy parse state, processing main document */
  define('SAXY_STATE_PARSING', 6);
  /** saxy parse state, processing comment in main document */
  define('SAXY_STATE_PARSING_COMMENT', 7);

  //SAXY error codes; same as EXPAT error codes
  /** no error */
  define('SAXY_XML_ERROR_NONE', 0);
  /** out of memory error */
  define('SAXY_XML_ERROR_NO_MEMORY', 1);
  /** syntax error */
  define('SAXY_XML_ERROR_SYNTAX', 2);
  /** no elements in document */
  define('SAXY_XML_ERROR_NO_ELEMENTS', 3);
  /** invalid token encountered error */
  define('SAXY_XML_ERROR_INVALID_TOKEN', 4);
  /** unclosed token error */
  define('SAXY_XML_ERROR_UNCLOSED_TOKEN', 5);
  /** partial character error */
  define('SAXY_XML_ERROR_PARTIAL_CHAR', 6);
  /** mismatched tag error */
  define('SAXY_XML_ERROR_TAG_MISMATCH', 7);
  /** duplicate attribute error */
  define('SAXY_XML_ERROR_DUPLICATE_ATTRIBUTE', 8);
  /** junk after document element error */
  define('SAXY_XML_ERROR_JUNK_AFTER_DOC_ELEMENT', 9);
  /** parameter entity reference error */
  define('SAXY_XML_ERROR_PARAM_ENTITY_REF', 10);
  /** undefined entity error */
  define('SAXY_XML_ERROR_UNDEFINED_ENTITY', 11);
  /** recursive entity error */
  define('SAXY_XML_ERROR_RECURSIVE_ENTITY_REF', 12);
  /** asynchronous entity error */
  define('SAXY_XML_ERROR_ASYNC_ENTITY', 13);
  /** bad character reference error */
  define('SAXY_XML_ERROR_BAD_CHAR_REF', 14);
  /** binary entity reference error */
  define('SAXY_XML_ERROR_BINARY_ENTITY_REF', 15);
  /** attribute external entity error */
  define('SAXY_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF', 16);
  /** misplaced processing instruction error */
  define('SAXY_XML_ERROR_MISPLACED_XML_PI', 17);
  /** unknown encoding error */
  define('SAXY_XML_ERROR_UNKNOWN_ENCODING', 18);
  /** incorrect encoding error */
  define('SAXY_XML_ERROR_INCORRECT_ENCODING', 19);
  /** unclosed CDATA Section error */
  define('SAXY_XML_ERROR_UNCLOSED_CDATA_SECTION', 20);
  /** external entity handling error */
  define('SAXY_XML_ERROR_EXTERNAL_ENTITY_HANDLING', 21);

  // The shared base file has been merged into this file (see the file header), so it is not included.
  //require_once(SAXY_INCLUDE_PATH . 'xml_saxy_shared.php');
  87. /**
  88. * SAXY_Parser_Base is a base class for SAXY and SAXY Lite
  89. * @package saxy-xmlparser
  90. * @version 1.0
  91. * @copyright (C) 2004 John Heinstein. All rights reserved
  92. * @license http://www.gnu.org/copyleft/lesser.html LGPL License
  93. * @author John Heinstein <johnkarl@nbnet.nb.ca>
  94. * @link http://www.engageinteractive.com/saxy/ SAXY Home Page
  95. * SAXY is Free Software
  96. **/
  /** opening marker of a CDATA section, as it appears right after the '<' */
  define('SAXY_SEARCH_CDATA', '![CDATA[');
  /** number of characters in the CDATA opening marker */
  define('SAXY_CDATA_LEN', strlen(SAXY_SEARCH_CDATA));
  /** opening marker of a notation declaration */
  define('SAXY_SEARCH_NOTATION', '!NOTATION');
  /** opening marker of a doctype declaration */
  define('SAXY_SEARCH_DOCTYPE', '!DOCTYPE');

  /** attribute parse state, no attribute is currently being read */
  define('SAXY_STATE_ATTR_NONE', 0);
  /** attribute parse state, reading an attribute key */
  define('SAXY_STATE_ATTR_KEY', 1);
  /** attribute parse state, reading an attribute value */
  define('SAXY_STATE_ATTR_VALUE', 2);
  /**
   * The base SAX Parser class
   *
   * Base class for SAXY and SAXY Lite: stores the registered event handlers and the
   * entity translation tables, parses attribute strings and dispatches the element,
   * character data and CDATA events.
   *
   * @package saxy-xmlparser
   * @author John Heinstein <johnkarl@nbnet.nb.ca>
   */
  class SAXY_Parser_Base {
      /** @var int The current state of the parser */
      public $state;
      /** @var string A temporary container for parsed characters */
      public $charContainer = '';
      /** @var mixed The start element event handler (a callback), or null if none is set */
      public $startElementHandler = null;
      /** @var mixed The end element event handler (a callback), or null if none is set */
      public $endElementHandler = null;
      /** @var mixed The character data event handler (a callback), or null if none is set */
      public $characterDataHandler = null;
      /** @var mixed The CDATA Section event handler (a callback), or null if none is set */
      public $cDataSectionHandler = null;
      /** @var boolean True if predefined entities are to be converted into characters */
      public $convertEntities = true;
      /** @var Array Translation table for predefined entities */
      public $predefinedEntities = array('&amp;' => '&', '&lt;' => '<', '&gt;' => '>',
                                         '&quot;' => '"', '&apos;' => "'");
      /** @var Array User defined translation table for entities */
      public $definedEntities = array();
      /** @var boolean True if whitespace is to be preserved during parsing. NOT YET IMPLEMENTED! */
      public $preserveWhitespace = false;

      /**
       * Constructor for SAX parser
       *
       * PHP 8 no longer treats a method named after the class as the constructor,
       * so the initialisation is reachable through __construct as well.
       */
      function __construct() {
          $this->SAXY_Parser_Base();
      } //__construct

      /**
       * Legacy (PHP 4 style) constructor, kept because subclasses call it explicitly
       */
      function SAXY_Parser_Base() {
          $this->charContainer = '';
      } //SAXY_Parser_Base

      /**
       * Sets the handlers for the start element and end element events
       * @param mixed The start element handler
       * @param mixed The end element handler
       */
      function xml_set_element_handler($startHandler, $endHandler) {
          $this->startElementHandler = $startHandler;
          $this->endElementHandler = $endHandler;
      } //xml_set_element_handler

      /**
       * Sets the handler for the character data event
       * @param mixed The character data handler
       */
      function xml_set_character_data_handler($handler) {
          $this->characterDataHandler = $handler;
      } //xml_set_character_data_handler

      /**
       * Sets the handler for the CDATA Section event
       * @param mixed The CDATA Section handler
       */
      function xml_set_cdata_section_handler($handler) {
          $this->cDataSectionHandler = $handler;
      } //xml_set_cdata_section_handler

      /**
       * Sets whether predefined entities should be replaced with their equivalent characters during parsing
       * @param boolean True if entity replacement is to occur
       */
      function convertEntities($truthVal) {
          $this->convertEntities = $truthVal;
      } //convertEntities

      /**
       * Appends an array of entity mappings to the existing translation table
       *
       * Intended mainly to facilitate the conversion of non-ASCII entities into equivalent characters.
       * Mappings registered by earlier calls are kept; when the same entity is registered
       * twice, the most recent mapping wins.
       *
       * @param array A list of entity mappings in the format: array('&amp;' => '&');
       */
      function appendEntityTranslationTable($table) {
          // the union operator keeps the left operand's entries on key collisions
          $this->definedEntities = $table + $this->definedEntities;
      } //appendEntityTranslationTable

      /**
       * Gets the nth character from the end of the string
       * @param string The text to be queried
       * @param int The index from the end of the string (0 is the last character)
       * @return string The found character, or an empty string if the index is out of range
       */
      function getCharFromEnd($text, $index) {
          $len = strlen($text);
          $position = $len - 1 - $index;
          if (($position < 0) || ($position >= $len)) {
              return '';
          }
          return $text[$position];
      } //getCharFromEnd

      /**
       * Parses the attributes string into an array of key / value pairs
       *
       * Values must be quoted with single or double quotes. Whitespace around the
       * key, and between the equals sign and the opening quote, is ignored.
       *
       * @param string The attribute text
       * @return Array An array of key / value pairs
       */
      function parseAttributes($attrText) {
          $attrText = trim($attrText);
          $attrArray = array();
          $maybeEntity = false;
          $total = strlen($attrText);
          $keyDump = '';
          $valueDump = '';
          $currentState = SAXY_STATE_ATTR_NONE;
          $quoteType = '';

          for ($i = 0; $i < $total; $i++) {
              $currentChar = $attrText[$i];
              $isWhitespace = (trim($currentChar) === '');

              if ($currentState == SAXY_STATE_ATTR_NONE) {
                  if ($isWhitespace) {
                      //separator between two attributes
                      continue;
                  }
                  $currentState = SAXY_STATE_ATTR_KEY;
              }
              else if (($currentState == SAXY_STATE_ATTR_VALUE) && ($quoteType == '') && $isWhitespace) {
                  //whitespace between '=' and the opening quote is not part of the value
                  continue;
              }

              switch ($currentChar) {
                  case "\t":
                      //tabs are kept inside values and dropped from keys
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          $valueDump .= $currentChar;
                      }
                      break;

                  case "\x0B": //vertical tab
                  case "\n":
                  case "\r":
                      //dropped from keys and values alike
                      break;

                  case '=':
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          $valueDump .= $currentChar;
                      }
                      else {
                          $currentState = SAXY_STATE_ATTR_VALUE;
                          $quoteType = '';
                          $maybeEntity = false;
                      }
                      break;

                  case '"':
                  case "'":
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          if ($quoteType == '') {
                              //opening quote
                              $quoteType = $currentChar;
                          }
                          else if ($quoteType == $currentChar) {
                              //closing quote: store the finished attribute
                              if ($this->convertEntities && $maybeEntity) {
                                  $valueDump = strtr($valueDump, $this->predefinedEntities);
                                  $valueDump = strtr($valueDump, $this->definedEntities);
                              }
                              $attrArray[trim($keyDump)] = $valueDump;
                              $keyDump = $valueDump = $quoteType = '';
                              $currentState = SAXY_STATE_ATTR_NONE;
                          }
                          else {
                              //the other kind of quote is ordinary text inside the value
                              $valueDump .= $currentChar;
                          }
                      }
                      break;

                  case '&':
                      //might be an entity
                      $maybeEntity = true;
                      $valueDump .= $currentChar;
                      break;

                  default:
                      if ($currentState == SAXY_STATE_ATTR_KEY) {
                          $keyDump .= $currentChar;
                      }
                      else {
                          $valueDump .= $currentChar;
                      }
              }
          }

          return $attrArray;
      } //parseAttributes

      /**
       * Parses character data; text consisting only of whitespace is ignored
       * @param string The character data
       */
      function parseBetweenTags($betweenTagText) {
          if (trim($betweenTagText) != '') {
              $this->fireCharacterDataEvent($betweenTagText);
          }
      } //parseBetweenTags

      /**
       * Fires a start element event; does nothing if no handler has been set
       * @param string The start element tag name
       * @param Array The start element attributes
       */
      function fireStartElementEvent($tagName, $attributes) {
          if ($this->startElementHandler != null) {
              call_user_func($this->startElementHandler, $this, $tagName, $attributes);
          }
      } //fireStartElementEvent

      /**
       * Fires an end element event; does nothing if no handler has been set
       * @param string The end element tag name
       */
      function fireEndElementEvent($tagName) {
          if ($this->endElementHandler != null) {
              call_user_func($this->endElementHandler, $this, $tagName);
          }
      } //fireEndElementEvent

      /**
       * Fires a character data event; does nothing if no handler has been set
       *
       * Entities are translated first when entity conversion is enabled.
       *
       * @param string The character data
       */
      function fireCharacterDataEvent($data) {
          if ($this->characterDataHandler == null) {
              return;
          }
          //strpos returns false (never -1) when the needle is absent
          if ($this->convertEntities && (strpos($data, '&') !== false)) {
              $data = strtr($data, $this->predefinedEntities);
              $data = strtr($data, $this->definedEntities);
          }
          call_user_func($this->characterDataHandler, $this, $data);
      } //fireCharacterDataEvent

      /**
       * Fires a CDATA Section event; does nothing if no handler has been set
       * @param string The CDATA Section data
       */
      function fireCDataSectionEvent($data) {
          if ($this->cDataSectionHandler != null) {
              call_user_func($this->cDataSectionHandler, $this, $data);
          }
      } //fireCDataSectionEvent
  } //SAXY_Parser_Base
  343. /**
  344. * The SAX Parser class
  345. *
  346. * @package saxy-xmlparser
  347. * @subpackage saxy-xmlparser-main
  348. * @author John Heinstein <johnkarl@nbnet.nb.ca>
  349. */
  350. class SAXY_Parser extends SAXY_Parser_Base {
  351. /** @var int The current error number */
  352. public $errorCode = SAXY_XML_ERROR_NONE;
  353. /** @var Object A reference to the DocType event handler */
  354. public $DTDHandler = null;
  355. /** @var Object A reference to the Comment event handler */
  356. public $commentHandler = null;
  357. /** @var Object A reference to the Processing Instruction event handler */
  358. public $processingInstructionHandler = null;
  359. /** @var Object A reference to the Start Namespace Declaration event handler */
  360. public $startNamespaceDeclarationHandler = null;
  361. /** @var Object A reference to the End Namespace Declaration event handler */
  362. public $endNamespaceDeclarationHandler = null;
  363. /** @var boolean True if SAXY takes namespaces into consideration when parsing element tags */
  364. public $isNamespaceAware = false;
  365. /** @var array An indexed array containing associative arrays of namespace prefixes mapped to their namespace URIs */
  366. public $namespaceMap = array();
  367. /** @var array A stack used to determine when an end namespace event should be fired */
  368. public $namespaceStack = array();
  369. /** @var array A stack used to track the URI of the current default namespace */
  370. public $defaultNamespaceStack = array();
  371. /** @var array A stack containing tag names of unclosed elements */
  372. public $elementNameStack = array();
  /**
   * Constructor for SAX parser
   *
   * PHP 8 no longer treats a method named after the class as the constructor,
   * so the initialisation is reachable through __construct as well.
   */
  function __construct() {
      $this->SAXY_Parser();
  } //__construct

  /**
   * Legacy (PHP 4 style) constructor: initialises the base class and
   * puts the parser into the "before prolog" state
   */
  function SAXY_Parser() {
      $this->SAXY_Parser_Base();
      $this->state = SAXY_STATE_PROLOG_NONE;
  } //SAXY_Parser
  /**
   * Registers the callback that receives DocType events
   * @param mixed The DocType handler
   */
  function xml_set_doctype_handler($handler) {
      $this->DTDHandler = $handler;
  } //xml_set_doctype_handler
  /**
   * Registers the callback that receives Comment events
   * @param mixed The Comment handler
   */
  function xml_set_comment_handler($handler) {
      $this->commentHandler = $handler;
  } //xml_set_comment_handler
  /**
   * Registers the callback that receives Processing Instruction events
   * @param mixed The Processing Instruction handler
   */
  function xml_set_processing_instruction_handler($handler) {
      $this->processingInstructionHandler = $handler;
  } //xml_set_processing_instruction_handler
  /**
   * Registers the callback that receives Start Namespace Declaration events
   * @param mixed The Start Namespace Declaration handler
   */
  function xml_set_start_namespace_decl_handler($handler) {
      $this->startNamespaceDeclarationHandler = $handler;
  } //xml_set_start_namespace_decl_handler
  /**
   * Registers the callback that receives End Namespace Declaration events
   * @param mixed The End Namespace Declaration handler
   */
  function xml_set_end_namespace_decl_handler($handler) {
      $this->endNamespaceDeclarationHandler = $handler;
  } //xml_set_end_namespace_decl_handler
  /**
   * Switches namespace processing on or off
   * @param boolean True if SAXY is to be namespace aware
   */
  function setNamespaceAwareness($isNamespaceAware) {
      $this->isNamespaceAware = $isNamespaceAware;
  } //setNamespaceAwareness
  /**
   * Reports which version of SAXY this is
   * @return string The value of the SAXY_VERSION constant
   */
  function getVersion() {
      $version = SAXY_VERSION;
      return $version;
  } //getVersion
  /**
   * Processes the xml prolog, doctype, and any other nodes that exist outside of the main xml document
   *
   * Processing instructions, comments and doctype declarations found before the
   * first element are passed to their handlers (where one is set). Scanning stops
   * at the first '<' that is not followed by '?' or '!'.
   *
   * @param string The xml text to be processed
   * @return string The xml text from the first element onwards, or an empty string if no element is found
   */
  function preprocessXML($xmlText) {
      //strip prolog
      $xmlText = trim($xmlText);
      $total = strlen($xmlText);

      for ($i = 0; $i < $total; $i++) {
          $currentChar = $xmlText[$i];

          switch ($this->state) {
              case SAXY_STATE_PROLOG_NONE:
                  if ($currentChar == '<') {
                      //'' stands in for "no next character"
                      $nextChar = (($i + 1) < $total) ? $xmlText[$i + 1] : '';

                      if ($nextChar == '?') {
                          $this->state = SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION;
                          $this->charContainer = '';
                      }
                      else if ($nextChar == '!') {
                          $this->state = SAXY_STATE_PROLOG_EXCLAMATION;
                          $this->charContainer .= $currentChar;
                      }
                      else {
                          //first element found: hand the rest of the text to the main parser
                          $this->charContainer = '';
                          $this->state = SAXY_STATE_PARSING;
                          return (substr($xmlText, $i));
                      }
                  }
                  break;

              case SAXY_STATE_PROLOG_EXCLAMATION:
                  if ($currentChar == 'D') {
                      $this->state = SAXY_STATE_PROLOG_DTD;
                      $this->charContainer .= $currentChar;
                  }
                  else if ($currentChar == '-') {
                      $this->state = SAXY_STATE_PROLOG_COMMENT;
                      $this->charContainer = '';
                  }
                  else {
                      //will trap ! and add it
                      $this->charContainer .= $currentChar;
                  }
                  break;

              case SAXY_STATE_PROLOG_PROCESSINGINSTRUCTION:
                  if ($currentChar == '>') {
                      $this->state = SAXY_STATE_PROLOG_NONE;
                      $this->parseProcessingInstruction($this->charContainer);
                      $this->charContainer = '';
                  }
                  else {
                      $this->charContainer .= $currentChar;
                  }
                  break;

              case SAXY_STATE_PROLOG_COMMENT:
                  $prev1 = ($i >= 1) ? $xmlText[$i - 1] : '';
                  $prev2 = ($i >= 2) ? $xmlText[$i - 2] : '';
                  $next1 = (($i + 1) < $total) ? $xmlText[$i + 1] : '';
                  $next2 = (($i + 2) < $total) ? $xmlText[$i + 2] : '';

                  if (($currentChar == '>') && ($prev1 == '-') && ($prev2 == '-')) {
                      //only "-->" ends a comment; a lone '>' is comment text
                      $this->state = SAXY_STATE_PROLOG_NONE;
                      $this->parseComment($this->charContainer);
                      $this->charContainer = '';
                  }
                  else if ($currentChar == '-') {
                      //dashes belonging to the "<!--" or "-->" delimiters are not comment text
                      $isClosingDash = (($next1 == '-') && ($next2 == '>')) ||
                                       (($prev1 == '-') && ($next1 == '>'));
                      $isOpeningDash = (($prev1 == '-') && ($prev2 == '!'));

                      if (!$isClosingDash && !$isOpeningDash) {
                          $this->charContainer .= $currentChar;
                      }
                  }
                  else {
                      $this->charContainer .= $currentChar;
                  }
                  break;

              case SAXY_STATE_PROLOG_DTD:
                  if ($currentChar == '[') {
                      $this->charContainer .= $currentChar;
                      $this->state = SAXY_STATE_PROLOG_INLINEDTD;
                  }
                  else if ($currentChar == '>') {
                      $this->state = SAXY_STATE_PROLOG_NONE;
                      if ($this->DTDHandler != null) {
                          $this->fireDTDEvent($this->charContainer . $currentChar);
                      }
                      $this->charContainer = '';
                  }
                  else {
                      $this->charContainer .= $currentChar;
                  }
                  break;

              case SAXY_STATE_PROLOG_INLINEDTD:
                  $previousChar = ($i >= 1) ? $xmlText[$i - 1] : '';

                  //an inline DTD ends at "]>"
                  if (($currentChar == '>') && ($previousChar == ']')) {
                      $this->state = SAXY_STATE_PROLOG_NONE;
                      if ($this->DTDHandler != null) {
                          $this->fireDTDEvent($this->charContainer . $currentChar);
                      }
                      $this->charContainer = '';
                  }
                  else {
                      $this->charContainer .= $currentChar;
                  }
                  break;
          }
      }

      //no element was found, so there is nothing left to parse
      return '';
  } //preprocessXML
  /**
   * The controlling method for the parsing process
   *
   * Runs the prolog through preprocessXML, then walks the remaining text one
   * character at a time, dispatching character data, tags, CDATA sections and comments.
   *
   * @param string The xml text to be processed
   * @return boolean True if no error code was recorded
   */
  function parse ($xmlText) {
      $xmlText = (string) $this->preprocessXML($xmlText);
      $total = strlen($xmlText);

      for ($i = 0; $i < $total; $i++) {
          $currentChar = $xmlText[$i];

          switch ($this->state) {
              case SAXY_STATE_PARSING:
                  switch ($currentChar) {
                      case '<':
                          if (substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) {
                              //inside a CDATA section '<' is ordinary text
                              $this->charContainer .= $currentChar;
                          }
                          else {
                              $this->parseBetweenTags($this->charContainer);
                              $this->charContainer = '';
                          }
                          break;

                      case '-':
                          //second dash of "<!--" starts a comment, unless we are inside a CDATA section
                          if (($i >= 3) && (substr($xmlText, $i - 3, 3) === '<!-') &&
                              (substr($this->charContainer, 0, SAXY_CDATA_LEN) != SAXY_SEARCH_CDATA)) {
                              $this->state = SAXY_STATE_PARSING_COMMENT;
                              $this->charContainer = '';
                          }
                          else {
                              $this->charContainer .= $currentChar;
                          }
                          break;

                      case '>':
                          //inside a CDATA section only "]]>" closes the tag
                          if ((substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) &&
                              (substr($this->charContainer, -2) != ']]')) {
                              $this->charContainer .= $currentChar;
                          }
                          else {
                              $this->parseTag($this->charContainer);
                              $this->charContainer = '';
                          }
                          break;

                      default:
                          $this->charContainer .= $currentChar;
                  }
                  break;

              case SAXY_STATE_PARSING_COMMENT:
                  if (($currentChar == '>') && ($i >= 2) &&
                      ($xmlText[$i - 1] == '-') && ($xmlText[$i - 2] == '-')) {
                      //strip the trailing "--"; parseComment skips the event when no handler is set
                      $commentText = (string) substr($this->charContainer, 0,
                                                     (strlen($this->charContainer) - 2));
                      $this->parseComment($commentText);
                      $this->charContainer = '';
                      $this->state = SAXY_STATE_PARSING;
                  }
                  else {
                      $this->charContainer .= $currentChar;
                  }
                  break;
          }
      }

      return ($this->errorCode == 0);
  } //parse
  /**
   * Parses an element tag
   *
   * Dispatches on the first character of the tag text: '/' is an end tag, '!' a
   * CDATA section or a declaration (declarations are discarded), '?' a processing
   * instruction, anything else a start tag or a self-closing tag.
   *
   * @param string The interior text of the element tag
   */
  function parseTag($tagText) {
      $tagText = trim($tagText);

      if ($tagText === '') {
          //"<>" has no tag name
          $this->errorCode = SAXY_XML_ERROR_SYNTAX;
          return;
      }

      $myAttributes = array();

      switch ($tagText[0]) {
          case '/':
              $this->_fireEndElementEvent((string) substr($tagText, 1));
              break;

          case '!':
              $upperCaseTagText = strtoupper($tagText);

              if (strpos($upperCaseTagText, SAXY_SEARCH_CDATA) !== false) { //CDATA Section
                  $textNodeText = '';
                  //the text starts after the second '[' of "![CDATA[" ...
                  $firstBrace = strpos($tagText, '[');
                  $secondBrace = ($firstBrace === false) ? false : strpos($tagText, '[', $firstBrace + 1);

                  if ($secondBrace !== false) {
                      $textStart = $secondBrace + 1;
                      //... and ends at the LAST "]]", so a "]]" inside the text is preserved
                      $textEnd = strrpos($tagText, ']]');
                      if ($textEnd === false) {
                          $textEnd = strlen($tagText);
                      }
                      if ($textEnd > $textStart) {
                          $textNodeText = substr($tagText, $textStart, $textEnd - $textStart);
                      }
                  }

                  //without a CDATA handler the text is reported as ordinary character data
                  if ($this->cDataSectionHandler == null) {
                      $this->fireCharacterDataEvent($textNodeText);
                  }
                  else {
                      $this->fireCDataSectionEvent($textNodeText);
                  }
              }
              //NOTATION nodes and any other declarations are discarded
              break;

          case '?':
              //Processing Instruction node
              $this->parseProcessingInstruction($tagText);
              break;

          default:
              $isSelfClosing = (substr($tagText, -1) === '/');

              if ((strpos($tagText, '"') !== false) || (strpos($tagText, "'") !== false)) {
                  //tag has attributes: the name runs up to the first whitespace character
                  $total = strlen($tagText);
                  $tagName = '';

                  for ($i = 0; $i < $total; $i++) {
                      $currentChar = $tagText[$i];

                      if (strpos(" \t\n\r\x0B", $currentChar) !== false) {
                          $myAttributes = $this->parseAttributes(substr($tagText, $i));
                          break;
                      }

                      $tagName .= $currentChar;
                  }
              }
              else if ($isSelfClosing) {
                  //drop the trailing '/' and any whitespace before it
                  $tagName = trim(substr($tagText, 0, -1));
              }
              else {
                  $tagName = $tagText;
              }

              $this->_fireStartElementEvent($tagName, $myAttributes);

              if ($isSelfClosing) {
                  $this->_fireEndElementEvent($tagName);
              }
      }
  } //parseTag
  /**
   * Records the element as open and fires the start element event
   *
   * When namespace awareness is on, namespace declarations are processed first and
   * the tag name and attribute names are expanded before the event is fired.
   *
   * @param string The start element tag name
   * @param Array The start element attributes
   */
  function _fireStartElementEvent($tagName, &$myAttributes) {
      array_push($this->elementNameStack, $tagName);

      if (!$this->isNamespaceAware) {
          $this->fireStartElementEvent($tagName, $myAttributes);
          return;
      }

      $this->detectStartNamespaceDeclaration($myAttributes);
      $expandedName = $this->expandNamespacePrefix($tagName);
      $this->expandAttributePrefixes($myAttributes);
      $this->fireStartElementEvent($expandedName, $myAttributes);
  } //_fireStartElementEvent
  /**
   * Expands attribute prefixes to full namespace uri
   *
   * Only attribute names containing a colon are expanded. An attribute whose
   * expanded name equals its original name is left untouched.
   *
   * @param Array The start element attributes (modified in place)
   */
  function expandAttributePrefixes(&$myAttributes) {
      $arTransform = array();

      foreach ($myAttributes as $key => $value) {
          $name = (string) $key;

          if ((strpos($name, 'xmlns') !== false) || (strpos($name, ':') === false)) {
              continue;
          }

          $expandedName = $this->expandNamespacePrefix($name);

          //renaming a key to itself would unset the attribute below
          if ($expandedName !== $name) {
              $arTransform[$key] = $expandedName;
          }
      }

      foreach ($arTransform as $key => $value) {
          $myAttributes[$value] = $myAttributes[$key];
          unset($myAttributes[$key]);
      }
  } //expandAttributePrefixes
  /**
   * Expands the namespace prefix (if one exists) to the full namespace uri
   *
   * A name without a prefix is put into the current default namespace, if there is one.
   *
   * @param string The tagName with the namespace prefix
   * @return string The tagName, with the prefix expanded to the namespace uri
   */
  function expandNamespacePrefix($tagName) {
      $stackLen = count($this->defaultNamespaceStack);
      //the stack is empty when no element is open, e.g. for a stray end tag
      $defaultNamespace = ($stackLen > 0) ? $this->defaultNamespaceStack[($stackLen - 1)] : '';

      $colonIndex = strpos($tagName, ':');

      if ($colonIndex !== false) {
          $prefix = substr($tagName, 0, $colonIndex);
          //the remainder keeps its leading colon
          $localPart = substr($tagName, $colonIndex);

          if ($prefix != 'xml') {
              $tagName = $this->getNamespaceURI($prefix) . $localPart;
          }
          else {
              $tagName = SAXY_XML_NAMESPACE . $localPart;
          }
      }
      else if ($defaultNamespace != '') {
          $tagName = $defaultNamespace . ':' . $tagName;
      }

      return $tagName;
  } //expandNamespacePrefix
  /**
   * Looks up a prefix in the namespaceMap, innermost declaration first
   * @param string The namespace prefix
   * @return string The namespace uri, or the prefix itself if it has not been declared
   */
  function getNamespaceURI($prefix) {
      for ($level = count($this->namespaceMap) - 1; $level >= 0; $level--) {
          if (isset($this->namespaceMap[$level][$prefix])) {
              return $this->namespaceMap[$level][$prefix];
          }
      }

      //in case uri can't be found, just send back prefix
      //should really generate an error, but worry about this later
      return $prefix;
  } //getNamespaceURI
  /**
   * Searches the attributes array for xmlns declarations and fires an event for each one found
   *
   * Declarations are recorded in the namespaceMap and removed from the attributes.
   * Exactly one entry is pushed onto the defaultNamespaceStack and one onto the
   * namespaceStack per call.
   *
   * @param Array The start element attributes (modified in place)
   */
  function detectStartNamespaceDeclaration(&$myAttributes) {
      $namespaceExists = false;
      $namespaceMapUpper = 0;
      $userDefinedDefaultNamespace = false;

      foreach ($myAttributes as $key => $value) {
          $name = (string) $key;
          //only "xmlns" and "xmlns:prefix" are declarations, not every name containing "xmlns"
          $isDefaultDeclaration = ($name === 'xmlns');
          $isPrefixDeclaration = (substr($name, 0, 6) === 'xmlns:');

          if (!$isDefaultDeclaration && !$isPrefixDeclaration) {
              continue;
          }

          //add an array to store all namespaces for the current element
          if (!$namespaceExists) {
              $this->namespaceMap[] = array();
              $namespaceMapUpper = count($this->namespaceMap) - 1;
          }

          if ($isPrefixDeclaration) {
              $prefix = (string) substr($name, 6);
              $this->namespaceMap[$namespaceMapUpper][$prefix] = $value;
          }
          else {
              //default namespace override, i.e. xmlns='...'
              $prefix = '';
              $userDefinedDefaultNamespace = true;

              //if default namespace '', store in map using key ':'
              $this->namespaceMap[$namespaceMapUpper][':'] = $value;
              $this->defaultNamespaceStack[] = $value;
          }

          if ($this->startNamespaceDeclarationHandler != null) {
              $this->fireStartNamespaceDeclarationEvent($prefix, $value);
          }

          $namespaceExists = true;
          unset($myAttributes[$key]);
      }

      //store the default namespace (inherited from the parent elements so grab last one)
      if (!$userDefinedDefaultNamespace) {
          $stackLen = count($this->defaultNamespaceStack);

          if ($stackLen == 0) {
              $this->defaultNamespaceStack[] = '';
          }
          else {
              $this->defaultNamespaceStack[] =
                  $this->defaultNamespaceStack[($stackLen - 1)];
          }
      }

      $this->namespaceStack[] = $namespaceExists;
  } //detectStartNamespaceDeclaration
  /**
   * Closes the most recently opened element and fires the end element event
   *
   * Records a tag mismatch error when the name does not match the element on top of the stack.
   *
   * @param string The end element tag name
   */
  function _fireEndElementEvent($tagName) {
      $openTagName = array_pop($this->elementNameStack);

      //check for mismatched tag error
      if ($openTagName != $tagName) {
          $this->errorCode = SAXY_XML_ERROR_TAG_MISMATCH;
      }

      if (!$this->isNamespaceAware) {
          $this->fireEndElementEvent($tagName);
          return;
      }

      $this->fireEndElementEvent($this->expandNamespacePrefix($tagName));
      $this->detectEndNamespaceDeclaration();
      //leave the default namespace scope of this element
      array_pop($this->defaultNamespaceStack);
  } //_fireEndElementEvent
  /**
   * Determines whether an end namespace declaration event should be fired
   *
   * Pops the flag pushed for the element being closed; if that element declared
   * namespaces, its map is popped too and one event is fired per declared prefix
   * (the default namespace is reported with an empty prefix).
   */
  function detectEndNamespaceDeclaration() {
      $isNamespaceEnded = array_pop($this->namespaceStack);

      if (!$isNamespaceEnded) {
          return;
      }

      //the map is popped even when nobody listens, to keep the stacks in step
      $map = array_pop($this->namespaceMap);

      if (empty($map) || ($this->endNamespaceDeclarationHandler == null)) {
          return;
      }

      foreach ($map as $key => $value) {
          $prefix = ($key === ':') ? '' : (string) $key;
          $this->fireEndNamespaceDeclarationEvent($prefix);
      }
  } //detectEndNamespaceDeclaration
  851. /**
  852. * Parses a processing instruction
  853. * @param string The interior text of the processing instruction
  854. */
  function parseProcessingInstruction($data) {
      // Splits the text of a processing instruction into its target and its
      // data, then reports both to the processing instruction handler if
      // one is set.
      // NOTE(review): the offsets below imply $data arrives with one leading
      // and one trailing delimiter character (e.g. "?target content?");
      // confirm against the caller.

      $total = strlen($data);

      // Find the first whitespace character after the first character of
      // the target; 0 means "no whitespace found".
      $endTarget = 0;
      for ($x = 2; $x < $total; $x++) {
          // Square-bracket offset: the {} string offset syntax was
          // deprecated in PHP 7.4 and is a parse error in PHP 8.
          if (trim($data[$x]) === '') {
              $endTarget = $x;
              break;
          }
      }

      // With $endTarget == 0 the length is -1, which drops the trailing
      // delimiter and leaves the whole interior as the target.
      $target = substr($data, 1, ($endTarget - 1));

      if ($endTarget > 0) {
          $data = substr($data, ($endTarget + 1), ($total - $endTarget - 2));
      }
      else {
          // No whitespace means the instruction carries no data; previously
          // the target text was reported a second time as the data.
          $data = '';
      }

      if ($this->processingInstructionHandler != null) {
          $this->fireProcessingInstructionEvent($target, $data);
      }
  } //parseProcessingInstruction
  870. /**
  871. * Parses a comment
  872. * @param string The interior text of the comment
  873. */
  function parseComment($data) {
      // Hands the comment text to the comment handler; does nothing when no
      // comment handler has been set.
      if ($this->commentHandler == null) {
          return;
      }

      $this->fireCommentEvent($data);
  } //parseComment
  879. /**
  880. * Fires a doctype event
  881. * @param string The doctype data
  882. */
  function fireDTDEvent($data) {
      // Invokes the DTD handler with this parser and the doctype data.
      $handler = $this->DTDHandler;

      call_user_func($handler, $this, $data);
  } //fireDTDEvent
  886. /**
  887. * Fires a comment event
  888. * @param string The text of the comment
  889. */
  function fireCommentEvent($data) {
      // Invokes the comment handler with this parser and the comment text.
      $handler = $this->commentHandler;

      call_user_func($handler, $this, $data);
  } //fireCommentEvent
  /**
  * Fires a processing instruction event
  * @param string The processing instruction target
  * @param string The processing instruction data
  */
  function fireProcessingInstructionEvent($target, $data) {
      // Invokes the processing instruction handler with this parser, the
      // instruction target and the instruction data.
      $handler = $this->processingInstructionHandler;

      call_user_func($handler, $this, $target, $data);
  } //fireProcessingInstructionEvent
  900. /**
  901. * Fires a start namespace declaration event
  902. * @param string The namespace prefix
  903. * @param string The namespace uri
  904. */
  function fireStartNamespaceDeclarationEvent($prefix, $uri) {
      // Invokes the start namespace declaration handler with this parser,
      // the namespace prefix and the namespace uri.
      $handler = $this->startNamespaceDeclarationHandler;

      call_user_func($handler, $this, $prefix, $uri);
  } //fireStartNamespaceDeclarationEvent
  908. /**
  909. * Fires an end namespace declaration event
  910. * @param string The namespace prefix
  911. */
  function fireEndNamespaceDeclarationEvent($prefix) {
      // Invokes the end namespace declaration handler with this parser and
      // the namespace prefix.
      $handler = $this->endNamespaceDeclarationHandler;

      call_user_func($handler, $this, $prefix);
  } //fireEndNamespaceDeclarationEvent
  915. /**
  916. * Returns the current error code
  917. * @return int The current error code
  918. */
  function xml_get_error_code() {
      // Reports the value currently held in the parser's errorCode property.
      $currentErrorCode = $this->errorCode;

      return $currentErrorCode;
  } //xml_get_error_code
  922. /**
  923. * Returns a textual description of the error code
  924. * @param int The error code
  925. * @return string The error message
  926. */
  function xml_error_string($code) {
      // Maps a SAXY error code to its human-readable message. Codes without
      // a matching case yield "No definition for error code <code>".
      // Each case returns directly, so no break statements are needed (the
      // ones that used to follow every return were unreachable). The switch
      // is kept because it matches $code loosely and in case order.
      switch ($code) {
          case SAXY_XML_ERROR_NONE:
              return "No error";

          case SAXY_XML_ERROR_NO_MEMORY:
              return "Out of memory";

          case SAXY_XML_ERROR_SYNTAX:
              return "Syntax error";

          case SAXY_XML_ERROR_NO_ELEMENTS:
              return "No elements in document";

          case SAXY_XML_ERROR_INVALID_TOKEN:
              return "Invalid token";

          case SAXY_XML_ERROR_UNCLOSED_TOKEN:
              return "Unclosed token";

          case SAXY_XML_ERROR_PARTIAL_CHAR:
              return "Partial character";

          case SAXY_XML_ERROR_TAG_MISMATCH:
              return "Tag mismatch";

          case SAXY_XML_ERROR_DUPLICATE_ATTRIBUTE:
              return "Duplicate attribute";

          case SAXY_XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
              return "Junk encountered after document element";

          case SAXY_XML_ERROR_PARAM_ENTITY_REF:
              return "Parameter entity reference error";

          case SAXY_XML_ERROR_UNDEFINED_ENTITY:
              return "Undefined entity";

          case SAXY_XML_ERROR_RECURSIVE_ENTITY_REF:
              return "Recursive entity reference";

          case SAXY_XML_ERROR_ASYNC_ENTITY:
              return "Asynchronous internal entity found in external entity";

          case SAXY_XML_ERROR_BAD_CHAR_REF:
              return "Bad character reference";

          case SAXY_XML_ERROR_BINARY_ENTITY_REF:
              return "Binary entity reference";

          case SAXY_XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
              return "Attribute external entity reference";

          case SAXY_XML_ERROR_MISPLACED_XML_PI:
              return "Misplaced processing instruction";

          case SAXY_XML_ERROR_UNKNOWN_ENCODING:
              return "Unknown encoding";

          case SAXY_XML_ERROR_INCORRECT_ENCODING:
              return "Incorrect encoding";

          case SAXY_XML_ERROR_UNCLOSED_CDATA_SECTION:
              return "Unclosed CDATA Section";

          case SAXY_XML_ERROR_EXTERNAL_ENTITY_HANDLING:
              return "Problem in external entity handling";

          default:
              return "No definition for error code " . $code;
      }
  } //xml_error_string
  1000. } //SAXY_Parser
  1001. ?>