PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/libraries/domit/xml_saxy_shared.php

http://kak.googlecode.com/
PHP | 294 lines | 261 code | 2 blank | 31 comment | 4 complexity | f3663d2c1cc4eb8564a547784fe0d041 MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0, BSD-3-Clause
  1. <?php
  2. /**
  3. * SAXY_Parser_Base is a base class for SAXY and SAXY Lite
  4. * @package saxy-xmlparser
  5. * @version 1.0
  6. * @copyright (C) 2004 John Heinstein. All rights reserved
  7. * @license http://www.gnu.org/copyleft/lesser.html LGPL License
  8. * @author John Heinstein <johnkarl@nbnet.nb.ca>
  9. * @link http://www.engageinteractive.com/saxy/ SAXY Home Page
  10. * SAXY is Free Software
  11. **/
  12. /** the initial characters of a cdata section */
  const SAXY_SEARCH_CDATA = '![CDATA[';
  /** Character count of the CDATA opening marker above */
  const SAXY_CDATA_LEN = 8;
  /** Opening marker of a notation declaration */
  const SAXY_SEARCH_NOTATION = '!NOTATION';
  /** Opening marker of a doctype declaration */
  const SAXY_SEARCH_DOCTYPE = '!DOCTYPE';
  /** Attribute parse state: between attributes, nothing being read yet */
  const SAXY_STATE_ATTR_NONE = 0;
  /** Attribute parse state: reading an attribute key */
  const SAXY_STATE_ATTR_KEY = 1;
  /** Attribute parse state: reading an attribute value */
  const SAXY_STATE_ATTR_VALUE = 2;
  26. /**
  27. * The base SAX Parser class
  28. *
  29. * @package saxy-xmlparser
  30. * @author John Heinstein <johnkarl@nbnet.nb.ca>
  31. */
  class SAXY_Parser_Base {
      /** @var int The current state of the parser */
      var $state;
      /** @var string A temporary container for parsed characters */
      var $charContainer;
      /** @var mixed Callback invoked for the start element event */
      var $startElementHandler;
      /** @var mixed Callback invoked for the end element event */
      var $endElementHandler;
      /** @var mixed Callback invoked for the character data event */
      var $characterDataHandler;
      /** @var mixed Callback invoked for the CDATA Section event (null when unset) */
      var $cDataSectionHandler = null;
      /** @var boolean True if entities are to be converted into characters */
      var $convertEntities = true;
      /** @var Array Translation table for predefined entities */
      var $predefinedEntities = array('&amp;' => '&', '&lt;' => '<', '&gt;' => '>',
                                      '&quot;' => '"', '&apos;' => "'");
      /** @var Array User defined translation table for entities */
      var $definedEntities = array();
      /** @var boolean True if whitespace is to be preserved during parsing. NOT YET IMPLEMENTED! */
      var $preserveWhitespace = false;

      /**
      * Constructor for SAX parser
      *
      * Declared before the legacy same-named method so that it is the
      * constructor on every PHP version that supports __construct.
      */
      function __construct() {
          $this->charContainer = '';
      } //__construct

      /**
      * Legacy (PHP 4 style) constructor
      *
      * No longer treated as a constructor by PHP 8; kept as a regular method
      * so code that invokes it explicitly keeps working.
      */
      function SAXY_Parser_Base() {
          // self:: keeps the call non-virtual, so a subclass __construct that
          // calls this method cannot recurse into itself
          self::__construct();
      } //SAXY_Parser_Base

      /**
      * Sets the handlers for the start element and end element events
      * @param mixed The start element handler callback
      * @param mixed The end element handler callback
      */
      function xml_set_element_handler($startHandler, $endHandler) {
          $this->startElementHandler = $startHandler;
          $this->endElementHandler = $endHandler;
      } //xml_set_element_handler

      /**
      * Sets the handler for the character data event
      * @param mixed The data handler callback
      */
      function xml_set_character_data_handler($handler) {
          $this->characterDataHandler = $handler;
      } //xml_set_character_data_handler

      /**
      * Sets the handler for the CDATA Section event
      * @param mixed The CDATA Section handler callback
      */
      function xml_set_cdata_section_handler($handler) {
          $this->cDataSectionHandler = $handler;
      } //xml_set_cdata_section_handler

      /**
      * Sets whether entities should be replaced with their equivalent characters during parsing
      * @param boolean True if entity replacement is to occur
      */
      function convertEntities($truthVal) {
          $this->convertEntities = $truthVal;
      } //convertEntities

      /**
      * Appends an array of entity mappings to the existing translation table
      *
      * Intended mainly to facilitate the conversion of non-ASCII entities into equivalent characters.
      * A mapping whose key already exists in the table replaces the earlier mapping.
      *
      * @param array A list of entity mappings in the format: array('&amp;' => '&');
      */
      function appendEntityTranslationTable($table) {
          if (!is_array($table)) {
              return;
          }

          // key-by-key copy: preserves the keys exactly and lets later calls
          // add to (rather than wipe out) earlier ones
          foreach ($table as $entity => $replacement) {
              $this->definedEntities[$entity] = $replacement;
          }
      } //appendEntityTranslationTable

      /**
      * Gets the nth character from the end of the string
      * @param string The text to be queried
      * @param int The index from the end of the string (0 is the last character)
      * @return string The found character, or an empty string if the index is out of range
      */
      function getCharFromEnd($text, $index) {
          $text = (string) $text;
          $index = (int) $index;
          $position = strlen($text) - 1 - $index;

          // explicit bounds check: a negative offset would otherwise wrap
          // around to the end of the string on PHP 7.1+
          if ($index < 0 || $position < 0) {
              return '';
          }

          return $text[$position];
      } //getCharFromEnd

      /**
      * Parses the attributes string into an array of key / value pairs
      *
      * Values must be quoted with single or double quotes. Whitespace around
      * the equals sign is ignored. Newlines, carriage returns and vertical
      * tabs are dropped wherever they occur, including inside values.
      *
      * @param string The attribute text
      * @return Array An array of key / value pairs
      */
      function parseAttributes($attrText) {
          $attrText = trim((string) $attrText);
          $attrArray = array();
          $maybeEntity = false;
          $total = strlen($attrText);
          $keyDump = '';
          $valueDump = '';
          $currentState = SAXY_STATE_ATTR_NONE;
          $quoteType = '';

          for ($i = 0; $i < $total; $i++) {
              $currentChar = $attrText[$i];
              $isWhitespace = (trim($currentChar) === '');

              if ($currentState == SAXY_STATE_ATTR_NONE) {
                  if ($isWhitespace) {
                      // separator between attributes
                      continue;
                  }

                  $currentState = SAXY_STATE_ATTR_KEY;
              }
              else if ($isWhitespace && ($currentState == SAXY_STATE_ATTR_VALUE) && ($quoteType == '')) {
                  // whitespace between '=' and the opening quote is not part of the value
                  continue;
              }

              switch ($currentChar) {
                  case "\x0B": //vertical tab
                  case "\n":
                  case "\r":
                      //discarded in keys and values alike
                      break;

                  case "\t":
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          $valueDump .= $currentChar;
                      }
                      break;

                  case '=':
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          $valueDump .= $currentChar;
                      }
                      else {
                          $currentState = SAXY_STATE_ATTR_VALUE;
                          $quoteType = '';
                          $maybeEntity = false;
                      }
                      break;

                  case '"':
                  case "'":
                      if ($currentState == SAXY_STATE_ATTR_VALUE) {
                          if ($quoteType == '') {
                              //opening quote
                              $quoteType = $currentChar;
                          }
                          else if ($quoteType == $currentChar) {
                              //closing quote: the attribute is complete
                              // Joomla! hack: $this is unset when called statically
                              if (isset( $this ) && $this->convertEntities && $maybeEntity) {
                                  $valueDump = $this->_translateEntities($valueDump);
                              }

                              $attrArray[trim($keyDump)] = $valueDump;
                              $keyDump = $valueDump = $quoteType = '';
                              $currentState = SAXY_STATE_ATTR_NONE;
                          }
                          else {
                              //the other quote character, nested inside the value
                              $valueDump .= $currentChar;
                          }
                      }
                      break;

                  case '&':
                      //might be an entity
                      $maybeEntity = true;
                      $valueDump .= $currentChar;
                      break;

                  default:
                      if ($currentState == SAXY_STATE_ATTR_KEY) {
                          $keyDump .= $currentChar;
                      }
                      else {
                          $valueDump .= $currentChar;
                      }
              }
          }

          return $attrArray;
      } //parseAttributes

      /**
      * Parses character data
      *
      * Fires a character data event unless the text is empty or whitespace only.
      *
      * @param string The character data
      */
      function parseBetweenTags($betweenTagText) {
          if (trim($betweenTagText) !== '') {
              $this->fireCharacterDataEvent($betweenTagText);
          }
      } //parseBetweenTags

      /**
      * Fires a start element event
      * @param string The start element tag name
      * @param Array The start element attributes
      */
      function fireStartElementEvent($tagName, $attributes) {
          $this->_dispatchEvent($this->startElementHandler, array($tagName, $attributes));
      } //fireStartElementEvent

      /**
      * Fires an end element event
      * @param string The end element tag name
      */
      function fireEndElementEvent($tagName) {
          $this->_dispatchEvent($this->endElementHandler, array($tagName));
      } //fireEndElementEvent

      /**
      * Fires a character data event
      *
      * Entities are converted first when conversion is enabled and the data
      * contains an ampersand.
      *
      * @param string The character data
      */
      function fireCharacterDataEvent($data) {
          // strpos() returns false (never -1) when the needle is absent
          if ($this->convertEntities && (strpos($data, '&') !== false)) {
              $data = $this->_translateEntities($data);
          }

          $this->_dispatchEvent($this->characterDataHandler, array($data));
      } //fireCharacterDataEvent

      /**
      * Fires a CDATA Section event
      * @param string The CDATA Section data
      */
      function fireCDataSectionEvent($data) {
          $this->_dispatchEvent($this->cDataSectionHandler, array($data));
      } //fireCDataSectionEvent

      /**
      * Replaces predefined and user defined entities in a single pass
      *
      * A single pass means text produced by one replacement is never scanned
      * again, so '&amp;lt;' becomes '&lt;' and not '<'. On a key collision the
      * predefined mapping wins.
      *
      * @param string The text containing entities
      * @return string The text with entities replaced
      */
      function _translateEntities($text) {
          $table = $this->predefinedEntities;

          if (is_array($this->definedEntities)) {
              $table = $table + $this->definedEntities;
          }

          return strtr($text, $table);
      } //_translateEntities

      /**
      * Invokes an event handler with the parser as first argument
      *
      * Does nothing when the handler has not been set. The parser is passed
      * through call_user_func_array as a reference so that handlers declared
      * with a by-reference parser parameter are accepted as well.
      *
      * @param mixed The handler callback, or null if none was registered
      * @param Array The event arguments that follow the parser argument
      */
      function _dispatchEvent($handler, $args) {
          if ($handler === null) {
              return;
          }

          $parser = $this;
          $params = array(&$parser);

          foreach ($args as $arg) {
              $params[] = $arg;
          }

          call_user_func_array($handler, $params);
      } //_dispatchEvent
  } //SAXY_Parser_Base
  261. ?>