/2010/plugins/editors/rokpad/lib/htmlpurifier-2.1.5/HTMLPurifier/EntityParser.php

https://bitbucket.org/elijahvsjesus/tandava · PHP · 156 lines · 88 code · 15 blank · 53 comment · 9 complexity · 37a338b76f4ee5cc111aca151dd0b227 MD5 · raw file

  1. <?php
  2. require_once 'HTMLPurifier/EntityLookup.php';
  3. require_once 'HTMLPurifier/Encoder.php';
  4. // If we want to implement error collecting here, we'll need to use some sort
  5. // of global data (probably trigger_error), because it's impossible to pass
  6. // $config or $context to the callback functions.
  /**
   * Handles referencing and dereferencing character entities.
   *
   * Two substitution passes share one entity regex:
   *  - substituteNonSpecialEntities() replaces every entity except the
   *    markup-significant ones listed in the $_special_* tables;
   *  - substituteSpecialEntities() replaces only those special ones.
   */
  class HTMLPurifier_EntityParser
  {

      /**
       * Reference to entity lookup table. Left unset until the first named,
       * non-special entity is seen, then filled from
       * HTMLPurifier_EntityLookup::instance().
       * @protected
       */
      var $_entity_lookup;

      /**
       * Callback regex string for parsing entities. The trailing semicolon
       * is optional, so unterminated entities are matched as well.
       * @protected
       */
      var $_substituteEntitiesRegex =
  '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
  //     1. hex             2. dec      3. string (XML style)

      /**
       * Decimal to parsed string conversion table for special entities.
       * @protected
       */
      var $_special_dec2str =
              array(
                      34 => '"',
                      38 => '&',
                      39 => "'",
                      60 => '<',
                      62 => '>'
              );

      /**
       * Stripped entity names to decimal conversion table for special entities.
       * Every value here is also a key of $_special_dec2str.
       * @protected
       */
      var $_special_ent2dec =
              array(
                      'quot' => 34,
                      'amp'  => 38,
                      'lt'   => 60,
                      'gt'   => 62
              );

      /**
       * Substitutes non-special entities with their parsed equivalents.
       * Doing this each time a piece of character data is parsed would be
       * wasteful, so it is meant to be run once, before everything else.
       *
       * @protected
       * @param $string String to have non-special entities parsed.
       * @returns Parsed string.
       */
      function substituteNonSpecialEntities($string) {
          // it will try to detect missing semicolons, but don't rely on it
          return preg_replace_callback(
              $this->_substituteEntitiesRegex,
              array($this, 'nonSpecialEntityCallback'),
              $string
          );
      }

      /**
       * Callback function for substituteNonSpecialEntities() that does the work.
       *
       * @warning Though this is public in order to let the callback happen,
       *          calling it directly is not recommended.
       * @param $matches  PCRE matches array, with 0 the entire match, and
       *                  either index 1, 2 or 3 set with a hex value, dec value,
       *                  or string (respectively).
       * @returns Replacement string.
       */
      function nonSpecialEntityCallback($matches) {
          // replaces all but big five
          $entity = $matches[0];
          // numeric references are the ones whose second character is '#'
          $is_num = (isset($entity[1]) && $entity[1] === '#');
          if ($is_num) {
              $is_hex = (isset($entity[2]) && $entity[2] === 'x');
              $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

              // abort for special characters: they stay escaped in this pass
              if (isset($this->_special_dec2str[$code])) return $entity;
              // NOTE(review): unichr() is defined outside this file; presumably
              // it returns the character for the given code point -- confirm.
              return HTMLPurifier_Encoder::unichr($code);
          } else {
              // named special entities are left untouched as well
              if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
              // fetch the lookup table only when it is actually needed
              if (!$this->_entity_lookup) {
                  $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
              }
              if (isset($this->_entity_lookup->table[$matches[3]])) {
                  return $this->_entity_lookup->table[$matches[3]];
              } else {
                  // unknown entity name: keep the original text
                  return $entity;
              }
          }
      }

      /**
       * Substitutes only special entities with their parsed equivalents.
       *
       * @notice We try to avoid calling this function because otherwise, it
       * would have to be called a lot (for every parsed section).
       *
       * @protected
       * @param $string String to have special entities parsed.
       * @returns Parsed string.
       */
      function substituteSpecialEntities($string) {
          return preg_replace_callback(
              $this->_substituteEntitiesRegex,
              array($this, 'specialEntityCallback'),
              $string);
      }

      /**
       * Callback function for substituteSpecialEntities() that does the work.
       *
       * This callback has same syntax as nonSpecialEntityCallback().
       *
       * @warning Though this is public in order to let the callback happen,
       *          calling it directly is not recommended.
       * @param $matches  PCRE-style matches array, with 0 the entire match, and
       *                  either index 1, 2 or 3 set with a hex value, dec value,
       *                  or string (respectively).
       * @returns Replacement string.
       */
      function specialEntityCallback($matches) {
          $entity = $matches[0];
          $is_num = (isset($entity[1]) && $entity[1] === '#');
          if ($is_num) {
              $is_hex = (isset($entity[2]) && $entity[2] === 'x');
              $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
              return isset($this->_special_dec2str[$int]) ?
                  $this->_special_dec2str[$int] :
                  $entity;
          } else {
              // $_special_ent2dec maps a name to its decimal code point, so the
              // code point must be mapped through $_special_dec2str to obtain
              // the character; returning the code point itself would turn
              // '&amp;' into '38'.
              return isset($this->_special_ent2dec[$matches[3]]) ?
                  $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
                  $entity;
          }
      }

  }