PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/common/libraries/plugin/htmlpurifier/library/HTMLPurifier/EntityParser.php

https://bitbucket.org/chamilo/chamilo-dev/
PHP | 136 lines | 75 code | 15 blank | 46 comment | 6 complexity | 9336489e17aec894589c473c351bf604 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT
  1. <?php
  2. // if we want to implement error collecting here, we'll need to use some sort
  3. // of global data (probably trigger_error) because it's impossible to pass
  4. // $config or $context to the callback functions.
  /**
   * Handles referencing and dereferencing character entities.
   *
   * Two passes are offered: one that decodes every entity except the
   * markup-significant ones (quot, amp, lt, gt and the numeric forms of
   * those plus the apostrophe), and one that decodes only those special ones.
   */
  class HTMLPurifier_EntityParser
  {
      /**
       * Entity lookup object, fetched lazily from
       * HTMLPurifier_EntityLookup::instance() the first time a named,
       * non-special entity has to be resolved. Its ->table property is
       * indexed by entity name.
       */
      protected $_entity_lookup;

      /**
       * Callback regex string for parsing entities.
       *
       * Capture groups: 1. hex digits, 2. decimal digits (leading zeros
       * stripped), 3. entity name (XML style). The trailing semicolon is
       * optional.
       */
      protected $_substituteEntitiesRegex = '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';

      /**
       * Decimal code point to literal character table for special entities.
       */
      protected $_special_dec2str = array(34 => '"', 38 => '&', 39 => "'", 60 => '<', 62 => '>');

      /**
       * Entity name (without '&' and ';') to decimal code point table for
       * special entities.
       */
      protected $_special_ent2dec = array('quot' => 34, 'amp' => 38, 'lt' => 60, 'gt' => 62);

      /**
       * Substitutes non-special entities with their parsed equivalents.
       *
       * Special entities are left untouched so that this can be run once,
       * up front, instead of on every parsed fragment.
       *
       * @param $string String to have non-special entities parsed.
       * @return Parsed string.
       */
      public function substituteNonSpecialEntities($string)
      {
          // it will try to detect missing semicolons, but don't rely on it
          return preg_replace_callback(
              $this->_substituteEntitiesRegex,
              array($this, 'nonSpecialEntityCallback'),
              $string
          );
      }

      /**
       * Callback function for substituteNonSpecialEntities() that does the work.
       *
       * @param $matches PCRE matches array, with 0 the entire match, and
       *                 either index 1, 2 or 3 set with a hex value, dec value,
       *                 or string (respectively).
       * @return Replacement string.
       */
      protected function nonSpecialEntityCallback($matches)
      {
          // replaces all but the special entities
          $entity = $matches[0];
          $is_num = (isset($entity[1]) && $entity[1] === '#');

          if ($is_num) {
              $is_hex = (isset($entity[2]) && $entity[2] === 'x');
              $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

              // abort for special characters: they must stay escaped
              if (isset($this->_special_dec2str[$code])) {
                  return $entity;
              }
              return HTMLPurifier_Encoder::unichr($code);
          }

          $name = $matches[3];
          if (isset($this->_special_ent2dec[$name])) {
              return $entity;
          }

          if (! $this->_entity_lookup) {
              $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
          }

          // unknown entity names are passed through unchanged
          if (isset($this->_entity_lookup->table[$name])) {
              return $this->_entity_lookup->table[$name];
          }
          return $entity;
      }

      /**
       * Substitutes only special entities with their parsed equivalents.
       *
       * @notice We try to avoid calling this function because otherwise, it
       *         would have to be called a lot (for every parsed section).
       *
       * @param $string String to have special entities parsed.
       * @return Parsed string.
       */
      public function substituteSpecialEntities($string)
      {
          return preg_replace_callback(
              $this->_substituteEntitiesRegex,
              array($this, 'specialEntityCallback'),
              $string
          );
      }

      /**
       * Callback function for substituteSpecialEntities() that does the work.
       *
       * This callback has same syntax as nonSpecialEntityCallback().
       *
       * @param $matches PCRE-style matches array, with 0 the entire match, and
       *                 either index 1, 2 or 3 set with a hex value, dec value,
       *                 or string (respectively).
       * @return Replacement string.
       */
      protected function specialEntityCallback($matches)
      {
          $entity = $matches[0];
          $is_num = (isset($entity[1]) && $entity[1] === '#');

          if ($is_num) {
              $is_hex = (isset($entity[2]) && $entity[2] === 'x');
              $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
              return isset($this->_special_dec2str[$int]) ? $this->_special_dec2str[$int] : $entity;
          }

          $name = $matches[3];
          if (isset($this->_special_ent2dec[$name])) {
              // _special_ent2dec holds code points, not characters; translate
              // through _special_dec2str so "&amp;" becomes "&" and not "38".
              return $this->_special_dec2str[$this->_special_ent2dec[$name]];
          }
          return $entity;
      }
  }
  119. // vim: et sw=4 sts=4