PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/moodle/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php

https://bitbucket.org/geek745/moodle-db2
PHP | 475 lines | 295 code | 60 blank | 120 comment | 32 complexity | 14b400c41cca6131ec1048a953cb6d2d MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, LGPL-2.0
  1. <?php
  2. require_once 'HTMLPurifier/Definition.php';
  3. require_once 'HTMLPurifier/HTMLModuleManager.php';
  4. // this definition and its modules MUST NOT define configuration directives
  5. // outside of the HTML or Attr namespaces
  // Registers the %HTML.* configuration directives read by
  // HTMLPurifier_HTMLDefinition::setupConfigStuff(). Judging by the values
  // passed below, the arguments are: namespace, directive name, default
  // value, type string and an HTML description (confirm against
  // HTMLPurifier_ConfigSchema::define()).

  // %HTML.DefinitionID: identifies a hand-customised definition in the cache.
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'DefinitionID', null, 'string/null', '
<p>
Unique identifier for a custom-built HTML definition. If you edit
the raw version of the HTMLDefinition, introducing changes that the
configuration object does not reflect, you must specify this variable.
If you change your custom edits, you should change this directive, or
clear your cache. Example:
</p>
<pre>
$config = HTMLPurifier_Config::createDefault();
$config->set(\'HTML\', \'DefinitionID\', \'1\');
$def = $config->getHTMLDefinition();
$def->addAttribute(\'a\', \'tabindex\', \'Number\');
</pre>
<p>
In the above example, the configuration is still at the defaults, but
using the advanced API, an extra attribute has been added. The
configuration object normally has no way of knowing that this change
has taken place, so it needs an extra directive: %HTML.DefinitionID.
If someone else attempts to use the default configuration, these two
pieces of code will not clobber each other in the cache, since one has
an extra directive attached to it.
</p>
<p>
This directive has been available since 2.0.0, and in that version or
later you <em>must</em> specify a value to this directive to use the
advanced API features.
</p>
');

  // %HTML.DefinitionRev: revision counter for the custom definition.
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition specified in
%HTML.DefinitionID. This serves the same purpose: uniquely identifying
your custom definition, but this one does so in a chronological
context: revision 3 is more up-to-date than revision 2. Thus, when
this gets incremented, the cache handling is smart enough to clean
up any older revisions of your definition as well as flush the
cache. This directive has been available since 2.0.0.
</p>
');

  // %HTML.BlockWrapper: element used to wrap inline content in block context.
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'BlockWrapper', 'p', 'string', '
<p>
String name of element to wrap inline elements that are inside a block
context. This only occurs in the children of blockquote in strict mode.
</p>
<p>
Example: by default value,
<code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
<code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
as long as it is a block level element. This directive has been available
since 1.3.0.
</p>
');

  // %HTML.Parent: element the purified fragment is assumed to live inside.
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'Parent', 'div', 'string', '
<p>
String name of element that HTML fragment passed to library will be
inserted in. An interesting variation would be using span as the
parent element, meaning that only inline tags would be allowed.
This directive has been available since 1.3.0.
</p>
');

  // %HTML.AllowedElements: subtractive whitelist of element names.
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'AllowedElements', null, 'lookup/null', '
<p>
If HTML Purifier\'s tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier\'s
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override.
This directive has been available since 1.3.0.
</p>
');

  // %HTML.AllowedAttributes: subtractive whitelist of "tag.attr" / "*.attr".
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'AllowedAttributes', null, 'lookup/null', '
<p>
If HTML Purifier\'s attribute set is unsatisfactory, overload it!
The syntax is "tag.attr" or "*.attr" for the global attributes
(style, id, class, dir, lang, xml:lang).
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
attributes here, <em>that</em> directive will win and override. For
example, %HTML.EnableAttrID will take precedence over *.id in this
directive. You must set that directive to true before you can use
IDs at all. This directive has been available since 1.3.0.
</p>
');

  // %HTML.Allowed: combined element/attribute whitelist, parsed by
  // HTMLPurifier_HTMLDefinition::parseTinyMCEAllowedList().
  HTMLPurifier_ConfigSchema::define(
      'HTML', 'Allowed', null, 'itext/null', '
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:
All of the constraints on the component directives are still enforced.
The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
whitelist: directly copy-pasting it here will probably result in
broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
are set, this directive has no effect.
This directive has been available since 2.0.0.
</p>
');
  122. /**
  123. * Definition of the purified HTML that describes allowed children,
  124. * attributes, and many other things.
  125. *
  126. * Conventions:
  127. *
  128. * All member variables that are prefixed with info
  129. * (including the main $info array) are used by HTML Purifier internals
  130. * and should not be directly edited when customizing the HTMLDefinition.
  131. * They can usually be set via configuration directives or custom
  132. * modules.
  133. *
  134. * On the other hand, member variables without the info prefix are used
  135. * internally by the HTMLDefinition and MUST NOT be used by other HTML
  136. * Purifier internals. Many of them, however, are public, and may be
  137. * edited by userspace code to tweak the behavior of HTMLDefinition.
  138. *
  139. * @note This class is inspected by Printer_HTMLDefinition; please
  140. * update that class if things here change.
  141. */
  142. class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
  143. {
  // FULLY-PUBLIC VARIABLES ---------------------------------------------

  /**
   * Map of element name => HTMLPurifier_ElementDef.
   * @public
   */
  var $info = array();

  /**
   * Map of global attribute name => attribute definition.
   * @public
   */
  var $info_global_attr = array();

  /**
   * Name of the element the purified HTML will be placed inside.
   * @public
   */
  var $info_parent = 'div';

  /**
   * Definition object for the parent element. Kept separately so the
   * parent may be a tag that is not itself allowed in the fragment.
   * @public
   */
  var $info_parent_def;

  /**
   * Name of the element that wraps inline content found in block context.
   * @note Rarely needed, apart from BLOCKQUOTEs in strict mode
   * @public
   */
  var $info_block_wrapper = 'p';

  /**
   * Map of deprecated tag name => HTMLPurifier_TagTransform.
   * @public
   */
  var $info_tag_transform = array();

  /**
   * List of HTMLPurifier_AttrTransform objects run before validation.
   * @public
   */
  var $info_attr_transform_pre = array();

  /**
   * List of HTMLPurifier_AttrTransform objects run after validation.
   * @public
   */
  var $info_attr_transform_post = array();

  /**
   * Two-level lookup: content set name (Block, Inline) => element name =>
   * whether the element belongs to that content set.
   * @public
   */
  var $info_content_sets = array();

  /**
   * Doctype object
   */
  var $doctype;
  197. // RAW CUSTOMIZATION STUFF --------------------------------------------
  /**
   * Registers a custom attribute on an element through the anonymous module.
   * If the anonymous module has no entry for the element yet, a blank
   * element is added first and the attribute is attached to that.
   * @note Convenience only; HTMLPurifier_HTMLModule has no matching method
   * @param $element_name String name of the element receiving the attribute
   * @param $attr_name String name of the attribute
   * @param $def Attribute definition, either a string or an object; see
   *        HTMLPurifier_AttrTypes for details
   */
  function addAttribute($element_name, $attr_name, $def) {
      $anon =& $this->getAnonymousModule();
      if (isset($anon->info[$element_name])) {
          // element already known to the anonymous module: extend it in place
          $target =& $anon->info[$element_name];
      } else {
          $target =& $anon->addBlankElement($element_name);
      }
      $target->attr[$attr_name] = $def;
  }
  /**
   * Registers a custom element with the HTML definition by forwarding to
   * the anonymous module.
   * @note Parameters and return value are described in detail on
   *       HTMLPurifier_HTMLModule::addElement.
   */
  function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
      $anon =& $this->getAnonymousModule();
      // The literal true passed second reflects the assumption that an
      // element added explicitly by the caller is safe. That assumption
      // may not be a good idea.
      $added =& $anon->addElement(
          $element_name, true, $type, $contents, $attr_collections, $attributes
      );
      return $added;
  }
  /**
   * Registers an empty element with the HTML definition so that existing
   * behavior can be overridden; forwards to the anonymous module.
   * @note Parameters and return value are described in detail on
   *       HTMLPurifier_HTMLModule::addBlankElement.
   */
  function &addBlankElement($element_name) {
      $anon =& $this->getAnonymousModule();
      $blank =& $anon->addBlankElement($element_name);
      return $blank;
  }
  /**
   * Returns, by reference, the anonymous module, creating it on first use.
   * The anonymous module lets callers use the advanced customization
   * features without writing a module class of their own.
   * @return HTMLPurifier_HTMLModule named 'Anonymous'
   */
  function &getAnonymousModule() {
      // isset() instead of a plain truthiness test: processModules()
      // unset()s this property once the module has been handed to the
      // manager, and reading an unset property raises a notice.
      if (!isset($this->_anonModule)) {
          $this->_anonModule = new HTMLPurifier_HTMLModule();
          $this->_anonModule->name = 'Anonymous';
      }
      return $this->_anonModule;
  }

  /** Lazily created anonymous module; see getAnonymousModule(). */
  var $_anonModule;
  // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

  var $type = 'HTML';
  var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */

  /**
   * Performs low-cost, preliminary initialization: creates the module
   * manager that processModules() later consumes.
   *
   * Declared as __construct() because PHP 8 no longer treats a method
   * named after the class as the constructor; without this, $manager
   * would never be created on instantiation.
   */
  function __construct() {
      $this->manager = new HTMLPurifier_HTMLModuleManager();
  }

  /**
   * PHP 4-style constructor, retained so that PHP 4 and any code calling
   * this method by name keep working. Delegates to __construct().
   */
  function HTMLPurifier_HTMLDefinition() {
      $this->__construct();
  }
  /**
   * Builds the definition: pulls data out of the module manager, applies
   * the configuration directives, then discards the manager and the
   * per-element content model fields that are no longer needed.
   * @param $config Configuration object, passed through to processModules()
   *        and setupConfigStuff()
   */
  function doSetup($config) {
      $this->processModules($config);
      $this->setupConfigStuff($config);
      unset($this->manager);

      // trim fields off the element definitions that were only needed
      // while the definition was being assembled
      foreach (array_keys($this->info) as $name) {
          unset(
              $this->info[$name]->content_model,
              $this->info[$name]->content_model_type
          );
      }
  }
  /**
   * Copies everything this definition needs out of the module manager:
   * doctype, tag/attribute transforms, elements and content sets.
   * @param $config Configuration object handed to the manager's setup()
   */
  function processModules($config) {

      if ($this->_anonModule) {
          // User-specific changes. The anonymous module is registered this
          // late so that PHP4 reference quirks do not have to be handled.
          $this->manager->addModule($this->_anonModule);
          unset($this->_anonModule);
      }

      $this->manager->setup($config);
      $this->doctype = $this->manager->doctype;

      // The three transform tables are merged in the same way: a value of
      // false removes an entry, anything else sets or overwrites it.
      $transform_tables = array(
          'info_tag_transform',
          'info_attr_transform_pre',
          'info_attr_transform_post'
      );
      foreach ($this->manager->modules as $module) {
          foreach ($transform_tables as $table) {
              foreach ($module->{$table} as $key => $transform) {
                  if ($transform === false) {
                      unset($this->{$table}[$key]);
                  } else {
                      $this->{$table}[$key] = $transform;
                  }
              }
          }
      }

      $this->info = $this->manager->getElements();
      $this->info_content_sets = $this->manager->contentSets->lookup;
  }
  /**
   * Applies the %HTML.* configuration directives to the definition built
   * by processModules(). We need a better way of doing this.
   *
   * Steps, in order:
   *  - %HTML.BlockWrapper: must name a member of the Block content set
   *  - %HTML.Parent: must be an element the manager recognizes
   *  - %HTML.AllowedElements / %HTML.AllowedAttributes (or, when neither
   *    is an array, %HTML.Allowed): remove everything not whitelisted and
   *    warn about whitelist entries that matched nothing
   *
   * @param $config Configuration object, read with $config->get('HTML', ...)
   */
  function setupConfigStuff($config) {

      $block_wrapper = $config->get('HTML', 'BlockWrapper');
      if (isset($this->info_content_sets['Block'][$block_wrapper])) {
          $this->info_block_wrapper = $block_wrapper;
      } else {
          trigger_error('Cannot use non-block element as block wrapper',
              E_USER_ERROR);
      }

      $parent = $config->get('HTML', 'Parent');
      $def = $this->manager->getElement($parent, true);
      if ($def) {
          $this->info_parent = $parent;
          $this->info_parent_def = $def;
      } else {
          trigger_error('Cannot use unrecognized element as parent',
              E_USER_ERROR);
          // only reached when the error handler returns: fall back to the
          // definition of the current (default) parent element
          $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
      }

      // support template text
      $support = "(for information on implementing this, see the ".
                 "support forums) ";

      // setup allowed elements
      $allowed_elements = $config->get('HTML', 'AllowedElements');
      $allowed_attributes = $config->get('HTML', 'AllowedAttributes');

      // %HTML.Allowed is only consulted when neither component directive
      // has been set
      if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
          $allowed = $config->get('HTML', 'Allowed');
          if (is_string($allowed)) {
              list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
          }
      }

      if (is_array($allowed_elements)) {
          foreach ($this->info as $name => $d) {
              if (!isset($allowed_elements[$name])) unset($this->info[$name]);
              // whatever remains afterwards matched no known element
              unset($allowed_elements[$name]);
          }
          // emit errors
          foreach ($allowed_elements as $element => $d) {
              $element = htmlspecialchars($element);
              trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
          }
      }

      // The copy is pruned as entries are matched, so that what is left
      // at the end is the set of whitelist entries that matched nothing.
      $allowed_attributes_mutable = $allowed_attributes; // by copy!
      if (is_array($allowed_attributes)) {

          foreach ($this->info_global_attr as $attr_key => $info) {
              if (!isset($allowed_attributes["*.$attr_key"])) {
                  unset($this->info_global_attr[$attr_key]);
              } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
                  unset($allowed_attributes_mutable["*.$attr_key"]);
              }
          }

          foreach ($this->info as $tag => $info) {
              foreach ($info->attr as $attr => $attr_info) {
                  if (!isset($allowed_attributes["$tag.$attr"]) &&
                      !isset($allowed_attributes["*.$attr"])) {
                      unset($this->info[$tag]->attr[$attr]);
                  } else {
                      if (isset($allowed_attributes_mutable["$tag.$attr"])) {
                          unset($allowed_attributes_mutable["$tag.$attr"]);
                      } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
                          unset($allowed_attributes_mutable["*.$attr"]);
                      }
                  }
              }
          }

          // emit errors
          foreach ($allowed_attributes_mutable as $elattr => $d) {
              // Split on the first dot only, so the attribute part is
              // reported in full, and tolerate keys that contain no dot
              // instead of raising an undefined-offset notice.
              $parts = explode('.', $elattr, 2);
              $element = htmlspecialchars($parts[0]);
              $attribute = htmlspecialchars(isset($parts[1]) ? $parts[1] : '');
              if ($element === '*') {
                  trigger_error("Global attribute '$attribute' is not ".
                      "supported in any elements $support",
                      E_USER_WARNING);
              } else {
                  trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                      E_USER_WARNING);
              }
          }

      }

  }
  /**
   * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
   * separate lists for processing. Format is element[attr1|attr2],element2...
   * Entries are separated by commas and/or newlines; spaces and tabs are
   * ignored. The element name "*" contributes attributes ("*.attr") but is
   * not itself added to the element list.
   * @warning Although it's largely drawn from TinyMCE's implementation,
   *          it is different, and you'll probably have to modify your lists
   * @param $list String list to parse
   * @return array($allowed_elements, $allowed_attributes), both lookup
   *         arrays of name => true; attribute keys are "element.attr"
   */
  function parseTinyMCEAllowedList($list) {
      $elements = array();
      $attributes = array();

      // Names never contain blanks, so "a[href], b" should mean the same
      // as "a[href],b" rather than yielding an element called " b".
      $list = str_replace(array(' ', "\t"), '', $list);

      $chunks = preg_split('/(,|[\n\r]+)/', $list);
      foreach ($chunks as $chunk) {
          if ($chunk === '') continue;

          $bracket = strpos($chunk, '[');
          if (!$bracket) {
              // no attribute list (a bracket at position 0 has no element
              // name in front of it and is treated the same way)
              $element = $chunk;
              $attr = '';
          } else {
              $element = substr($chunk, 0, $bracket);
              $attr = (string) substr($chunk, $bracket + 1);
          }
          if ($element !== '*') $elements[$element] = true;

          // drop the closing bracket only when it is actually there, so a
          // missing "]" does not eat the last letter of an attribute name
          if (substr($attr, -1) === ']') {
              $attr = (string) substr($attr, 0, -1);
          }
          if ($attr === '') continue;

          foreach (explode('|', $attr) as $key) {
              // "el[]" or "el[a||b]" must not register an empty attribute
              if ($key === '') continue;
              $attributes["$element.$key"] = true;
          }
      }
      return array($elements, $attributes);
  }
  415. }