PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/moodle/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php

https://bitbucket.org/geek745/moodle-db2
PHP | 492 lines | 301 code | 69 blank | 122 comment | 34 complexity | 662bc0f7c029f96650a2d8bbd65919f0 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, LGPL-2.0
  1. <?php
  2. require_once 'HTMLPurifier/HTMLModule.php';
  3. require_once 'HTMLPurifier/ElementDef.php';
  4. require_once 'HTMLPurifier/Doctype.php';
  5. require_once 'HTMLPurifier/DoctypeRegistry.php';
  6. require_once 'HTMLPurifier/ContentSets.php';
  7. require_once 'HTMLPurifier/AttrTypes.php';
  8. require_once 'HTMLPurifier/AttrCollections.php';
  9. require_once 'HTMLPurifier/AttrDef.php';
  10. require_once 'HTMLPurifier/AttrDef/Enum.php';
  11. // W3C modules
  12. require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
  13. require_once 'HTMLPurifier/HTMLModule/Text.php';
  14. require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
  15. require_once 'HTMLPurifier/HTMLModule/List.php';
  16. require_once 'HTMLPurifier/HTMLModule/Presentation.php';
  17. require_once 'HTMLPurifier/HTMLModule/Edit.php';
  18. require_once 'HTMLPurifier/HTMLModule/Bdo.php';
  19. require_once 'HTMLPurifier/HTMLModule/Tables.php';
  20. require_once 'HTMLPurifier/HTMLModule/Image.php';
  21. require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
  22. require_once 'HTMLPurifier/HTMLModule/Legacy.php';
  23. require_once 'HTMLPurifier/HTMLModule/Target.php';
  24. require_once 'HTMLPurifier/HTMLModule/Scripting.php';
  25. require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
  26. require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
  27. require_once 'HTMLPurifier/HTMLModule/Ruby.php';
  28. require_once 'HTMLPurifier/HTMLModule/Object.php';
  29. // tidy modules
  30. require_once 'HTMLPurifier/HTMLModule/Tidy.php';
  31. require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
  32. require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
  33. require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
  34. HTMLPurifier_ConfigSchema::define(
  35. 'HTML', 'Doctype', '', 'string',
  36. 'Doctype to use during filtering. '.
  37. 'Technically speaking this is not actually a doctype (as it does '.
  38. 'not identify a corresponding DTD), but we are using this name '.
  39. 'for sake of simplicity. When non-blank, this will override any older directives '.
  40. 'like %HTML.XHTML or %HTML.Strict.'
  41. );
  42. HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array(
  43. '', 'HTML 4.01 Transitional', 'HTML 4.01 Strict',
  44. 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict',
  45. 'XHTML 1.1'
  46. ));
  47. HTMLPurifier_ConfigSchema::define(
  48. 'HTML', 'CustomDoctype', null, 'string/null',
  49. '
  50. A custom doctype for power-users who defined there own document
  51. type. This directive only applies when %HTML.Doctype is blank.
  52. This directive has been available since 2.0.1.
  53. '
  54. );
  55. HTMLPurifier_ConfigSchema::define(
  56. 'HTML', 'Trusted', false, 'bool',
  57. 'Indicates whether or not the user input is trusted or not. If the '.
  58. 'input is trusted, a more expansive set of allowed tags and attributes '.
  59. 'will be used. This directive has been available since 2.0.0.'
  60. );
  61. HTMLPurifier_ConfigSchema::define(
  62. 'HTML', 'AllowedModules', null, 'lookup/null', '
  63. <p>
  64. A doctype comes with a set of usual modules to use. Without having
  65. to mucking about with the doctypes, you can quickly activate or
  66. disable these modules by specifying which modules you wish to allow
  67. with this directive. This is most useful for unit testing specific
  68. modules, although end users may find it useful for their own ends.
  69. </p>
  70. <p>
  71. If you specify a module that does not exist, the manager will silently
  72. fail to use it, so be careful! User-defined modules are not affected
  73. by this directive. Modules defined in %HTML.CoreModules are not
  74. affected by this directive. This directive has been available since 2.0.0.
  75. </p>
  76. ');
  77. HTMLPurifier_ConfigSchema::define(
  78. 'HTML', 'CoreModules', array(
  79. 'Structure' => true,
  80. 'Text' => true,
  81. 'Hypertext' => true,
  82. 'List' => true,
  83. 'NonXMLCommonAttributes' => true,
  84. 'XMLCommonAttributes' => true,
  85. 'CommonAttributes' => true
  86. ), 'lookup', '
  87. <p>
  88. Certain modularized doctypes (XHTML, namely), have certain modules
  89. that must be included for the doctype to be an conforming document
  90. type: put those modules here. By default, XHTML\'s core modules
  91. are used. You can set this to a blank array to disable core module
  92. protection, but this is not recommended. This directive has been
  93. available since 2.0.0.
  94. </p>
  95. ');
  96. class HTMLPurifier_HTMLModuleManager
  97. {
  98. /**
  99. * Instance of HTMLPurifier_DoctypeRegistry
  100. * @public
  101. */
  102. var $doctypes;
  103. /**
  104. * Instance of current doctype
  105. * @public
  106. */
  107. var $doctype;
  108. /**
  109. * Instance of HTMLPurifier_AttrTypes
  110. * @public
  111. */
  112. var $attrTypes;
  113. /**
  114. * Active instances of modules for the specified doctype are
  115. * indexed, by name, in this array.
  116. */
  117. var $modules = array();
  118. /**
  119. * Array of recognized HTMLPurifier_Module instances, indexed by
  120. * module's class name. This array is usually lazy loaded, but a
  121. * user can overload a module by pre-emptively registering it.
  122. */
  123. var $registeredModules = array();
  124. /**
  125. * List of extra modules that were added by the user using addModule().
  126. * These get unconditionally merged into the current doctype, whatever
  127. * it may be.
  128. */
  129. var $userModules = array();
  130. /**
  131. * Associative array of element name to list of modules that have
  132. * definitions for the element; this array is dynamically filled.
  133. */
  134. var $elementLookup = array();
  135. /** List of prefixes we should use for registering small names */
  136. var $prefixes = array('HTMLPurifier_HTMLModule_');
  137. var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
  138. var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
  139. /** If set to true, unsafe elements and attributes will be allowed */
  140. var $trusted = false;
  141. function HTMLPurifier_HTMLModuleManager() {
  142. // editable internal objects
  143. $this->attrTypes = new HTMLPurifier_AttrTypes();
  144. $this->doctypes = new HTMLPurifier_DoctypeRegistry();
  145. // setup default HTML doctypes
  146. // module reuse
  147. $common = array(
  148. 'CommonAttributes', 'Text', 'Hypertext', 'List',
  149. 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
  150. 'StyleAttribute', 'Scripting', 'Object'
  151. );
  152. $transitional = array('Legacy', 'Target');
  153. $xml = array('XMLCommonAttributes');
  154. $non_xml = array('NonXMLCommonAttributes');
  155. $this->doctypes->register(
  156. 'HTML 4.01 Transitional', false,
  157. array_merge($common, $transitional, $non_xml),
  158. array('Tidy_Transitional', 'Tidy_Proprietary'),
  159. array(),
  160. '-//W3C//DTD HTML 4.01 Transitional//EN',
  161. 'http://www.w3.org/TR/html4/loose.dtd'
  162. );
  163. $this->doctypes->register(
  164. 'HTML 4.01 Strict', false,
  165. array_merge($common, $non_xml),
  166. array('Tidy_Strict', 'Tidy_Proprietary'),
  167. array(),
  168. '-//W3C//DTD HTML 4.01//EN',
  169. 'http://www.w3.org/TR/html4/strict.dtd'
  170. );
  171. $this->doctypes->register(
  172. 'XHTML 1.0 Transitional', true,
  173. array_merge($common, $transitional, $xml, $non_xml),
  174. array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'),
  175. array(),
  176. '-//W3C//DTD XHTML 1.0 Transitional//EN',
  177. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
  178. );
  179. $this->doctypes->register(
  180. 'XHTML 1.0 Strict', true,
  181. array_merge($common, $xml, $non_xml),
  182. array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
  183. array(),
  184. '-//W3C//DTD XHTML 1.0 Strict//EN',
  185. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
  186. );
  187. $this->doctypes->register(
  188. 'XHTML 1.1', true,
  189. array_merge($common, $xml, array('Ruby')),
  190. array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
  191. array(),
  192. '-//W3C//DTD XHTML 1.1//EN',
  193. 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
  194. );
  195. }
  196. /**
  197. * Registers a module to the recognized module list, useful for
  198. * overloading pre-existing modules.
  199. * @param $module Mixed: string module name, with or without
  200. * HTMLPurifier_HTMLModule prefix, or instance of
  201. * subclass of HTMLPurifier_HTMLModule.
  202. * @note This function will not call autoload, you must instantiate
  203. * (and thus invoke) autoload outside the method.
  204. * @note If a string is passed as a module name, different variants
  205. * will be tested in this order:
  206. * - Check for HTMLPurifier_HTMLModule_$name
  207. * - Check all prefixes with $name in order they were added
  208. * - Check for literal object name
  209. * - Throw fatal error
  210. * If your object name collides with an internal class, specify
  211. * your module manually. All modules must have been included
  212. * externally: registerModule will not perform inclusions for you!
  213. * @warning If your module has the same name as an already loaded
  214. * module, your module will overload the old one WITHOUT
  215. * warning.
  216. */
  217. function registerModule($module) {
  218. if (is_string($module)) {
  219. // attempt to load the module
  220. $original_module = $module;
  221. $ok = false;
  222. foreach ($this->prefixes as $prefix) {
  223. $module = $prefix . $original_module;
  224. if ($this->_classExists($module)) {
  225. $ok = true;
  226. break;
  227. }
  228. }
  229. if (!$ok) {
  230. $module = $original_module;
  231. if (!$this->_classExists($module)) {
  232. trigger_error($original_module . ' module does not exist',
  233. E_USER_ERROR);
  234. return;
  235. }
  236. }
  237. $module = new $module();
  238. }
  239. if (empty($module->name)) {
  240. trigger_error('Module instance of ' . get_class($module) . ' must have name');
  241. return;
  242. }
  243. $this->registeredModules[$module->name] = $module;
  244. }
  245. /**
  246. * Safely tests for class existence without invoking __autoload in PHP5
  247. * or greater.
  248. * @param $name String class name to test
  249. * @note If any other class needs it, we'll need to stash in a
  250. * conjectured "compatibility" class
  251. * @private
  252. */
  253. function _classExists($name) {
  254. static $is_php_4 = null;
  255. if ($is_php_4 === null) {
  256. $is_php_4 = version_compare(PHP_VERSION, '5', '<');
  257. }
  258. if ($is_php_4) {
  259. return class_exists($name);
  260. } else {
  261. return class_exists($name, false);
  262. }
  263. }
  264. /**
  265. * Adds a module to the current doctype by first registering it,
  266. * and then tacking it on to the active doctype
  267. */
  268. function addModule($module) {
  269. $this->registerModule($module);
  270. if (is_object($module)) $module = $module->name;
  271. $this->userModules[] = $module;
  272. }
  273. /**
  274. * Adds a class prefix that registerModule() will use to resolve a
  275. * string name to a concrete class
  276. */
  277. function addPrefix($prefix) {
  278. $this->prefixes[] = $prefix;
  279. }
  280. /**
  281. * Performs processing on modules, after being called you may
  282. * use getElement() and getElements()
  283. * @param $config Instance of HTMLPurifier_Config
  284. */
  285. function setup($config) {
  286. $this->trusted = $config->get('HTML', 'Trusted');
  287. // generate
  288. $this->doctype = $this->doctypes->make($config);
  289. $modules = $this->doctype->modules;
  290. // take out the default modules that aren't allowed
  291. $lookup = $config->get('HTML', 'AllowedModules');
  292. $special_cases = $config->get('HTML', 'CoreModules');
  293. if (is_array($lookup)) {
  294. foreach ($modules as $k => $m) {
  295. if (isset($special_cases[$m])) continue;
  296. if (!isset($lookup[$m])) unset($modules[$k]);
  297. }
  298. }
  299. // merge in custom modules
  300. $modules = array_merge($modules, $this->userModules);
  301. foreach ($modules as $module) {
  302. $this->processModule($module);
  303. $this->modules[$module]->setup($config);
  304. }
  305. foreach ($this->doctype->tidyModules as $module) {
  306. $this->processModule($module);
  307. $this->modules[$module]->setup($config);
  308. }
  309. // setup lookup table based on all valid modules
  310. foreach ($this->modules as $module) {
  311. foreach ($module->info as $name => $def) {
  312. if (!isset($this->elementLookup[$name])) {
  313. $this->elementLookup[$name] = array();
  314. }
  315. $this->elementLookup[$name][] = $module->name;
  316. }
  317. }
  318. // note the different choice
  319. $this->contentSets = new HTMLPurifier_ContentSets(
  320. // content set assembly deals with all possible modules,
  321. // not just ones deemed to be "safe"
  322. $this->modules
  323. );
  324. $this->attrCollections = new HTMLPurifier_AttrCollections(
  325. $this->attrTypes,
  326. // there is no way to directly disable a global attribute,
  327. // but using AllowedAttributes or simply not including
  328. // the module in your custom doctype should be sufficient
  329. $this->modules
  330. );
  331. }
  332. /**
  333. * Takes a module and adds it to the active module collection,
  334. * registering it if necessary.
  335. */
  336. function processModule($module) {
  337. if (!isset($this->registeredModules[$module]) || is_object($module)) {
  338. $this->registerModule($module);
  339. }
  340. $this->modules[$module] = $this->registeredModules[$module];
  341. }
  342. /**
  343. * Retrieves merged element definitions.
  344. * @return Array of HTMLPurifier_ElementDef
  345. */
  346. function getElements() {
  347. $elements = array();
  348. foreach ($this->modules as $module) {
  349. foreach ($module->info as $name => $v) {
  350. if (isset($elements[$name])) continue;
  351. // if element is not safe, don't use it
  352. if (!$this->trusted && ($v->safe === false)) continue;
  353. $elements[$name] = $this->getElement($name);
  354. }
  355. }
  356. // remove dud elements, this happens when an element that
  357. // appeared to be safe actually wasn't
  358. foreach ($elements as $n => $v) {
  359. if ($v === false) unset($elements[$n]);
  360. }
  361. return $elements;
  362. }
  363. /**
  364. * Retrieves a single merged element definition
  365. * @param $name Name of element
  366. * @param $trusted Boolean trusted overriding parameter: set to true
  367. * if you want the full version of an element
  368. * @return Merged HTMLPurifier_ElementDef
  369. */
  370. function getElement($name, $trusted = null) {
  371. $def = false;
  372. if ($trusted === null) $trusted = $this->trusted;
  373. $modules = $this->modules;
  374. if (!isset($this->elementLookup[$name])) {
  375. return false;
  376. }
  377. foreach($this->elementLookup[$name] as $module_name) {
  378. $module = $modules[$module_name];
  379. // copy is used because, ideally speaking, the original
  380. // definition should not be modified. Usually, this will
  381. // make no difference, but for consistency's sake
  382. $new_def = $module->info[$name]->copy();
  383. // refuse to create/merge in a definition that is deemed unsafe
  384. if (!$trusted && ($new_def->safe === false)) {
  385. $def = false;
  386. continue;
  387. }
  388. if (!$def && $new_def->standalone) {
  389. // element with unknown safety is not to be trusted.
  390. // however, a merge-in definition with undefined safety
  391. // is fine
  392. if (!$trusted && !$new_def->safe) continue;
  393. $def = $new_def;
  394. } elseif ($def) {
  395. $def->mergeIn($new_def);
  396. } else {
  397. // could "save it for another day":
  398. // non-standalone definitions that don't have a standalone
  399. // to merge into could be deferred to the end
  400. continue;
  401. }
  402. // attribute value expansions
  403. $this->attrCollections->performInclusions($def->attr);
  404. $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
  405. // descendants_are_inline, for ChildDef_Chameleon
  406. if (is_string($def->content_model) &&
  407. strpos($def->content_model, 'Inline') !== false) {
  408. if ($name != 'del' && $name != 'ins') {
  409. // this is for you, ins/del
  410. $def->descendants_are_inline = true;
  411. }
  412. }
  413. $this->contentSets->generateChildDef($def, $module);
  414. }
  415. // add information on required attributes
  416. foreach ($def->attr as $attr_name => $attr_def) {
  417. if ($attr_def->required) {
  418. $def->required_attr[] = $attr_name;
  419. }
  420. }
  421. return $def;
  422. }
  423. }