PageRenderTime 39ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/core/components/Filter.class.php

http://plant.googlecode.com/
PHP | 544 lines | 193 code | 90 blank | 261 comment | 35 complexity | 38d3cc2682bf0dd7c4d8d76c9f53e63f MD5 | raw file
Possible License(s): GPL-3.0
  1. <?php
  2. /**
  3. * Filter.class.php
  4. *
  5. * Groups all the standard Plant filters together in one file.
  6. * @package plant_core
  7. * @subpackage components
  8. */
  9. /**
  10. * Content Filtering Controller
  11. *
  12. * Takes any kind of string content and transforms or parses it into
  13. * other string content. Uses easily extensible FilterModels.
  14. *
  15. * @author Ivo Janssen <ivo@codedealers.com>
  16. * @copyright Copyright (c) 2008, Ivo Janssen
  17. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  18. * @package plant_core
  19. * @subpackage components
  20. * @uses FILTER_DEFAULT_FILTERS Set a string with the default filters that need to be executed on every call to Filter::it() (set in config)
  21. * @uses FILTER_PREFIX Prefix applied to all filter classes (set in config)
  22. * @version 1.1
  23. */
  24. class Filter {
  25. /**
  26. * Main filter
  27. *
  28. * Call this with some input and any combination of comma-delimited filter names in
  29. * $filters. Filters must exist as Filter<name> and extend FilterModel. Filters will be
  30. * processed in order they're in in $filters.
  31. *
  32. * @param string $input The content to filter
  33. * @param string $filters A comma-delimited list of filters. Will be appended to FILTER_DEFAULT_FILTERS.
  34. * @param array $args Optional array of arguments in the form of array("filterName" => "argument1=value1,argument2=value2", "otherFilter" => "argument2=value2") etc.
  35. * @return string Filtered content
  36. * @uses FILTER_DEFAULT_FILTERS Set a string with the default filters that need to be executed on every call to Filter::it() (set in config)
  37. * @uses FilterModel::filter()
  38. * @uses config()
  39. * @uses get()
  40. * @uses stringToArray()
  41. */
  42. public static function it($input, $filters = "", $args = array()) {
  43. // Check arguments
  44. if (!is_string($input) && !is_numeric($input)) throw new Exception("Content to filter needs to be a string!");
  45. if (!is_string($filters)) throw new Exception("Filters needs to specified as a valid string or array!");
  46. if (!is_array($args)) throw new Exception("Filter arguments needs to be an associative array!");
  47. // Add default filters
  48. $filters = config("FILTER_DEFAULT_FILTERS") . "," . $filters;
  49. // Explode the filters into an array
  50. $filters = explode(",",$filters);
  51. // Execute every filter and feed the result to the next one
  52. foreach ($filters as $filterName) {
  53. if (trim($filterName) == "") continue;
  54. // Load the filter object
  55. $filter = Filter::get(trim($filterName));
  56. // Set arguments
  57. if (isset($args[$filterName])) {
  58. $args[$filterName] = Filter::stringToArray($args[$filterName]);
  59. foreach($args[$filterName] as $argKey => $argValue) {
  60. if (!isset($filter->$argKey)) $filter->$argKey = $argValue;
  61. }
  62. }
  63. // Filter it
  64. $input = $filter->filter($input);
  65. }
  66. return $input;
  67. }
  68. /**
  69. * String to array conversion
  70. *
  71. * String to array conversion for strings supplied like arg1=val1,arg2=val2,etc
  72. *
  73. * @param string $argString The formatted argument/value string
  74. * @return array Previous example becomes ("arg1" => "val1", "arg2" => "val2")
  75. */
  76. public static function stringToArray($argString) {
  77. // Exit if array already
  78. if (is_array($argString)) return $argString;
  79. $argArray = array();
  80. // Split on comma and convert
  81. foreach(explode(",",$argString) as $argument) {
  82. // Continue if the attribute is empty
  83. if (trim($argument) == "") continue;
  84. // Check for syntax
  85. if (!stripos($argument, "=")) throw new Exception("Arguments must be in the form of 'arg1=val1,arg2=val2'!");
  86. // Add to arg array
  87. $argKey = substr($argument, 0, strpos($argument, "="));
  88. $argValue = substr($argument, strpos($argument, "=") + 1);
  89. $argArray[$argKey] = $argValue;
  90. }
  91. return $argArray;
  92. }
  93. /**
  94. * Specific filter retrieval
  95. *
  96. * Provided a filter name (EG 'isutf8'), will look for a corresponding FilterModel named <FILTER_PREFIX>isutf8
  97. *
  98. * @param string $filterName The name of the filter to get
  99. * @return FilterModel Found filter
  100. * @uses FILTER_PREFIX Prefix applied to all filter classes (set in config)
  101. * @uses config()
  102. */
  103. private static function get($filterName) {
  104. // Check arguments
  105. if (!is_string($filterName) || !$filterName) throw new Exception("Name of filter must be a valid string!");
  106. // Check if the filter exists
  107. $filterClass = config("FILTER_PREFIX") . ucfirst(strtolower($filterName));
  108. if (!class_exists($filterClass)) throw new Exception("Filter '" . $filterName . "' does not exist!");
  109. // Return it
  110. return new $filterClass;
  111. }
  112. }
  113. /**
  114. * Basic Content Filter
  115. *
  116. * Must be extended for any other filter, offers no functionality
  117. *
  118. * @author Ivo Janssen <ivo@codedealers.com>
  119. * @copyright Copyright (c) 2008, Ivo Janssen
  120. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  121. * @package plant_core
  122. * @subpackage components
  123. * @version 1.0
  124. */
  125. abstract class FilterModel {
  126. // Only public method to filter content
  127. abstract public function filter($input);
  128. }
  129. /**
  130. * Add Paragraphs Filter
  131. *
  132. * Convert return characters to newlines, generate P paragraphs from double newlines
  133. * and BR breaks from single newlines.
  134. *
  135. * Use <kbd>addparagraphs</kbd> in the Filter::it() filter string to use.
  136. *
  137. * @author Ivo Janssen <ivo@codedealers.com>
  138. * @copyright Copyright (c) 2008, Ivo Janssen
  139. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  140. * @package plant_core
  141. * @subpackage components
  142. * @version 1.1
  143. */
  144. class FilterAddparagraphs extends FilterModel {
  145. /**
  146. * Required filtering function
  147. * @param string $input String to filter
  148. * @return string String with paragraphs and breaks added
  149. */
  150. public function filter($input) {
  151. // Convert returns and combinations to newlines (\n)
  152. $input = preg_replace("/(\r\n|\n|\r)/", "\n", $input);
  153. // Build regex for element which should be kept free
  154. $elementsToKeepFree = array("address", "article", "aside", "blockquote", "canvas", "col", "colgroup", "datalist", "details", "div", "dl", "fieldset", "figure", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "iframe", "li", "nav", "ol", "p", "section", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul");
  155. $keepFreeRegex = "";
  156. $freeRegex = "";
  157. foreach ($elementsToKeepFree as $element) {
  158. if ($freeRegex) $freeRegex .= "|";
  159. if ($keepFreeRegex) $keepFreeRegex .= "|";
  160. $freeRegex .= "<" . $element . "(?:\s+[^>]*)?\s*/?>|</\s*" . $element . "\s*>";
  161. $keepFreeRegex .= "<" . $element . "(?:\s+[^>]*)?\s*>.*</\s*" . $element . "\s*>";
  162. }
  163. // Keep non-inline or sub elements free (if they aren't yet)
  164. $input = preg_replace("%(" . $keepFreeRegex . ")%s", "\n\n$1\n\n", $input);
  165. // Remove surplus newlines
  166. $input = preg_replace("/\n\n+/", "\n\n", $input);
  167. // Create paragraphs
  168. $input = preg_replace("/\n?(.+?)(\n\n|\z)/s", "<p>$1</p>\n", $input);
  169. // Remove paragraphs around non-inline or sub elements
  170. $input = preg_replace("%<p>\s*((?:" . $freeRegex . ").*?(?:" . $freeRegex . "))\s*</p>%s", "\$1", $input);
  171. // Add line breaks for newlines which do not follow non-inline or sub elements
  172. $elementsNotToBreak = array("blockquote", "canvas", "col", "colgroup", "dd", "dl", "dt", "figure", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "iframe", "li", "ol", "p", "table", "tbody", "td", "tfoot", "thead", "th", "tr", "ul", "video");
  173. $breakRegex = "";
  174. foreach ($elementsNotToBreak as $element) {
  175. if ($breakRegex) $breakRegex .= "|";
  176. $breakRegex .= "<" . $element . ">|</" . $element . ">";
  177. }
  178. $input = preg_replace("%(?<!${breakRegex})\s*\n%i", "<br/>\n", $input);
  179. return $input;
  180. }
  181. }
  182. /**
  183. * Entity Conversion Filter
  184. *
  185. * Converts HTML entities back to their unicode equivalents
  186. *
  187. * Use <kbd>convertentities</kbd> in the Filter::it() filter string to use.
  188. *
  189. * @author Ivo Janssen <ivo@codedealers.com>
  190. * @copyright Copyright (c) 2008, Ivo Janssen
  191. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  192. * @package plant_core
  193. * @subpackage components
  194. * @version 1.0
  195. */
  196. class FilterConvertentities extends FilterModel {
  197. /**
  198. * Required filtering function
  199. * @param string $input String to filter
  200. * @return string String with HTML entities converted back to UTF8
  201. */
  202. public function filter($input) {
  203. // Convert HTML entities back to their UTF-8 equivalents
  204. return html_entity_decode($input, ENT_QUOTES, "UTF-8");
  205. }
  206. }
  207. /**
  208. * Querystring->Array Filter
  209. *
  210. * Converts a querystring with <kbd>key:"value"</kbd> pairs and <kbd>"multiple word queries"</kbd>
  211. * to an easily digestible array of keys and values.
  212. *
  213. * Use <kbd>querystring</kbd> in the Filter::it() filter string to use.
  214. *
  215. * @author Ivo Janssen <ivo@codedealers.com>
  216. * @copyright Copyright (c) 2009, Ivo Janssen
  217. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  218. * @package plant_core
  219. * @subpackage components
  220. * @version 1.0
  221. */
  222. class FilterQuerystring extends FilterModel {
  223. /**
  224. * Required filtering function
  225. * @param string $searchQuery Search query to process
  226. * @return array Array of key/value pairs according to query syntax
  227. */
  228. public function filter($searchQuery) {
  229. // Init search terms
  230. $searchTerms = array();
  231. // Find advanced parts of query
  232. $numAdvancedTerms = preg_match_all('/(^| )([a-z]+):"([^"]+)"($)?/i', trim($searchQuery), $advancedTerms);
  233. $searchQuery = trim(preg_replace('/(^| )([a-z]+):"([^"]+)"($)?/i', '', $searchQuery));
  234. if ($numAdvancedTerms) $searchTerms = $searchTerms + array_combine($advancedTerms[2], $advancedTerms[3]);
  235. // Find multiple word terms of a query
  236. $numMultiWordTerms = preg_match_all('/(^| )"([^"]+)"($)?/i', trim($searchQuery), $multipleWordTerms);
  237. $searchQuery = trim(preg_replace('/(^| )"([^"]+)"($)?/i', '', $searchQuery));
  238. if ($numMultiWordTerms) $searchTerms = array_merge($searchTerms, $multipleWordTerms[2]);
  239. // Add remaining terms
  240. if ($searchQuery) $searchTerms = array_merge($searchTerms, explode(" ", $searchQuery));
  241. return $searchTerms;
  242. }
  243. }
  244. /**
  245. * Paragraph Removal Filter
  246. *
  247. * Converts P tags back to double newlines, and BR tags back to single newlines
  248. *
  249. * Use <kbd>removeparagraphs</kbd> in the Filter::it() filter string to use.
  250. *
  251. * @author Ivo Janssen <ivo@codedealers.com>
  252. * @copyright Copyright (c) 2008, Ivo Janssen
  253. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  254. * @package plant_core
  255. * @subpackage components
  256. * @version 1.0
  257. */
  258. class FilterRemoveparagraphs extends FilterModel {
  259. /**
  260. * Required filtering function
  261. * @param string $input String to filter
  262. * @return string String with paragraph and break HTML tags removed and converted to newlines
  263. */
  264. public function filter($input) {
  265. // Convert paragraphs and breaks back to newlines
  266. $input = preg_replace("%(<p>|</p>)%", "\n", $input);
  267. $input = preg_replace("%<br\s*/>%", "\r", $input);
  268. // Remove surplus newlines
  269. $input = preg_replace("/\n\n+/", "\n\n", $input);
  270. return trim($input);
  271. }
  272. }
  273. /**
  274. * URL-safe conversion Filter
  275. *
  276. * Takes a string and converts it to a 'clean' string for use in a URL
  277. *
  278. * Use <kbd>tourl</kbd> in the Filter::it() filter string to use.
  279. *
  280. * @author Ivo Janssen <ivo@codedealers.com>
  281. * @copyright Copyright (c) 2008, Ivo Janssen
  282. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  283. * @package plant_core
  284. * @subpackage components
  285. * @version 1.0
  286. */
  287. class FilterToURL extends FilterModel {
  288. /**
  289. * Required filtering function
  290. * @param string $input String to filter
  291. * @return string String with accents converted to their alphanumeric equivalents and non-safe characters removed or converted
  292. * @uses removeAccents()
  293. */
  294. public function filter($input) {
  295. // 1. Convert accented characters to an unaccented version
  296. $input = $this->removeAccents($input);
  297. // 2. Remove tags and trim the string
  298. $input = strip_tags(strtolower(trim($input)));
  299. // 3. Convert space characters, slashes and underscores to hyphens
  300. $input = preg_replace("%(\s+|_|/|\\\\)%", "-", $input);
  301. // 4. Remove everything but lowercase letters and numbers
  302. $input = preg_replace("/[^a-z0-9-]+/", "", $input);
  303. // 5. Remove surplus hyphens
  304. $input = preg_replace("/-+/", "-", $input);
  305. return $input;
  306. }
  307. /**
  308. * Accent conversion function
  309. *
  310. * Takes most accented characters and converts them to their closest alphanumeric equivalent
  311. * @param string $input The string to convert
  312. * @return string String with accents removed
  313. */
  314. private function removeAccents($input) {
  315. // Table to convert accented characters to an alphabet equivalent
  316. $conversionTable = array(
  317. "a" => array("ŕ","á","â","ä","ă","ĺ","?","?","?","?","?"),
  318. "A" => array("Ŕ","Á","Â","Ä","Ă","Ĺ","?","?","?","?","?"),
  319. "ae" => array("ć","?"),
  320. "AE" => array("Ć","?"),
  321. "at" => array("@"),
  322. "c" => array("ç","?","?","?","?"),
  323. "C" => array("Ç","?","?","?","?"),
  324. "d" => array("?","?"),
  325. "D" => array("?"),
  326. "dh" => array("đ"),
  327. "DH" => array("Đ"),
  328. "e" => array("č","é","ę","ë","?","?","?","?","?"),
  329. "E" => array("Č","É","Ę","Ë","?","?","?","?","?","€"),
  330. "g" => array("?","?","?","?"),
  331. "G" => array("?","?","?","?"),
  332. "h" => array("?","?"),
  333. "H" => array("?","?"),
  334. "i" => array("ě","í","î","ď","?","?","?","?","?"),
  335. "I" => array("Ě","Í","Î","Ď","?","?","?","?","?"),
  336. "ij" => array("?"),
  337. "IJ" => array("?"),
  338. "j" => array("?"),
  339. "J" => array("?"),
  340. "k" => array("?","?"),
  341. "K" => array("?"),
  342. "l" => array("?","?","?","?","?"),
  343. "L" => array("?","?","?","?","?"),
  344. "n" => array("ń","?","?","?","?","?"),
  345. "N" => array("Ń","?","?","?","?"),
  346. "o" => array("ň","ó","ô","ö","ő","ř","?","?","?"),
  347. "O" => array("Ň","Ó","Ô","Ő","Ö","Ř","?","?","?"),
  348. "oe" => array("œ"),
  349. "OE" => array("Œ"),
  350. "r" => array("?","?","?"),
  351. "R" => array("?","?","?"),
  352. "s" => array("?","?","?","š"),
  353. "S" => array("?","?","?","Š"),
  354. "ss" => array("ß"),
  355. "t" => array("?","?","?"),
  356. "T" => array("?","?","?"),
  357. "th" => array("ţ"),
  358. "TH" => array("Ţ"),
  359. "u" => array("ů","ú","ű","ü","?","?","?","?","?","?"),
  360. "U" => array("Ů","Ú","Ű","Ü","?","?","?","?","?","?"),
  361. "w" => array("?"),
  362. "W" => array("?"),
  363. "y" => array("ý","˙","?"),
  364. "Y" => array("Ý","?","Ÿ"),
  365. "z" => array("?","?","ž"),
  366. "Z" => array("?","?","Ž"),
  367. );
  368. // Do a str_replace for every character
  369. foreach($conversionTable as $convertTo => $fromArray) {
  370. $input = str_replace($fromArray, $convertTo, $input);
  371. }
  372. return $input;
  373. }
  374. }
  375. /**
  376. * UTF8 conversion Filter
  377. *
  378. * Converts ASCII/ISO charsets to UTF-8
  379. *
  380. * Use <kbd>toutf8</kbd> in the Filter::it() filter string to use.
  381. *
  382. * @author Ivo Janssen <ivo@codedealers.com>
  383. * @copyright Copyright (c) 2008, Ivo Janssen
  384. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  385. * @package plant_core
  386. * @subpackage components
  387. * @version 1.0
  388. */
  389. class FilterToUTF8 extends FilterModel {
  390. /**
  391. * Required filtering function
  392. * @param string $input String to filter
  393. * @return string UTF8 compatible string
  394. * @uses isUTF8()
  395. */
  396. public function filter($input) {
  397. // Encode to UTF-8 if it's not already
  398. if (!$this->isUTF8($input)) $input = utf8_encode($input);
  399. return $input;
  400. }
  401. /**
  402. * UTF8 Checking function
  403. *
  404. * Checks if a string is already UTF-8
  405. * @author bmorel <bmorel@ssi.fr>
  406. * @link http://us3.php.net/manual/en/function.utf8-encode.php#39986 PHP Function Manual for utf8_encode()
  407. * @param string $input String to check
  408. * @return bool
  409. */
  410. private function isUTF8($input) {
  411. for ($i=0; $i < strlen($input); $i++) {
  412. if (ord($input[$i]) < 0x80) continue; # 0bbbbbbb
  413. elseif ((ord($input[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
  414. elseif ((ord($input[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
  415. elseif ((ord($input[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
  416. elseif ((ord($input[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
  417. elseif ((ord($input[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
  418. else return false; # Does not match any model
  419. for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
  420. if ((++$i == strlen($input)) || ((ord($input[$i]) & 0xC0) != 0x80))
  421. return false;
  422. }
  423. }
  424. return true;
  425. }
  426. }
  427. /**
  428. * XML Entity Filter
  429. *
  430. * Convert certain characters to their XML entities
  431. *
  432. * Use <kbd>xmlentities</kbd> in the Filter::it() filter string to use.
  433. *
  434. * @author Ivo Janssen <ivo@codedealers.com>
  435. * @copyright Copyright (c) 2008, Ivo Janssen
  436. * @license http://opensource.org/licenses/gpl-3.0.html GNU General Public License, version 3
  437. * @package plant_core
  438. * @subpackage components
  439. * @version 1.0
  440. */
  441. class FilterXMLEntities extends FilterModel {
  442. /**
  443. * Required filtering function
  444. * @param string $input String to filter
  445. * @return string XML safe string
  446. */
  447. public function filter($input) {
  448. // Convert certain characters
  449. return str_replace(array("&","\"","'","<",">","'"), array("&amp;","&quot;","&#39;","&lt;","&gt;","&apos;"), $input);
  450. }
  451. }
  452. ?>