PageRenderTime 1306ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/carl_util/tidy/tidy.php

https://github.com/luthercollege/reason_package
PHP | 297 lines | 264 code | 8 blank | 25 comment | 8 complexity | 6655b3ed94ac9ee640a35e7be860dfbc MD5 | raw file
  1. <?php
  2. /**
  3. * Tidy HTML
  4. *
  5. * Converts HTML to XHTML in a somewhat roundabout way: it uses the W3C's
  6. * tidy program, plus some command-line and string-manipulation trickery,
  7. * to produce valid XHTML from HTML.
  8. *
  9. * @author dave hendler
  10. * @author nate white
  11. * @package carl_util
  12. * @subpackage tidy
  13. */
  /**
   * Turn a string or array into valid, standards-compliant (X)HTML.
   *
   * Uses the configuration options in tidy.conf (located via SETTINGS_INC),
   * which should minimally have show-body-only set to yes so that only the
   * body content comes back.
   *
   * Back ends are tried in this order: the tidy 2 PHP extension (object API),
   * the tidy 1 PHP extension (procedural API that has tidy_setopt), and finally
   * the command-line executable named by TIDY_EXE. If none is available an
   * error is triggered and the text is returned untidied.
   *
   * @param mixed $text The data to be tidied up; arrays are tidied element by
   *                    element (recursively) with their keys preserved
   * @return mixed $result Tidied data (a trimmed string, or an array of them)
   */
  function tidy( $text )
  {
      static $tidy_funcs;
      static $tidy_conf;
      if (!isset($tidy_conf)) $tidy_conf = SETTINGS_INC . 'tidy.conf';

      if (is_array($text))
      {
          $result = array();
          foreach ($text as $key => $value)
          {
              $result[$key] = tidy($value);
          }
          return $result;
      }

      // determine what tidy libraries are available
      if (empty($tidy_funcs)) $tidy_funcs = get_extension_funcs('tidy');
      $tidy_lib_available = !empty($tidy_funcs);
      // tidy_setopt only exists in the tidy 1 API, so its presence tells the two apart
      $has_setopt = $tidy_lib_available && in_array('tidy_setopt', $tidy_funcs, true);
      $tidy_1_lib_available = $has_setopt;
      $tidy_2_lib_available = $tidy_lib_available && !$has_setopt;

      // Guard the constant: reading an undefined constant is fatal on PHP 8.
      $tidy_exe = defined('TIDY_EXE') ? TIDY_EXE : false;
      $tidy_command_line_available = ($tidy_exe) ? file_exists($tidy_exe) : false;

      $protected = protect_string_from_tidy( $text );
      // tidy is handed a complete document; show-body-only strips the wrapper again
      $wrapped = '<html><body>'.$protected.'</body></html>';

      if ($tidy_2_lib_available) // Run tidy for PHP 5
      {
          $tidy = new tidy();
          $tidy->parseString($wrapped, $tidy_conf, 'utf8');
          $tidy->cleanRepair();
          // explicit cast instead of relying on trim() to stringify the object
          $result = (string) $tidy;
      }
      elseif ($tidy_1_lib_available) // Run tidy for PHP 4
      {
          tidy_load_config($tidy_conf);
          tidy_set_encoding('utf8');
          tidy_parse_string($wrapped);
          tidy_clean_repair();
          $result = tidy_get_output();
      }
      elseif ($tidy_command_line_available) // attempt to run COMMAND LINE tidy
      {
          // printf '%s' emits the markup verbatim; echo may expand backslash
          // escapes depending on which shell /bin/sh is. Every path is quoted so
          // spaces or shell metacharacters in them cannot break the command.
          $cmd = "printf '%s' ".escapeshellarg( $wrapped )
              .' | '.escapeshellarg( $tidy_exe )
              .' -q -config '.escapeshellarg( $tidy_conf )
              .' 2> /dev/null'; // tidy's diagnostics are discarded
          $result = shell_exec($cmd);
      }
      else
      {
          trigger_error('tidy does not appear to be available within php or at the command line - no tidying is taking place.');
          // Hand back body content only, like every other path, rather than
          // leaking the synthetic <html><body> wrapper to the caller.
          $result = $protected;
      }

      // shell_exec() yields null when the command fails or prints nothing
      return trim((string) $result);
  }
  /**
   * Run command-line tidy over some markup and return only its error lines.
   *
   * The text is piped to the executable named by TIDY_EXE using the same
   * tidy.conf as tidy(); stderr is merged into stdout and every output line
   * containing "Error:" is kept.
   *
   * @todo See where this is used and provide better error handling for tidylib in php 4 and 5
   *
   * @param string $text Markup to check
   * @return string The "Error:" lines joined with newlines; an empty string when
   *                there are none, when TIDY_EXE is not configured, or when the
   *                command produced no output
   */
  function tidy_err( $text )
  {
      static $tidy_conf;
      if (!isset($tidy_conf)) $tidy_conf = SETTINGS_INC . 'tidy.conf';

      // Without an executable there is nothing to run (and reading an undefined
      // constant is fatal on PHP 8).
      if (!defined('TIDY_EXE') || !TIDY_EXE) return '';

      // printf '%s' passes the text through verbatim; echo would treat a leading
      // "-n"/"-e" as an option and may expand backslash escapes. Paths are quoted
      // so spaces or shell metacharacters cannot break the command.
      $cmd = "printf '%s' ".escapeshellarg( $text )
          .' | '.escapeshellarg( TIDY_EXE )
          .' -q -config '.escapeshellarg( $tidy_conf )
          .' 2>&1'; // diagnostics go to stderr, so fold them into the captured output
      $output = shell_exec( $cmd );

      // shell_exec() yields null when the command fails or prints nothing
      if (!is_string($output)) return '';

      $errors = array();
      foreach (explode( "\n", $output ) as $line)
      {
          if (strpos( $line, 'Error:' ) !== false)
          {
              $errors[] = $line;
          }
      }
      return implode("\n", $errors);
  }
  /**
   * Replace Latin-1 character entities with their raw UTF-8 byte sequences.
   *
   * Covers code points 160-255 in both named (&eacute;) and decimal numeric
   * (&#233;) form. Other entities (&amp;, &lt;, hexadecimal references, code
   * points outside that range) are left untouched. tidy() applies this to its
   * input before handing the markup to tidy.
   *
   * @param mixed $str String (or array of strings, as accepted by str_replace) to convert
   * @return mixed The input with the entities replaced
   */
  function protect_string_from_tidy( $str )
  {
      // Built once per request instead of on every call.
      static $utf_entity_trans = null;

      if ($utf_entity_trans === null)
      {
          // Entity names for code points 160 through 255, in code point order.
          $entity_names = array(
              'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
              'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
              'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
              'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
              'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
              'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
              'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
              'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
              'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
              'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
              'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
              'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',
          );

          $utf_entity_trans = array();
          foreach ($entity_names as $offset => $name)
          {
              $code_point = 160 + $offset;
              // Two-byte UTF-8 encoding: 110xxxxx 10xxxxxx
              $utf8 = chr(0xC0 | ($code_point >> 6)) . chr(0x80 | ($code_point & 0x3F));
              $utf_entity_trans['&'.$name.';'] = $utf8;
              $utf_entity_trans['&#'.$code_point.';'] = $utf8;
          }
      }

      return str_replace( array_keys( $utf_entity_trans ), $utf_entity_trans, $str );
  }
  289. ?>