/system/helpers/typography_helper.php

https://github.com/xupisco/Xare-a-Link · PHP · 540 lines · 336 code · 62 blank · 142 comment · 36 complexity · 76905da6a71c75b7fbb086e815104b53 MD5 · raw file

  1. <?php if (!defined('BASEPATH')) exit('No direct script access allowed');
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP 4.3.2 or newer
  6. *
  7. * @package CodeIgniter
  8. * @author Rick Ellis
  9. * @copyright Copyright (c) 2006, EllisLab, Inc.
  10. * @license http://www.codeigniter.com/user_guide/license.html
  11. * @link http://www.codeigniter.com
  12. * @since Version 1.0
  13. * @filesource
  14. */
  15. // ------------------------------------------------------------------------
  16. /**
  17. * CodeIgniter Typography Helpers
  18. *
  19. * @package CodeIgniter
  20. * @subpackage Helpers
  21. * @category Helpers
  22. * @author Rick Ellis
  23. * @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
  24. */
  25. // ------------------------------------------------------------------------
  /**
   * Convert newlines to HTML line breaks except within PRE tags
   *
   * The string is scanned for opening <pre ...> tags (case-insensitive,
   * with or without attributes). Text outside of a pre block is passed
   * through nl2br(); everything from the opening tag up to and including
   * the matching </pre> is copied verbatim. If a pre block is never
   * closed, the remainder of the string is treated as preformatted.
   *
   * @access public
   * @param string $str the text to convert
   * @return string the text with <br /> inserted outside of pre blocks
   */
  function nl2br_except_pre($str)
  {
      $str = (string) $str;
      $length = strlen($str);
      $result = '';
      $offset = 0;

      // \b keeps tags such as <prefix> from being mistaken for <pre>
      while ($offset < $length && preg_match('#<pre\b[^>]*>#i', $str, $open, PREG_OFFSET_CAPTURE, $offset))
      {
          $open_pos = $open[0][1];
          $body_pos = $open_pos + strlen($open[0][0]);

          // Everything between the previous block and this <pre> is normal text
          $result .= nl2br(substr($str, $offset, $open_pos - $offset));

          // Locate the end of the pre block; an unclosed block runs to the end
          if (preg_match('#</pre\s*>#i', $str, $close, PREG_OFFSET_CAPTURE, $body_pos))
          {
              $block_end = $close[0][1] + strlen($close[0][0]);
          }
          else
          {
              $block_end = $length;
          }

          // Copy the pre block (tags included) untouched
          $result .= substr($str, $open_pos, $block_end - $open_pos);
          $offset = $block_end;
      }

      // Trailing text after the last pre block (or the whole string if none)
      if ($offset < $length)
      {
          $result .= nl2br(substr($str, $offset));
      }

      return $result;
  }
  53. // ------------------------------------------------------------------------
  /**
   * Auto Typography Wrapper Function
   *
   * Procedural shortcut: builds a fresh Auto_typography object and
   * returns whatever its convert() method produces for the given text.
   *
   * @access public
   * @param string $str the raw text to format
   * @return string the result of Auto_typography::convert()
   */
  function auto_typography($str)
  {
      $typographer = new Auto_typography();
      $formatted = $typographer->convert($str);

      return $formatted;
  }
  67. // ------------------------------------------------------------------------
  /**
   * Auto Typography Class
   *
   * Turns plain text (optionally containing some HTML) into paragraph
   * markup: double newlines become <p> boundaries, single newlines become
   * <br />, and quotes, dashes and ellipses are replaced with numeric
   * HTML entities.
   *
   * @access private
   * @category Helpers
   * @author Rick Ellis
   * @author Paul Burdick
   * @link http://www.codeigniter.com/user_guide/helpers/
   */
  class Auto_typography {

      // Block level elements that should not be wrapped inside <p> tags.
      // This is a regex alternation, interpolated into several patterns below.
      var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

      // Elements that should not have <p> and <br /> tags within them.
      // Only consulted for tags that already matched $block_elements.
      var $skip_elements = 'pre|ol|ul';

      // Tags we want the parser to completely ignore when splitting the string.
      var $ignore_elements = 'a|b|i|em|strong|span|img|li';

      /**
       * Main Processing Function
       *
       * @access public
       * @param string $str the raw text
       * @return string the formatted markup ('' for empty input)
       */
      function convert($str)
      {
          if ($str == '')
          {
              return '';
          }

          // Pad with spaces; the final clean up strips a space that
          // directly follows <p> or precedes </p>.
          $str = ' '.$str.' ';

          // Standardize Newlines to make matching easier
          $str = preg_replace("/(\r\n|\r)/", "\n", $str);

          /*
           * Reduce line breaks
           *
           * If there are more than two consecutive line
           * breaks we'll compress them down to a maximum
           * of two since there's no benefit to more.
           */
          $str = preg_replace("/\n\n+/", "\n\n", $str);

          /*
           * Convert quotes within tags to temporary marker
           *
           * We don't want quotes converted within
           * tags so we'll temporarily convert them to
           * {@DQ} and {@SQ}
           */
          if (preg_match_all("#\<.+?>#si", $str, $matches))
          {
              foreach ($matches[0] as $tag)
              {
                  $protected = str_replace(array("'", '"'), array('{@SQ}', '{@DQ}'), $tag);
                  $str = str_replace($tag, $protected, $str);
              }
          }

          /*
           * Add closing/opening paragraph tags before/after "block" elements
           *
           * Since block elements (like <blockquotes>, <pre>, etc.) do not get
           * wrapped in paragraph tags we will add a closing </p> tag just before
           * each block element starts and an opening <p> tag right after the block element
           * ends. Later on we'll do some further clean up.
           *
           * NOTE(review): ".*?" lets these patterns span from any "<" to a
           * block element name later on the same line, so they can match more
           * than a single tag. Left as-is to preserve existing output.
           */
          $str = preg_replace("#(<.*?)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
          $str = preg_replace("#(</.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

          /*
           * Convert "ignore" tags to temporary marker
           *
           * The parser splits out the string at every tag
           * it encounters. Certain inline tags, like image
           * tags, links, span tags, etc. will be adversely
           * affected if they are split out so we'll convert
           * the opening < temporarily to: {@TAG}
           */
          $str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

          /*
           * Split the string at every tag
           *
           * This creates an array with this prototype:
           *
           * [array]
           * {
           * [0] = <opening tag>
           * [1] = Content contained between the tags
           * [2] = <closing tag>
           * Etc...
           * }
           */
          $chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

          /*
           * Build our finalized string
           *
           * We'll cycle through the array, skipping tags,
           * and processing the contained text
           */
          $str = '';
          $process = TRUE;

          foreach ($chunks as $chunk)
          {
              /*
               * Are we dealing with a tag?
               *
               * If so, we'll skip the processing for this cycle.
               * We'll also set the "process" flag which allows us
               * to skip <pre> tags and a few other things.
               */
              if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
              {
                  if (preg_match("#".$this->skip_elements."#", $match['2']))
                  {
                      // Opening skip tag turns processing off, closing tag turns it back on
                      $process = ($match['1'] == '/') ? TRUE : FALSE;
                  }

                  $str .= $chunk;
                  continue;
              }

              // Inside a skipped element: copy the chunk through untouched
              if ($process == FALSE)
              {
                  $str .= $chunk;
                  continue;
              }

              // Convert Newlines into <p> and <br /> tags
              $str .= $this->format_newlines($chunk);
          }

          // FINAL CLEAN UP
          // IMPORTANT: DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

          /*
           * Clean up paragraph tags before/after "block" elements
           *
           * Earlier we added <p></p> tags before/after block level elements.
           * Then, we added paragraph tags around double line breaks. This
           * potentially created incorrectly formatted paragraphs so we'll
           * clean it up here.
           */
          $str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
          $str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

          // Convert Quotes and other characters
          $str = $this->format_characters($str);

          // Fix an artifact that happens during the paragraph replacement
          $str = preg_replace('#(<p>\n*</p>)#', '', $str);

          // If the user submitted their own paragraph tags with class data
          // in them we will retain them instead of using our tags.
          $str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

          // Final clean up. str_replace() applies the pairs in array order,
          // so the temporary markers are restored after the paragraph fixes.
          $str = str_replace(
              array(
                  '</p></p>',
                  '</p><p>',
                  '<p> ',
                  ' </p>',
                  '{@TAG}',
                  '{@DQ}',
                  '{@SQ}',
                  '<p></p>'
              ),
              array(
                  '</p>',
                  '<p>',
                  '<p>',
                  '</p>',
                  '<',
                  '"',
                  "'",
                  ''
              ),
              $str
          );

          return $str;
      }

      // --------------------------------------------------------------------

      /**
       * Format Characters
       *
       * This function mainly converts double and single quotes
       * to entities, but since these are directional, it does
       * it based on some rules. It also converts em-dashes
       * and a couple other things.
       *
       * Each scanning loop below looks at the character before and after
       * a match to decide on an opening or closing entity, then replaces
       * every occurrence of that before/match/after sequence in the string.
       *
       * NOTE(review): when a match sits at offset 0 the "one before" lookup
       * uses substr() with a negative start and therefore reads from the end
       * of the string. convert() always passes a string that starts with a
       * tag, so this only matters for direct callers.
       *
       * @access public
       * @param string $str
       * @return string
       */
      function format_characters($str)
      {
          // Simple two-character translations, applied after the loops below
          $table = array(
              ' "' => " &#8220;",
              '" ' => "&#8221; ",
              " '" => " &#8216;",
              "' " => "&#8217; ",
              '>"' => ">&#8220;",
              '"<' => "&#8221;<",
              ">'" => ">&#8216;",
              "'<" => "&#8217;<",
              "\"." => "&#8221;.",
              "\"," => "&#8221;,",
              "\";" => "&#8221;;",
              "\":" => "&#8221;:",
              "\"!" => "&#8221;!",
              "\"?" => "&#8221;?",
              ". " => ".&nbsp; ",
              "? " => "?&nbsp; ",
              "! " => "!&nbsp; ",
              ": " => ":&nbsp; ",
          );

          // These deal with quotes within quotes, like: "'hi here'"
          $start = 0;
          $space = array("\n", "\t", " ");

          while(TRUE)
          {
              $current = strpos(substr($str, $start), "\"'");
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+2, 1);

              if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
              {
                  // Followed by text: opening double + opening single
                  $str = str_replace( $one_before."\"'".$one_after,
                                      $one_before."&#8220;&#8216;".$one_after,
                                      $str);
              }
              elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
              {
                  // Preceded by text and followed by whitespace or a tag: closing pair
                  $str = str_replace( $one_before."\"'".$one_after,
                                      $one_before."&#8221;&#8217;".$one_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          // Same idea for the reverse ordering: '"
          $start = 0;

          while(TRUE)
          {
              $current = strpos(substr($str, $start), "'\"");
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+2, 1);

              if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
              {
                  $str = str_replace( $one_before."'\"".$one_after,
                                      $one_before."&#8216;&#8220;".$one_after,
                                      $str);
              }
              elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
              {
                  $str = str_replace( $one_before."'\"".$one_after,
                                      $one_before."&#8217;&#8221;".$one_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          // Are there quotes within a word, as in: ("something")
          if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
          {
              for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
              {
                  if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
                  {
                      $str = str_replace( $matches['0'][$i],
                                          $matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
                                          $str);
                  }
              }
          }

          // The same check for single quotes, as in: ('something')
          if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
          {
              for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
              {
                  if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
                  {
                      $str = str_replace( $matches['0'][$i],
                                          $matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
                                          $str);
                  }
              }
          }

          // How about one apostrophe, as in Rick's
          $start = 0;

          while(TRUE)
          {
              $current = strpos(substr($str, $start), "'");
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+1, 1);

              if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
              {
                  $str = str_replace( $one_before."'".$one_after,
                                      $one_before."&#8217;".$one_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          // Em-dashes
          $start = 0;

          while(TRUE)
          {
              $current = strpos(substr($str, $start), "--");
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+2, 1);
              $two_before = substr($str, $start+$current-2, 1);
              $two_after = substr($str, $start+$current+3, 1);

              // Either "word--word", or "word -- word" with exactly one space
              // on each side; trim() below drops those surrounding spaces.
              if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
                  OR
                  ( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
                  )
              {
                  $str = str_replace( $two_before.$one_before."--".$one_after.$two_after,
                                      $two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          // Ellipsis
          $str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
          $str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

          // Run the translation array we defined above
          $str = str_replace(array_keys($table), array_values($table), $str);

          // If there are any stray double quotes we'll catch them here
          $start = 0;

          while(TRUE)
          {
              $current = strpos(substr($str, $start), '"');
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+1, 1);

              if ( ! in_array($one_after, $space, TRUE))
              {
                  $str = str_replace( $one_before.'"'.$one_after,
                                      $one_before."&#8220;".$one_after,
                                      $str);
              }
              elseif( ! in_array($one_before, $space, TRUE))
              {
                  // Closing double quote: the search string must contain the
                  // double quote that was just found, not a single quote.
                  $str = str_replace( $one_before.'"'.$one_after,
                                      $one_before."&#8221;".$one_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          // ...and any stray single quotes
          $start = 0;

          while(TRUE)
          {
              $current = strpos(substr($str, $start), "'");
              if ($current === FALSE) break;

              $one_before = substr($str, $start+$current-1, 1);
              $one_after = substr($str, $start+$current+1, 1);

              if ( ! in_array($one_after, $space, TRUE))
              {
                  $str = str_replace( $one_before."'".$one_after,
                                      $one_before."&#8216;".$one_after,
                                      $str);
              }
              elseif( ! in_array($one_before, $space, TRUE))
              {
                  $str = str_replace( $one_before."'".$one_after,
                                      $one_before."&#8217;".$one_after,
                                      $str);
              }

              $start = $start+$current+2;
          }

          return $str;
      }

      // --------------------------------------------------------------------

      /**
       * Format Newlines
       *
       * Converts newline characters into either <p> tags or <br />.
       * An empty string is returned unchanged; anything else comes back
       * wrapped in a single outer <p>...</p> pair.
       *
       * @access public
       * @param string $str
       * @return string
       */
      function format_newlines($str)
      {
          if ($str == '')
          {
              return $str;
          }

          if (strpos($str, "\n") === FALSE)
          {
              return '<p>'.$str.'</p>';
          }

          // Double newlines are paragraph boundaries
          $str = str_replace("\n\n", "</p>\n\n<p>", $str);

          // A single newline with text on both sides becomes a line break
          $str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

          return '<p>'.$str.'</p>';
      }
  }
  452. ?>