PageRenderTime 55ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/system/core/core.typography.php

https://github.com/danboy/Croissierd
PHP | 1727 lines | 1022 code | 320 blank | 385 comment | 177 complexity | 154f0245ed9ea4b81e126a04df690814 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /*
  3. =====================================================
  4. ExpressionEngine - by EllisLab
  5. -----------------------------------------------------
  6. http://expressionengine.com/
  7. -----------------------------------------------------
  8. Copyright (c) 2003 - 2010 EllisLab, Inc.
  9. =====================================================
  10. THIS IS COPYRIGHTED SOFTWARE
  11. PLEASE READ THE LICENSE AGREEMENT
  12. http://expressionengine.com/docs/license.html
  13. =====================================================
  14. File: core.typography.php
  15. -----------------------------------------------------
  16. Purpose: Typographic rendering class
  17. =====================================================
  18. */
  19. if ( ! defined('EXT'))
  20. {
  21. exit('Invalid file request');
  22. }
  23. class Typography {
  24. var $single_line_pgfs = TRUE; // Whether to treat single lines as paragraphs in auto-xhtml
  25. var $text_format = 'xhtml'; // xhtml, br, none, or lite
  26. var $html_format = 'safe'; // safe, all, none
  27. var $auto_links = 'y';
  28. var $allow_img_url = 'n';
  29. var $parse_images = FALSE;
  30. var $encode_email = TRUE;
  31. var $encode_type = 'javascript'; // javascript or noscript
  32. var $use_span_tags = TRUE;
  33. var $popup_links = FALSE;
  34. var $bounce = '';
  35. var $smiley_array = FALSE;
  36. var $parse_smileys = TRUE;
  37. var $highlight_code = TRUE;
  38. var $convert_curly = TRUE; // Convert Curly Brackets Into Entities
  39. var $emoticon_path = '';
  40. var $site_index = '';
  41. var $word_censor = FALSE;
  42. var $censored_words = array();
  43. var $censored_replace = '';
  44. var $file_paths = array();
  45. var $text_fmt_types = array('xhtml', 'br', 'none', 'lite');
  46. var $text_fmt_plugins = array();
  47. var $html_fmt_types = array('safe', 'all', 'none');
  48. var $yes_no_syntax = array('y', 'n');
  49. var $code_chunks = array();
  50. var $code_counter = 0;
  51. var $http_hidden = 'ed9f01a60cc1ac21bf6f1684e5a3be23f38a51b9'; // hash to protect URLs in [url] pMcode
  52. // Block level elements that should not be wrapped inside <p> tags
  53. var $block_elements = 'address|blockquote|div|dl|fieldset|form|h\d|hr|noscript|object|ol|p|pre|script|table|ul';
  54. // Elements that should not have <p> and <br /> tags within them.
  55. var $skip_elements = 'p|pre|ol|ul|dl|object|table|h\d';
  56. // Tags we want the parser to completely ignore when splitting the string.
  57. var $inline_elements = 'a|abbr|acronym|b|bdo|big|br|button|cite|code|del|dfn|em|i|img|ins|input|label|map|kbd|q|samp|select|small|span|strong|sub|sup|textarea|tt|var';
  58. // array of block level elements that require inner content to be within another block level element
  59. var $inner_block_required = array('blockquote');
  60. // the last block element parsed
  61. var $last_block_element = '';
  62. // whether or not to protect quotes within { curly braces }
  63. var $protect_braced_quotes = FALSE;
  64. /** -------------------------------------
  65. /** Allowed tags
  66. /** -------------------------------------*/
  67. // Note: The decoding array is associative, allowing more precise mapping
  68. var $safe_encode = array('b', 'i', 'em', 'del', 'ins', 'strong', 'pre', 'code', 'blockquote', 'abbr');
  69. var $safe_decode = array(
  70. 'b' => 'b',
  71. 'i' => 'i',
  72. 'em' => 'em',
  73. 'del' => 'del',
  74. 'ins' => 'ins',
  75. 'strong' => 'strong',
  76. 'pre' => 'pre',
  77. 'code' => 'code',
  78. 'blockquote' => 'blockquote',
  79. 'quote' => 'blockquote',
  80. 'QUOTE' => 'blockquote',
  81. 'abbr' => 'abbr'
  82. );
  83. /** -------------------------------------
  84. /** Constructor
  85. /** -------------------------------------*/
  /**
   * Constructor.
   *
   * Loads the file-path map (when image parsing is on), optionally allows
   * h2-h6 as "safe" tags, and reads the emoticon, popup-link and
   * word-censoring preferences before collecting the formatting plugins.
   *
   * NOTE(review): this is a PHP 4-style constructor (method named after the
   * class). PHP 8 no longer calls such methods on `new` -- confirm the PHP
   * version this file is deployed on.
   *
   * @param  bool  $parse_images    when TRUE, file paths are fetched through $FNS->fetch_file_paths()
   * @param  bool  $allow_headings  when TRUE, h2-h6 are added to $safe_encode / $safe_decode
   */
  function Typography($parse_images = TRUE, $allow_headings = TRUE)
  {
      global $PREFS, $FNS;

      $this->protect_braced_quotes = TRUE;

      if ($parse_images == TRUE)
      {
          $this->file_paths = $FNS->fetch_file_paths();
      }

      $this->parse_images = $parse_images;

      if ($allow_headings == TRUE)
      {
          foreach (array('h2', 'h3', 'h4', 'h5', 'h6') as $val)
          {
              $this->safe_encode[] = $val;
              $this->safe_decode[$val] = $val;
          }
      }

      /** -------------------------------------
      /**  Fetch emoticon prefs
      /** -------------------------------------*/

      if ($PREFS->ini('enable_emoticons') == 'y')
      {
          if (is_file(PATH_MOD.'emoticon/emoticons'.EXT))
          {
              // The included file is expected to define $smileys in this scope
              require PATH_MOD.'emoticon/emoticons'.EXT;

              // isset() guards against an include that does not define $smileys at all
              if (isset($smileys) AND is_array($smileys))
              {
                  $this->smiley_array = $smileys;
                  $this->emoticon_path = $PREFS->ini('emoticon_path', 1);
              }
          }
      }

      /* -------------------------------------------
      /*  Hidden Configuration Variables
      /*  - popup_link => Have links created by Typography class open in a new window (y/n)
      /* -------------------------------------------*/

      if ($PREFS->ini('popup_link') !== FALSE)
      {
          $this->popup_links = ($PREFS->ini('popup_link') == 'y') ? TRUE : FALSE;
      }

      /** -------------------------------------
      /**  Fetch word censoring prefs
      /** -------------------------------------*/

      if ($PREFS->ini('enable_censoring') == 'y' AND $PREFS->ini('censored_words') != '')
      {
          if ($PREFS->ini('censor_replacement') !== FALSE)
          {
              $this->censored_replace = $PREFS->ini('censor_replacement');
          }

          // The preference is a pipe-delimited list; all whitespace is removed first.
          $words = preg_replace("/\s+/", "", trim($PREFS->ini('censored_words')));

          // Split on pipes and drop empty entries, so leading, trailing or
          // repeated pipes never produce an empty "word".
          $this->censored_words = preg_split('/\|/', $words, -1, PREG_SPLIT_NO_EMPTY);

          // Only enable censoring when at least one real word remains
          if (count($this->censored_words) > 0)
          {
              $this->word_censor = TRUE;
          }
      }

      /** -------------------------------------
      /**  Fetch plugins
      /** -------------------------------------*/

      $this->text_fmt_plugins = $this->fetch_plugins();
  }
  152. /* END */
  153. /** -------------------------------------
  154. /** Fetch installed plugins
  155. /** -------------------------------------*/
  /**
   * Build the sorted list of formatting plugins found in PATH_PI.
   *
   * A plugin file is named "pi.<name><EXT>", where <name> consists of
   * lowercase letters, digits and underscores. The returned entries are the
   * bare <name> parts. "auto_xhtml" is always left out.
   *
   * @return array  plugin names sorted with sort(); empty when PATH_PI cannot be opened
   */
  function fetch_plugins()
  {
      $exclude  = array('auto_xhtml');
      $filelist = array();

      // Anchored at the start as well as the end: without "^" a name such as
      // "xpi.foo.php" would match and the substr() below would return garbage.
      $pattern = "/^pi\.[a-z\_0-9]+?".preg_quote(EXT, '/')."$/";

      // Best effort: an unreadable plugin directory simply yields an empty list
      if ($fp = @opendir(PATH_PI))
      {
          while (false !== ($file = readdir($fp)))
          {
              if ( ! preg_match($pattern, $file))
              {
                  continue;
              }

              // Strip the leading "pi." (3 characters) and the trailing extension
              $name = substr($file, 3, - strlen(EXT));

              if ( ! in_array($name, $exclude))
              {
                  $filelist[] = $name;
              }
          }

          closedir($fp);
      }

      sort($filelist);

      return $filelist;
  }
  179. /* END */
  180. /** ----------------------------------------
  181. /** Parse file paths
  182. /** ----------------------------------------*/
  /**
   * Replace {filedir_N} placeholders with their configured paths.
   *
   * Both the literal form "{filedir_N}" and the entity-encoded form
   * "&#123;filedir_N&#125;" are replaced, where N is a key of
   * $this->file_paths. Nothing is done when image parsing is disabled or no
   * paths are loaded.
   *
   * @param  string  $str  text that may contain file-directory placeholders
   * @return string
   */
  function parse_file_paths($str)
  {
      if ($this->parse_images == FALSE OR count($this->file_paths) == 0)
      {
          return $str;
      }

      // Build one search/replace list; str_replace() applies the pairs in
      // order, which gives the same result as replacing key by key.
      $search  = array();
      $replace = array();

      foreach ($this->file_paths as $key => $val)
      {
          $search[]  = '{filedir_'.$key.'}';
          $replace[] = $val;

          $search[]  = '&#123;filedir_'.$key.'&#125;';
          $replace[] = $val;
      }

      return str_replace($search, $replace, $str);
  }
  196. /* END */
  197. /** -------------------------------------
  198. /** Typographic parser
  199. /** -------------------------------------*/
  200. // Note: The processing order is very important in this function so don't change it!
  /**
   * Run a string through the complete typography pipeline.
   *
   * Steps, in this fixed order: start hook, PHP/EE tag encoding, preference
   * handling, [code] protection and highlighting, image stripping, HTML
   * formatting, auto-linking, file paths, pMcode decoding, text formatting
   * (built-in or plugin), emoticons, word censoring, email encoding,
   * re-insertion of cached code chunks, end hook.
   *
   * The order of the steps matters; do not rearrange them.
   *
   * @param  string  $str    text to format
   * @param  mixed   $prefs  optional array with any of the keys 'text_format',
   *                         'html_format', 'auto_links', 'allow_img_url'; values that
   *                         are not recognised are ignored. Accepted values are stored
   *                         on the object, so they persist for later calls.
   * @return string|null     the formatted text; NULL when $str is empty
   */
  function parse_type($str, $prefs = '')
  {
      global $REGX, $FNS, $EXT, $IN;

      if ($str == '')
      {
          return;
      }

      // Append 'u' and 'strike' to the inline element list (once per instance)
      if (strpos($this->inline_elements, '|u|strike') === FALSE)
      {
          $this->inline_elements .= '|u|strike';
      }

      // -------------------------------------------
      // 'typography_parse_type_start' hook.
      //  - Modify string prior to all other typography processing
      //
          if ($EXT->active_hook('typography_parse_type_start') === TRUE)
          {
              $str = $EXT->call_extension('typography_parse_type_start', $str, $this, $prefs);
          }
      //
      // -------------------------------------------

      /** -------------------------------------
      /**  Encode PHP tags
      /** -------------------------------------*/

      // Before we do anything else, we'll convert PHP tags into character entities.
      // This is so that PHP submitted in weblog entries, comments, etc. won't get parsed.
      // Since you can enable templates to parse PHP, it would open up a security
      // hole to leave PHP submitted in entries and comments intact.

      $str = $REGX->encode_php_tags($str);

      /** -------------------------------------
      /**  Encode EE tags
      /** -------------------------------------*/

      // Next, we need to encode EE tags contained in entries, comments, etc. so that they don't get parsed.

      $str = $REGX->encode_ee_tags($str, $this->convert_curly);

      /** -------------------------------------
      /**  Set up our preferences
      /** -------------------------------------*/

      if (is_array($prefs))
      {
          if (isset($prefs['text_format']))
          {
              if ($prefs['text_format'] != 'none')
              {
                  if (in_array($prefs['text_format'], $this->text_fmt_types))
                  {
                      $this->text_format = $prefs['text_format'];
                  }
                  elseif (in_array($prefs['text_format'], $this->text_fmt_plugins) AND file_exists(PATH_PI.'pi.'.$prefs['text_format'].EXT))
                  {
                      // A plugin name is only accepted when its file is really there
                      $this->text_format = $prefs['text_format'];
                  }
              }
              else
              {
                  $this->text_format = 'none';
              }
          }

          if (isset($prefs['html_format']) AND in_array($prefs['html_format'], $this->html_fmt_types))
          {
              $this->html_format = $prefs['html_format'];
          }

          if (isset($prefs['auto_links']) AND in_array($prefs['auto_links'], $this->yes_no_syntax))
          {
              $this->auto_links = $prefs['auto_links'];
          }

          if (isset($prefs['allow_img_url']) AND in_array($prefs['allow_img_url'], $this->yes_no_syntax))
          {
              $this->allow_img_url = $prefs['allow_img_url'];
          }
      }

      /** -------------------------------------
      /**  Are single lines considered paragraphs?
      /** -------------------------------------*/

      if ($this->single_line_pgfs != TRUE)
      {
          // Text without any line break is formatted as 'lite' instead of 'xhtml'
          if ($this->text_format == 'xhtml' AND ! preg_match("/(\015\012)|(\015)|(\012)/", $str))
          {
              $this->text_format = 'lite';
          }
      }

      /** -------------------------------------
      /**  Fix emoticon bug
      /** -------------------------------------*/

      $str = str_replace(array('>:-(', '>:('), array(':angry:', ':mad:'), $str);

      /** -------------------------------------
      /**  Highlight text within [code] tags
      /** -------------------------------------*/

      // If highlighting is enabled, we'll highlight <pre> tags as well.

      if ($this->highlight_code == TRUE)
      {
          $str = str_replace(array('[pre]', '[/pre]'), array('[code]', '[/code]'), $str);
      }

      // We don't want pMcode parsed if it's within code examples so we'll convert the brackets

      if (preg_match_all("/\[code\](.+?)\[\/code\]/si", $str, $matches))
      {
          for ($i = 0; $i < count($matches['1']); $i++)
          {
              $temp = str_replace(array('[', ']'), array('&#91;', '&#93;'), $matches['1'][$i]);
              $str  = str_replace($matches['0'][$i], '[code]'.$temp.'[/code]', $str);
          }
      }

      if ($this->highlight_code == TRUE)
      {
          $str = $this->text_highlight($str);
      }
      else
      {
          $str = str_replace(array('[code]', '[/code]'), array('<code>', '</code>'), $str);
      }

      /** -------------------------------------
      /**  Strip IMG tags if not allowed
      /** -------------------------------------*/

      if ($this->allow_img_url == 'n')
      {
          $str = $this->strip_images($str);
      }

      /** -------------------------------------
      /**  Format HTML
      /** -------------------------------------*/

      $str = $this->format_html($str);

      /** -------------------------------------
      /**  Auto-link URLs and email addresses
      /** -------------------------------------*/

      if ($this->auto_links == 'y' AND $this->html_format != 'none')
      {
          $str = $this->auto_linker($str);
      }

      /** -------------------------------------
      /**  Parse file paths (in images)
      /** -------------------------------------*/

      $str = $this->parse_file_paths($str);

      /** ---------------------------------------
      /**  Convert HTML links in CP to pMcode
      /** ---------------------------------------*/

      // Forces HTML links output in the control panel to pMcode so they will be formatted
      // as redirects, to prevent the control panel address from showing up in referrer logs
      // except when sending emails, where we don't want created links piped through the site

      if (REQ == 'CP' && $IN->GBL('M', 'GET') != 'send_email')
      {
          $str = preg_replace("#<a\s+(.*?)href=(\042|\047)([^\\2]*?)\\2(.*?)\>(.*?)</a>#si", "[url=\"\\3\"\\1\\4]\\5[/url]", $str);
      }

      /** -------------------------------------
      /**  Decode pMcode
      /** -------------------------------------*/

      $str = $this->decode_pmcode($str);

      /** -------------------------------------
      /**  Format text
      /** -------------------------------------*/

      switch ($this->text_format)
      {
          case 'none':
              break;
          case 'xhtml':
              $str = $this->auto_typography($str);
              break;
          case 'lite':
              $str = $this->format_characters($str); // Used with weblog entry titles
              break;
          case 'br':
              $str = $this->nl2br_except_pre($str);
              break;
          default:
              // Anything else is treated as the name of a formatting plugin
              if ( ! class_exists('Template'))
              {
                  global $TMPL;
                  require PATH_CORE.'core.template'.EXT;
                  $TMPL = new Template();
              }

              // Use the format we actually dispatched on. $prefs may not be an
              // array (its default is '') when the format was set by an earlier
              // call or assigned to the property directly.
              $format = $this->text_format;
              $plugin = ucfirst($format);

              if ( ! class_exists($plugin))
              {
                  require_once PATH_PI.'pi.'.$format.EXT;
              }

              if (class_exists($plugin))
              {
                  $PLG = new $plugin($str);

                  if (isset($PLG->return_data))
                  {
                      $str = $PLG->return_data;
                  }
              }
              break;
      }

      /** -------------------------------------
      /**  Parse emoticons
      /** -------------------------------------*/

      $str = $this->emoticon_replace($str);

      /** -------------------------------------
      /**  Parse censored words
      /** -------------------------------------*/

      $str = $this->filter_censored_words($str);

      /** ------------------------------------------
      /**  Decode and spam-protect email addresses
      /** ------------------------------------------*/

      // {encode="you@yoursite.com" title="Click Me"}

      // Note: We only do this here if it's a CP request since the
      // template parser handles this for page requests

      if (REQ == 'CP')
      {
          if (preg_match_all("/\{encode=(.+?)\}/i", $str, $matches))
          {
              for ($j = 0; $j < count($matches['0']); $j++)
              {
                  $str = str_replace($matches['0'][$j], $FNS->encode_email($matches['1'][$j]), $str);
              }
          }
      }

      // Standard email addresses

      $str = $this->decode_emails($str);

      /** ------------------------------------------
      /**  Insert the cached code tags
      /** ------------------------------------------*/

      // The highlight function called earlier converts the original code strings into markers
      // so that the auto_xhtml function doesn't attempt to process the highlighted code chunks.
      // Here we convert the markers back to their correct state.

      if (count($this->code_chunks) > 0)
      {
          foreach ($this->code_chunks as $key => $val)
          {
              $str = str_replace('{'.$key.'yH45k02wsSdrp}', $val, $str);
          }

          // Reset the cache so the chunks do not carry over to the next call
          $this->code_chunks = array();
      }

      // -------------------------------------------
      // 'typography_parse_type_end' hook.
      //  - Modify string after all other typography processing
      //
          if ($EXT->active_hook('typography_parse_type_end') === TRUE)
          {
              $str = $EXT->call_extension('typography_parse_type_end', $str, $this, $prefs);
          }
      //
      // -------------------------------------------

      return $str;
  }
  436. /* END */
  437. /** -------------------------------------
  438. /** Format HTML
  439. /** -------------------------------------*/
  /**
   * Apply the current html_format setting to a string.
   *
   *  - 'all'  : the string is returned untouched
   *  - 'none' : every angle bracket is entity-encoded
   *  - 'safe' : the string is XSS-cleaned, the allowed tags, anchors and images
   *             are turned into pMcode, and whatever markup remains is encoded
   *
   * An unrecognised html_format is reset to 'safe' first.
   *
   * @param  string  $str
   * @return string
   */
  function format_html($str)
  {
      global $REGX;

      $known_formats = array('all', 'safe', 'none');

      if ( ! in_array($this->html_format, $known_formats))
      {
          $this->html_format = 'safe';
      }

      switch ($this->html_format)
      {
          case 'all':
              return $str;
          case 'none':
              return $this->encode_tags($str);
      }

      /** -------------------------------------
      /**  From here on: permit only safe HTML
      /** -------------------------------------*/

      $str = $REGX->xss_clean($str);

      // JavaScript event handlers are cut out of image tags and anchors
      // to prevent cross-site scripting.
      $handlers = array(
          'onblur', 'onchange', 'onclick', 'onfocus', 'onload',
          'onmouseover', 'onmouseup', 'onmousedown', 'onselect', 'onsubmit',
          'onunload', 'onkeypress', 'onkeydown', 'onkeyup', 'onresize'
      );

      foreach ($handlers as $handler)
      {
          // Both patterns are applied in order for each handler name
          $str = preg_replace(
              array(
                  "/<img src\s*=(.+?)".$handler."\s*\=.+?\>/i",
                  "/<a href\s*=(.+?)".$handler."\s*\=.+?\>/i"
              ),
              array(
                  "<img src=\\1 />",
                  "<a href=\\1>"
              ),
              $str
          );
      }

      // <br> and <br /> become newlines
      $str = preg_replace("#<br>|<br />#i", "\n", $str);

      // A closing paragraph followed directly by an opening one becomes a
      // newline; all remaining paragraph tags are dropped.
      $str = preg_replace("#<\/p><p[^>]*?>#i", "\n", $str);
      $str = preg_replace("#<p>|<p[^>]*?>|</p>#i", "", $str);

      // Allowed HTML tags become their pMcode equivalents
      foreach ($this->safe_encode as $tag)
      {
          $str = preg_replace("#<".$tag.">(.+?)</".$tag.">#si", '['.$tag.']\\1[/'.$tag.']', $str);
      }

      // Anchors become [url] pMcode, wherever href sits among the attributes.
      // This keeps them from being encoded by the final step below.
      $str = preg_replace("#<a\s+(.*?)href=(\042|\047)([^\\2]*?)\\2(.*?)\>(.*?)</a>#si", "[url=\"\\3\"\\1\\4]\\5[/url]", $str);

      // Image tags become [img] pMcode; self-closing slashes are removed first
      $str = str_replace("/>", ">", $str);
      $str = preg_replace("#<img(.*?)src=\s*[\"'](.+?)[\"'](.*?)\s*\>#si", "[img]\\2\\3\\1[/img]", $str);

      // Bare image URLs (jpg, jpeg, gif, png) are wrapped in [img] as well
      $str = preg_replace( "#(^|\s|\()((http(s?)://)|(www\.))(\w+[^\s\)\<]+)\.(jpg|jpeg|gif|png)#i", "\\1[img]http\\4://\\5\\6.\\7[/img]", $str);

      return $this->encode_tags($str);
  }
  504. /* END */
  505. /** -------------------------------------
  506. /** Auto link URLs and email addresses
  507. /** -------------------------------------*/
  /**
   * Turn bare URLs and email addresses into [url] / [email] pMcode.
   *
   * URLs that already sit inside a [url] tag are masked with $this->http_hidden
   * while the patterns run, so they are not linked a second time.
   *
   * As a side effect, $this->bounce is set to the redirect prefix
   * (site index + "URL=") for control panel requests other than send_email,
   * or when the 'redirect_submitted_links' preference is 'y'; otherwise it is
   * set to an empty string.
   *
   * @param  string  $str
   * @return string
   */
  function auto_linker($str)
  {
      global $FNS, $PREFS, $IN;

      // A trailing space is added for the patterns below; it is removed again on return
      $str .= ' ';

      // We don't want any links that appear in the control panel (in weblog entries, comments, etc.)
      // to point directly at URLs.  Why?  Because the control panel URL will end up in people's referrer logs.
      // This would be a bad thing.  So, we'll point all links to the "bounce server"

      $qm = ($PREFS->ini('force_query_string') == 'y') ? '' : '?';

      $this->bounce = ((REQ == 'CP' && $IN->GBL('M', 'GET') != 'send_email') || $PREFS->ini('redirect_submitted_links') == 'y') ? $FNS->fetch_site_index().$qm.'URL=' : '';

      // Protect URLs that are already in [url] pMCode

      $str = preg_replace("/(\[url[^\]]*?\])http/is", '${1}'.$this->http_hidden, str_replace('[url=http', '[url='.$this->http_hidden, $str));

      // The first group captures what precedes the URL (line start, whitespace,
      // "(" or the last three characters of a pMcode tag) so the callback can
      // recognise URLs that directly follow an [img] or [url] tag.

      $str = preg_replace_callback("#(^|\s|\(|..\])((http(s?)://)|(www\.))(\w+[^\s\)\<\[]+)#im", array($this, 'auto_linker_callback'), $str);

      // Auto link email

      $str = preg_replace("/(^|\s|\(|\>)([a-zA-Z0-9_\.\-]+)@([a-zA-Z0-9\-]+)\.([a-zA-Z0-9\-\.]*)/i", "\\1[email]\\2@\\3.\\4[/email]", $str);

      // Move period(s) at the end of an email address outside the tag

      $str = preg_replace("|(\.+)\[\/email\]|i", "[/email]\\1", $str);

      // UnProtect URLs that are already in [url] pMCode

      $str = str_replace($this->http_hidden, 'http', $str);

      return substr($str, 0, -1);  // Removes space added above
  }
  531. /* END */
  532. /** -------------------------------------
  533. /** Callback function used above
  534. /** -------------------------------------*/
  /**
   * preg_replace_callback() handler for the URL pattern in auto_linker().
   *
   * Capture groups used here:
   *   1 = text in front of the URL (e.g. a space, "(" or the tail of a pMcode tag)
   *   4 = "s" for https, otherwise empty
   *   5 = "www." when the URL started with it, otherwise empty
   *   6 = the rest of the URL
   *
   * Returns the match unchanged when it directly follows an [img] or [url]
   * tag; otherwise returns it wrapped in [url] pMcode. Trailing periods and
   * commas are kept outside the link.
   *
   * @param  array   $matches
   * @return string
   */
  function auto_linker_callback($matches)
  {
      // If it is in pMCode, then we do not auto link
      $prefix = strtolower($matches['1']);

      if ($prefix == 'mg]' OR
          $prefix == 'rl]' OR
          strtolower(substr(trim($matches['6']), 0, 6)) == '[/url]'
         )
      {
          return $matches['0'];
      }

      // Split trailing periods/commas off the URL so they end up after the link
      $end = '';

      if (preg_match("/^(.+?)([\.\,]+)$/", $matches['6'], $punc_match))
      {
          $end = $punc_match[2];
          $matches['6'] = $punc_match[1];
      }

      // pMcode is returned rather than HTML, so the link is sanitized
      // by the pMcode decoder later on.
      $url = 'http'.$matches['4'].'://'.$matches['5'].$matches['6'];

      return $matches['1'].'[url='.$url.']'.$url.'[/url]'.$end;
  }
  579. /* END */
  580. /** -------------------------------------
  581. /** Decode pMcode
  582. /** -------------------------------------*/
  /**
   * Convert pMcode into HTML.
   *
   * Handles, in order: deprecated tag remapping, the simple tags listed in
   * $this->safe_decode, code-block wrappers, [color], [size], [url], [img],
   * [style] and attributed [quote] tags.
   *
   * NOTE(review): the values captured from [color=...], [size=...] (font
   * branch) and [style=...] are inserted into HTML attributes without any
   * filtering inside this method -- confirm they are sanitized upstream.
   *
   * @param  string  $str
   * @return string
   */
  function decode_pmcode($str)
  {
      global $FNS, $PREFS, $IN;

      /** -------------------------------------
      /**  Remap some deprecated tags with valid counterparts
      /** -------------------------------------*/

      $str = str_replace(array('[strike]', '[/strike]', '[u]', '[/u]'), array('[del]', '[/del]', '[em]', '[/em]'), $str);

      /** -------------------------------------
      /**  Decode pMcode array map
      /** -------------------------------------*/

      foreach ($this->safe_decode as $key => $val)
      {
          $str = str_replace(array('['.$key.']', '[/'.$key.']'), array('<'.$val.'>', '</'.$val.'>'), $str);
      }

      /** -------------------------------------
      /**  Decode codeblock division for code tag
      /** -------------------------------------*/

      if (count($this->code_chunks) > 0)
      {
          foreach ($this->code_chunks as $key => $val)
          {
              $str = str_replace('[div class="codeblock"]{'.$key.'yH45k02wsSdrp}[/div]', '<div class="codeblock">{'.$key.'yH45k02wsSdrp}</div>', $str);
          }
      }

      /** -------------------------------------
      /**  Decode color tags
      /** -------------------------------------*/

      if ($this->use_span_tags == TRUE)
      {
          $str = preg_replace("/\[color=(.*?)\](.*?)\[\/color\]/si", "<span style=\"color:\\1;\">\\2</span>", $str);
      }
      else
      {
          $str = preg_replace("/\[color=(.*?)\](.*?)\[\/color\]/si", "<font color=\"\\1\">\\2</font>", $str);
      }

      /** -------------------------------------
      /**  Decode size tags
      /** -------------------------------------*/

      if ($this->use_span_tags == TRUE)
      {
          $str = preg_replace_callback("/\[size=(.*?)\](.*?)\[\/size\]/si", array($this, "font_matrix"), $str);
      }
      else
      {
          // The size value belongs in the "size" attribute, not "color"
          $str = preg_replace("/\[size=(.*?)\](.*?)\[\/size\]/si", "<font size=\"\\1\">\\2</font>", $str);
      }

      /** -------------------------------------
      /**  Convert [url] tags to links
      /** -------------------------------------*/

      $qm = ($PREFS->ini('force_query_string') == 'y') ? '' : '?';

      // Redirect prefix: used for control panel requests (except send_email)
      // or when the 'redirect_submitted_links' preference is 'y'
      $bounce = ((REQ == 'CP' && $IN->GBL('M', 'GET') != 'send_email') || $PREFS->ini('redirect_submitted_links') == 'y') ? $FNS->fetch_site_index().$qm.'URL=' : '';

      $bad_things  = array("'",'"', ';', '[', '(', ')', '!', '*', '>', '<', "\t", "\r", "\n", 'document.cookie'); // everything else
      $bad_things2 = array('[', '(', ')', '!', '*', '>', '<', "\t", 'document.cookie'); // style,title attributes
      $exceptions  = array('http://', 'https://', 'irc://', 'feed://', 'ftp://', 'ftps://', 'mailto:', '/', '#');
      $allowed     = array('rel', 'title', 'class', 'style', 'target');

      if (preg_match_all("/\[url(.*?)\](.*?)\[\/url\]/i", $str, $matches))
      {
          for ($i = 0, $s = count($matches['0']); $i < $s; ++$i)
          {
              $matches['1'][$i] = trim($matches['1'][$i]);

              // [url=xxx]text[/url] uses the attribute part, [url]xxx[/url] uses the content
              $url = ($matches['1'][$i] != '') ? trim($matches['1'][$i]) : $matches['2'][$i];

              $extra = '';

              // remove all attributes except for the href in "Safe" HTML formatting
              // Also force links output in the CP with the Typography class as "safe" so that
              // any other tag attributes that it might have are not slapped in with the URL

              if (($this->html_format == 'safe' OR REQ == 'CP') && stristr($matches['1'][$i], ' '))
              {
                  for ($a = 0, $sa = count($allowed); $a < $sa; ++$a)
                  {
                      $attr = $allowed[$a].'=';

                      if (($p1 = strpos($url, $attr)) !== FALSE)
                      {
                          // Position of the character right after "name="
                          $value_at = $p1 + strlen($attr);

                          // Only quoted attribute values are carried over
                          $marker = substr($url, $value_at, 1);

                          if ($marker != "'" && $marker != '"') continue;

                          // Offset of the closing quote, counted from the first
                          // character after the opening quote, i.e. the value length
                          $p2 = strpos(substr($url, $value_at + 1), $marker);

                          if ($p2 === FALSE) continue;

                          // style and title values get the shorter blacklist
                          $blacklist = ($allowed[$a] == 'style' OR $allowed[$a] == 'title') ? $bad_things2 : $bad_things;

                          $inside = str_replace($blacklist, '', substr($url, $value_at + 1, $p2));

                          $extra .= ' '.$allowed[$a].'='.$marker.$inside.$marker;
                      }
                  }

                  // remove everything but the URL up to the first space
                  $url = substr($url, 0, strpos($url, ' '));

                  // get rid of opening = and surrounding quotes
                  $url = preg_replace(array('/^=(\042|\047)?/', '/(\042|\047)$/'), '', $url);

                  // url encode a few characters that we want to allow, in the wiki for example
                  $url = str_replace(array('"', "'", '!'), array('%22', '%27', '%21'), $url);
              }
              else
              {
                  // get rid of opening = and surrounding quotes (again for allow all!)
                  $url = preg_replace(array('/^=(\042|\047)?/', '/(\042|\047)$/'), '', $url);
              }

              // Clean out naughty stuff from URL.
              $url = ($this->html_format == 'all') ? str_replace($bad_things2, '', $url) : str_replace($bad_things, '', $url);

              // Prepend http:// unless the URL starts with one of the known prefixes
              $add = TRUE;

              foreach ($exceptions as $exception)
              {
                  if (substr($url, 0, strlen($exception)) == $exception)
                  {
                      $add = FALSE; break;
                  }
              }

              if ($add === TRUE)
              {
                  $url = "http://".$url;
              }

              $extra .= (($this->popup_links == TRUE) ? ' onclick="window.open(this.href); return false;" ' : '');

              // The target travels as a query string value when redirecting
              if ($bounce != '')
              {
                  $url = urlencode($url);
              }

              $str = str_replace($matches['0'][$i], '<a href="'.$bounce.trim($url).'"'.$extra.'>'.$matches['2'][$i]."</a>", $str);
          }
      }

      /** -------------------------------------
      /**  Image tags
      /** -------------------------------------*/

      // [img] and [/img]

      if ($this->allow_img_url == 'y')
      {
          $str = preg_replace_callback("/\[img\](.*?)\[\/img\]/i", array($this, "image_sanitize"), $str);
      }
      elseif ($this->auto_links == 'y' && $this->html_format != 'none')
      {
          // Images are not allowed: show a plain link to the image instead
          if (preg_match_all("/\[img\](.*?)\[\/img\]/i", $str, $matches))
          {
              for ($i = 0, $s = count($matches['0']); $i < $s; ++$i)
              {
                  $clean = str_replace($bad_things, '', $matches['1'][$i]);

                  $str = str_replace($matches['0'][$i], '<a href="'.$bounce.$clean.'">'.$clean."</a>", $str);
              }
          }
      }
      else
      {
          // Neither images nor links: leave only the tag content
          $str = preg_replace("/\[img\](.*?)\[\/img\]/i", "\\1", $str);
      }

      // Add quotes back to image tag if missing

      if (preg_match("/\<img src=[^\"\'].*?\>/i", $str))
      {
          $str = preg_replace("/<img src=([^\"\'\s]+)(.*?)\/\>/i", "<img src=\"\\1\" \\2/>", $str);
      }

      /** -------------------------------------
      /**  Style tags
      /** -------------------------------------*/

      // [style=class_name]stuff..[/style]

      $str = preg_replace("/\[style=(.*?)\](.*?)\[\/style\]/si", "<span class=\"\\1\">\\2</span>", $str);

      /** ---------------------------------------
      /**  Attributed quotes, used in the Forum module
      /** ---------------------------------------*/

      // [quote author="Brett" date="11231189803874"]...[/quote]

      if (preg_match_all('/\[quote\s+(author=".*?"\s+date=".*?")\]/si', $str, $matches))
      {
          for ($i = 0; $i < count($matches['1']); $i++)
          {
              $str = str_replace('[quote '.$matches['1'][$i].']', '<blockquote '.$matches['1'][$i].'>', $str);
          }
      }

      return $str;
  }
  745. /* END */
  746. /** -----------------------------------------
  747. /** Make images safe
  748. /** -----------------------------------------*/
  749. // This simply removes parenthesis so that javascript event handlers
  750. // can't be invoked.
  /**
   * preg_replace_callback() handler that turns the content of an
   * [img]...[/img] tag into an <img> element.
   *
   * Parentheses are removed from the URL so that javascript event handlers
   * can't be invoked. The alt text comes from an alt="..." attribute in the
   * tag content when there is one; otherwise the last path segment of the
   * URL is used (the whole URL when it has no slash).
   *
   * The src value is written without quotes; decode_pmcode() adds them
   * afterwards.
   *
   * @param  array   $matches  $matches[1] is the text between [img] and [/img]
   * @return string
   */
  function image_sanitize($matches)
  {
      $raw = $matches['1'];

      if (preg_match("/\s+alt=(\"|\')([^\\1]*?)\\1/", $raw, $alt_match))
      {
          // Cut the alt attribute out of the raw text BEFORE removing the
          // parentheses. Otherwise an alt value containing "(" or ")" would
          // no longer be found and would stay in the URL.
          $url = str_replace($alt_match['0'], '', $raw);
          $url = trim(str_replace(array('(', ')'), '', $url));

          $alt = str_replace(array('"', "'"), '', $alt_match['2']);
      }
      else
      {
          $url = str_replace(array('(', ')'), '', $raw);
          $alt = str_replace(array('"', "'"), '', $url);

          if (substr($alt, -1) == '/')
          {
              $alt = substr($alt, 0, -1);
          }

          // strrpos() returns FALSE when there is no slash; adding 1 to that
          // would cut off the first character of the file name.
          $slash = strrpos($alt, '/');

          if ($slash !== FALSE)
          {
              $alt = substr($alt, $slash + 1);
          }
      }

      return "<img src=".$url." alt='".$alt."' />";
  }
  770. /** -----------------------------------------
  771. /** Decode and spam protect email addresses
  772. /** -----------------------------------------*/
  773. function decode_emails($str)
  774. {
  775. // [email=your@yoursite]email[/email]
  776. $str = preg_replace_callback("/\[email=(.*?)\](.*?)\[\/email\]/i", array($this, "create_mailto"),$str);
  777. // [email]joe@xyz.com[/email]
  778. $str = preg_replace_callback("/\[email\](.*?)\[\/email\]/i", array($this, "create_mailto"),$str);
  779. return $str;
  780. }
  781. /* END */
  782. /** -------------------------------------
  783. /** Format Email via callback
  784. /** -------------------------------------*/
  785. function create_mailto($matches)
  786. {
  787. $title = ( ! isset($matches['2'])) ? $matches['1'] : $matches['2'];
  788. if ($this->encode_email == TRUE)
  789. {
  790. return $this->encode_email($matches['1'], $title, TRUE);
  791. }
  792. else
  793. {
  794. return "<a href=\"mailto:".$matches['1']."\">".$title."</a>";
  795. }
  796. }
  797. /* END */
  798. /** ----------------------------------------
  799. /** Font sizing matrix via callback
  800. /** ----------------------------------------*/
  801. function font_matrix($matches)
  802. {
  803. switch($matches['1'])
  804. {
  805. case 1 : $size = '9px';
  806. break;
  807. case 2 : $size = '11px';
  808. break;
  809. case 3 : $size = '14px';
  810. break;
  811. case 4 : $size = '16px';
  812. break;
  813. case 5 : $size = '18px';
  814. break;
  815. case 6 : $size = '20px';
  816. break;
  817. default : $size = '11px';
  818. break;
  819. }
  820. return "<span style=\"font-size:".$size.";\">".$matches['2']."</span>";
  821. }
  822. /* END */
  823. /** -------------------------------------
  824. /** Encode tags
  825. /** -------------------------------------*/
  826. function encode_tags($str)
  827. {
  828. return str_replace(array("<", ">"), array("&lt;", "&gt;"), $str);
  829. }
  830. /* END */
  831. /** -------------------------------------
  832. /** Strip IMG tags
  833. /** -------------------------------------*/
  834. function strip_images($str)
  835. {
  836. $str = preg_replace("#<img\s+.*?src\s*=\s*[\"'](.+?)[\"'].*?\>#", "\\1", $str);
  837. $str = preg_replace("#<img\s+.*?src\s*=\s*(.+?)\s*\>#", "\\1", $str);
  838. return $str;
  839. }
  840. /* END */
  841. /** -------------------------------------
  842. /** Emoticon replacement
  843. /** -------------------------------------*/
  844. function emoticon_replace($str)
  845. {
  846. if ($this->smiley_array === FALSE OR $this->parse_smileys === FALSE)
  847. {
  848. return $str;
  849. }
  850. $str = ' '.$str;
  851. foreach ($this->smiley_array as $key => $val)
  852. {
  853. if (strpos($str, $key) !== FALSE)
  854. {
  855. $img = "<img src=\"".$this->emoticon_path.$this->smiley_array[$key]['0']."\" width=\"".$this->smiley_array[$key]['1']."\" height=\"".$this->smiley_array[$key]['2']."\" alt=\"".$this->smiley_array[$key]['3']."\" style=\"border:0;\" />";
  856. foreach(array(' ', "\t", "\n", "\r", '.', ',', '>') as $char)
  857. {
  858. $str = str_replace($char.$key, $char.$img, $str);
  859. }
  860. }
  861. }
  862. return ltrim($str);
  863. }
  864. /* END */
  865. /** -------------------------------------
  866. /** Word censor
  867. /** -------------------------------------*/
  868. function filter_censored_words($str)
  869. {
  870. global $REGX;
  871. if ($this->word_censor == FALSE)
  872. {
  873. return $str;
  874. }
  875. $str = ' '.$str.' ';
  876. // \w, \b and a few others do not match on a unicode character
  877. // set for performance reasons. As a result words like 端ber
  878. // will not match on a word boundary. Instead, we'll assume that
  879. // a bad word will be bookeneded by any of these characters.
  880. $delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';
  881. foreach ($this->censored_words as $badword)
  882. {
  883. // We have entered the high ASCII range, which means it is likely
  884. // that this character is a complete word or symbol that is not
  885. // allowed. So, instead of a preg_replace with a word boundary
  886. // we simply do a string replace for this bad word.
  887. if ((strlen($badword) == 4 OR strlen($badword) == 2) && stristr($badword, '*') === FALSE && ord($badword['0']) > 127 && ord($badword['1']) > 127)
  888. {
  889. $str = str_replace($badword, (($this->censored_replace != '') ? $this->censored_replace : '#'), $str);
  890. }
  891. else
  892. {
  893. if ($this->censored_replace != '')
  894. {
  895. $str = preg_replace("/({$delim})(".str_replace('\*', '\w*?', preg_quote($badword, '/')).")({$delim})/i", "\\1{$this->censored_replace}\\3", $str);
  896. }
  897. else
  898. {
  899. $str = preg_replace("/({$delim})(".str_replace('\*', '\w*?', preg_quote($badword, '/')).")({$delim})/ie", "'\\1'.str_repeat('#', strlen('\\2')).'\\3'", $str);
  900. }
  901. }
  902. }
  903. return trim($str);
  904. }
  905. /* END */
  906. /** -------------------------------------
  907. /** Colorize code strings
  908. /** -------------------------------------*/
  909. function text_highlight($str)
  910. {
  911. // No [code] tags? No reason to live. Goodbye cruel world...
  912. if ( ! preg_match_all("/\[code\](.+?)\[\/code\]/si", $str, $matches))
  913. {
  914. return $str;
  915. }
  916. for ($i = 0; $i < count($matches['1']); $i++)
  917. {
  918. $temp = trim($matches['1'][$i]);
  919. //$temp = $this->decode_pmcode(trim($matches['1'][$i]));
  920. // Turn <entities> back to ascii. The highlight string function
  921. // encodes and highlight brackets so we need them to start raw
  922. $temp = str_replace(array('&lt;', '&gt;'), array('<', '>'), $temp);
  923. // Replace any existing PHP tags to temporary markers so they don't accidentally
  924. // break the string out of PHP, and thus, thwart the highlighting.
  925. // While we're at it, convert EE braces
  926. $temp = str_replace(array('<?', '?>', '{', '}', '&#123;', '&#125;', '&#91;', '&#93;', '\\', '&#40;', '&#41;', '</script>'),
  927. array('phptagopen', 'phptagclose', 'braceopen', 'braceclose', 'braceopen', 'braceclose', 'bracketopen', 'bracketeclose', 'backslashtmp', 'parenthesisopen', 'parenthesisclose', 'scriptclose'),
  928. $temp);
  929. // The highlight_string function requires that the text be surrounded
  930. // by PHP tags, which we will remove later
  931. $temp = '<?php '.$temp.' ?>'; // <?
  932. // All the magic happens here, baby!
  933. $temp = highlight_string($temp, TRUE);
  934. // Prior to PHP 5, the highligh function used icky <font> tags
  935. // so we'll replace them with <span> tags.
  936. if (abs(PHP_VERSION) < 5)
  937. {
  938. $temp = str_replace(array('<font ', '</font>'), array('<span ', '</span>'), $temp);
  939. $temp = preg_replace('#color="(.*?)"#', 'style="color: \\1"', $temp);
  940. }
  941. // Remove our artificially added PHP, and the syntax highlighting that came with it
  942. $temp = preg_replace('/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i', '<span style="color: #$1">', $temp);
  943. $temp = preg_replace('/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is', "$1</span>\n</span>\n</code>", $temp);
  944. $temp = preg_replace('/<span style="color: #[A-Z0-9]+"\><\/span>/i', '', $temp);
  945. // Replace our markers back to PHP tags.
  946. $temp = str_replace(array('phptagopen', 'phptagclose', 'braceopen', 'braceclose', 'bracketopen', 'bracketeclose', 'backslashtmp', 'parenthesisopen', 'parenthesisclose', 'scriptclose'),
  947. array('&lt;?', '?&gt;', '&#123;', '&#125;', '&#91;', '&#93;', '\\', '&#40;', '&#41;', '&lt;/script&gt;'),
  948. $temp); //<?
  949. // Cache the code chunk and insert a marker into the original string.
  950. // we do this so that the auth_xhtml function which gets called later
  951. // doesn't process our new code chunk
  952. $this->code_chunks[$this->code_counter] = $temp;
  953. $str = str_replace($matches['0'][$i], '[div class="codeblock"]{'.$this->code_counter.'yH45k02wsSdrp}[/div]', $str);
  954. $this->code_counter++;
  955. }
  956. return $str;
  957. }
  958. /* END */
  959. /** -------------------------------------
  960. /** NL to <br /> - Except within <pre>
  961. /** -------------------------------------*/
  962. function nl2br_except_pre($str)
  963. {
  964. $ex = explode("pre>",$str);
  965. $ct = count($ex);
  966. $newstr = "";
  967. for ($i = 0; $i < $ct; $i++)
  968. {
  969. if (($i % 2) == 0)
  970. $newstr .= nl2br($ex[$i]);
  971. else
  972. $newstr .= $ex[$i];
  973. if ($ct - 1 != $i)
  974. $newstr .= "pre>";
  975. }
  976. return $newstr;
  977. }
  978. /* END */
  979. /** -------------------------------------
  980. /** Convert ampersands to entities
  981. /** -------------------------------------*/
  982. function convert_ampersands($str)
  983. {
  984. $str = preg_replace("/&#(\d+);/", "AMP14TX903DVGHY4QW\\1;", $str);
  985. $str = preg_replace("/&(\w+);/", "AMP14TX903DVGHY4QT\\1;", $str);
  986. return str_replace(array("&","AMP14TX903DVGHY4QW","AMP14TX903DVGHY4QT"),array("&amp;", "&#","&"), $str);
  987. }
  988. /* END */
  989. // --------------------------------------------------------------------
  990. /**
  991. * Old version - use auto_typography() now
  992. */
  993. function xhtml_typography($str)
  994. {
  995. return $this->auto_typography($str);
  996. }
  997. // --------------------------------------------------------------------
  998. /**
  999. * Auto Typography
  1000. *
  1001. * This function converts text, making it typographically correct:
  1002. * - Converts double spaces into paragraphs.
  1003. * - Converts single line breaks into <br /> tags
  1004. * - Converts single and double quotes into correctly facing curly quote entities.
  1005. * - Converts three dots into ellipsis.
  1006. * - Converts double dashes into em-dashes.
  1007. * - Converts two spaces into entities
  1008. *
  1009. * @access public
  1010. * @param string
  1011. * @param bool whether to reduce more than two consecutive newlines to two
  1012. * @return string
  1013. */
    function auto_typography($str, $reduce_linebreaks = FALSE)
    {
        // Nothing to format
        if ($str == '')
        {
            return '';
        }

        // Standardize Newlines to make matching easier
        if (strpos($str, "\r") !== FALSE)
        {
            $str = str_replace(array("\r\n", "\r"), "\n", $str);
        }

        // Reduce line breaks. If there are more than two consecutive linebreaks
        // we'll compress them down to a maximum of two since there's no benefit to more.
        if ($reduce_linebreaks === TRUE)
        {
            $str = preg_replace("/\n\n+/", "\n\n", $str);
        }

        // HTML comment tags don't conform to patterns of normal tags, so pull them out separately, only if needed.
        // Each comment is swapped for a {@HCn} placeholder and restored after formatting.
        $html_comments = array();
        if (strpos($str, '<!--') !== FALSE)
        {
            if (preg_match_all("#(<!\-\-.*?\-\->)#s", $str, $matches))
            {
                for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
                {
                    $html_comments[] = $matches[0][$i];
                    $str = str_replace($matches[0][$i], '{@HC'.$i.'}', $str);
                }
            }
        }

        // match and yank <pre> tags if they exist. It's cheaper to do this separately since most content will
        // not contain <pre> tags, and it keeps the PCRE patterns below simpler and faster
        if (strpos($str, '<pre') !== FALSE)
        {
            $str = preg_replace_callback("#<pre.*?>.*?</pre>#si", array($this, '_protect_characters'), $str);
        }

        // Convert quotes within tags to temporary markers.
        $str = preg_replace_callback("#<.+?>#si", array($this, '_protect_characters'), $str);

        // Do the same with braces if necessary
        if ($this->protect_braced_quotes === TRUE)
        {
            $str = preg_replace_callback("#\{.+?\}#si", array($this, '_protect_characters'), $str);
        }

        // Convert "ignore" tags to temporary marker. The parser splits out the string at every tag
        // it encounters. Certain inline tags, like image tags, links, span tags, etc. will be
        // adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
        $str = preg_replace("#<(/*)(".$this->inline_elements.")([ >])#i", "{@TAG}\\1\\2\\3", $str);

        // Split the string at every tag. This expression creates an array with this prototype:
        //
        //  [array]
        //  {
        //      [0] = <opening tag>
        //      [1] = Content...
        //      [2] = <closing tag>
        //      Etc...
        //  }
        $chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

        // Build our finalized string. We cycle through the array, skipping tags, and processing the contained text
        $str = '';
        $process = TRUE;
        // NOTE(review): $paragraph is assigned here but never read in this function
        $paragraph = FALSE;
        $current_chunk = 0;
        $total_chunks = count($chunks);

        foreach ($chunks as $chunk)
        {
            $current_chunk++;

            // Are we dealing with a tag? If so, we'll skip the processing for this cycle.
            // We'll also set the "process" flag which allows us to skip <pre> tags and a few other things.
            if (preg_match("#<(/*)(".$this->block_elements.").*?>#", $chunk, $match))
            {
                // An opening skip element turns processing off; its closing tag turns it back on
                if (preg_match("#".$this->skip_elements."#", $match[2]))
                {
                    $process = ($match[1] == '/') ? TRUE : FALSE;
                }

                // Remember the most recent opening block element
                if ($match[1] == '')
                {
                    $this->last_block_element = $match[2];
                }

                $str .= $chunk;
                continue;
            }

            // Inside a skipped element: pass the text through untouched
            if ($process == FALSE)
            {
                $str .= $chunk;
                continue;
            }

            // Force a newline to make sure end tags get processed by _format_newlines()
            if ($current_chunk == $total_chunks)
            {
                $chunk .= "\n";
            }

            // Convert Newlines into <p> and <br /> tags
            $str .= $this->_format_newlines($chunk);
        }

        // No opening block level tag? Add it if needed.
        if ( ! preg_match("/^\s*<(?:".$this->block_elements.")/i", $str))
        {
            $str = preg_replace("/^(.*?)<(".$this->block_elements.")/i", '<p>$1</p><$2', $str);
        }

        // Convert quotes, ellipsis, em-dashes, non-breaking spaces, and ampersands
        $str = $this->format_characters($str);

        // restore HTML comments
        for ($i = 0, $total = count($html_comments); $i < $total; $i++)
        {
            // remove surrounding paragraph tags, but only if there's an opening paragraph tag
            // otherwise HTML comments at the ends of paragraphs will have the closing tag removed
            // if '<p>{@HC1}' then replace <p>{@HC1}</p> with the comment, else replace only {@HC1} with the comment
            $str = preg_replace('#(?(?=<p>\{@HC'.$i.'\})<p>\{@HC'.$i.'\}(\s*</p>)|\{@HC'.$i.'\})#s', $html_comments[$i], $str);
        }

        // Final clean up. Keys are PCRE patterns, values their replacements;
        // they are applied in the order listed.
        $table = array(

                        // If the user submitted their own paragraph tags within the text
                        // we will retain them instead of using our tags.
                        '/(<p[^>*?]>)<p>/' => '$1', // <?php BBEdit syntax coloring bug fix

                        // Reduce multiple instances of opening/closing paragraph tags to a single one
                        '#(</p>)+#' => '</p>',
                        '/(<p>\W*<p>)+/' => '<p>',

                        // Clean up stray paragraph tags that appear before block level elements
                        '#<p></p><('.$this->block_elements.')#' => '<$1',

                        // Clean up stray non-breaking spaces preceding block elements
                        '#(&nbsp;\s*)+<('.$this->block_elements.')#' => ' <$2',

                        // Replace the temporary markers we added earlier
                        '/\{@TAG\}/' => '<',
                        '/\{@DQ\}/' => '"',
                        '/\{@SQ\}/' => "'",
                        '/\{@DD\}/' => '--',
                        '/\{@NBS\}/' => ' '
                        );

        // Do we need to reduce empty lines?
        if ($reduce_linebreaks === TRUE)
        {
            $table['#<p>\n*</p>#'] = '';
        }
        else
        {
            // If we have empty paragraph tags we add a non-breaking space
            // otherwise most browsers won't treat them as true paragraphs
            $table['#<p></p>#'] = '<p>&nbsp;</p>';
        }

        return preg_replace(array_keys($table), $table, $str);
    }
  1155. // --------------------------------------------------------------------
  1156. /**
  1157. * Format Characters
  1158. *
  1159. * This function mainly converts double and single quotes
  1160. * to curly entities, but it also converts em-dashes,
  1161. * double spaces, and ampersands
  1162. */
  1163. function format_characters($str)
  1164. {
  1165. static $table;
  1166. if ( ! isset($table))
  1167. {
  1168. $table = array(
  1169. // nested smart quotes, opening and closing
  1170. // note that rules for grammar (English) allow only for two levels deep
  1171. // and that single quotes are _supposed_ to always be on the outside
  1172. // but we'll accommodate both
  1173. // Note that in all cases, whitespace is the primary determining factor
  1174. // on which direction to curl, with non-word characters like punctuation
  1175. // being a secondary factor only after whitespace is addressed.
  1176. '/\'"(\s|$)/' => '&#8217;&#8221;$1',
  1177. '/(^|\s|<p>)\'"/' => '$1&#8216;&#8220;',
  1178. '/\'"(\W)/' => '&#8217;&#8221;$1',
  1179. '/(\W)\'"/' => '$1&#8216;&#8220;',
  1180. '/"\'(\s|$)/' => '&#8221;&#8217;$1',
  1181. '/(^|\s|<p>)"\'/' => '$1&#8220;&#8216;',
  1182. '/"\'(\W)/' => '&#8221;&#8217;$1',
  1183. '/(\W)"\'/' => '$1&#8220;&#8216;',
  1184. // single quote smart quotes
  1185. '/\'(\s|$)/' => '&#8217;$1',
  1186. '/(^|\s|<p>)\'/' => '$1&#8216;',
  1187. '/\'(\W)/' => '&#…

Large files files are truncated, but you can click here to view the full file