PageRenderTime 59ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 0ms

/app/controllers/TextHelper.php

https://github.com/BabelZilla/WTS
PHP | 495 lines | 291 code | 66 blank | 138 comment | 46 complexity | fd3a19afed25f913e6da0f7620327d4e MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0, GPL-2.0
  1. <?php
  2. class TextHelper
  3. {
  4. /**
  5. * CodeIgniter
  6. *
  7. * An open source application development framework for PHP 5.1.6 or newer
  8. *
  9. * @package CodeIgniter
  10. * @author ExpressionEngine Dev Team
  11. * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc.
  12. * @license http://codeigniter.com/user_guide/license.html
  13. * @link http://codeigniter.com
  14. * @since Version 1.0
  15. * @filesource
  16. */
  17. // ------------------------------------------------------------------------
  18. /**
  19. * CodeIgniter Text Helpers
  20. *
  21. * @package CodeIgniter
  22. * @subpackage Helpers
  23. * @category Helpers
  24. * @author ExpressionEngine Dev Team
  25. * @link http://codeigniter.com/user_guide/helpers/text_helper.html
  26. */
  27. // ------------------------------------------------------------------------
  /**
   * Word Limiter
   *
   * Limits a string to at most $limit whitespace-separated words. Leading
   * whitespace is kept; trailing whitespace of the truncated text is removed
   * before the end character is appended.
   *
   * @access public
   * @param string  the text to shorten
   * @param integer maximum number of words to keep
   * @param string  the end character appended when text was cut off. Usually an ellipsis
   * @return string the (possibly shortened) text; whitespace-only input is returned unchanged
   */
  public static function word_limiter($str, $limit = 100, $end_char = '&#8230;')
  {
      if (trim($str) === '') {
          return $str;
      }

      // A limit below 1 would produce an invalid "{1,0}" quantifier. Zero
      // words plus the end character is what the caller effectively asked for.
      $limit = (int) $limit;
      if ($limit < 1) {
          return $end_char;
      }

      // Possessive quantifiers keep the match linear on long input.
      $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/';
      if (preg_match($pattern, $str, $matches) !== 1) {
          // No usable match (e.g. the input holds only whitespace that trim()
          // does not strip, or the pattern failed to compile): nothing to cut.
          return $str;
      }

      // The whole string fitted inside the limit, so nothing was removed.
      if (strlen($str) === strlen($matches[0])) {
          $end_char = '';
      }

      return rtrim($matches[0]) . $end_char;
  }
  50. // ------------------------------------------------------------------------
  /**
   * Character Limiter
   *
   * Limits the string based on the character count. Preserves complete words
   * so the character count may not be exactly as specified. Lengths are
   * measured with strlen(), i.e. in bytes.
   *
   * @access public
   * @param string  the text to shorten
   * @param integer approximate maximum length
   * @param string  the end character appended when text was cut off. Usually an ellipsis
   * @return string the (possibly shortened) text
   */
  public static function character_limiter($str, $n = 500, $end_char = '&#8230;')
  {
      if (strlen($str) < $n) {
          return $str;
      }

      // Collapse line breaks and every other run of whitespace into one space.
      $str = preg_replace("/\s+/", ' ', str_replace(array("\r\n", "\r", "\n"), ' ', $str));
      if (strlen($str) <= $n) {
          return $str;
      }

      // Compare against the trimmed text below: the words are taken from the
      // trimmed string, so comparing with the untrimmed one would append the
      // end character even when no word was dropped.
      $trimmed = trim($str);

      $out = '';
      foreach (explode(' ', $trimmed) as $word) {
          $out .= $word . ' ';
          if (strlen($out) >= $n) {
              $out = trim($out);
              return (strlen($out) === strlen($trimmed)) ? $out : $out . $end_char;
          }
      }

      // Only reachable when the loop never hits the limit; nothing was cut.
      return $trimmed;
  }
  81. // ------------------------------------------------------------------------
  /**
   * High ASCII to Entities
   *
   * Converts high ASCII text and MS Word special characters to numeric
   * character entities. The input is read byte by byte as UTF-8: bytes below
   * 128 are copied unchanged, and 2-, 3- and 4-byte sequences are turned into
   * "&#NNN;" entities. An incomplete sequence at the very end of the string
   * is not emitted.
   *
   * @access public
   * @param string the text to convert
   * @return string the text with multi-byte characters replaced by entities
   */
  public static function ascii_to_entities($str)
  {
      $count = 1;        // number of bytes expected for the current sequence
      $out = '';
      $temp = array();   // byte values collected for the current sequence

      for ($i = 0, $s = strlen($str); $i < $s; $i++) {
          $ordinal = ord($str[$i]);

          if ($ordinal < 128) {
              // A lone high byte followed by ASCII cannot be completed any
              // more: emit it as its own entity and start over.
              if (count($temp) == 1) {
                  $out .= '&#' . array_shift($temp) . ';';
                  $count = 1;
              }
              $out .= $str[$i];
              continue;
          }

          if (count($temp) == 0) {
              // The lead byte announces the sequence length. Lead bytes from
              // 240 upwards start 4-byte sequences (code points above U+FFFF).
              if ($ordinal < 224) {
                  $count = 2;
              } elseif ($ordinal < 240) {
                  $count = 3;
              } else {
                  $count = 4;
              }
          }

          $temp[] = $ordinal;

          if (count($temp) == $count) {
              // Strip the marker bits from each byte and reassemble the code point.
              if ($count == 4) {
                  $number = (($temp[0] % 8) * 262144) + (($temp[1] % 64) * 4096)
                      + (($temp[2] % 64) * 64) + ($temp[3] % 64);
              } elseif ($count == 3) {
                  $number = (($temp[0] % 16) * 4096) + (($temp[1] % 64) * 64) + ($temp[2] % 64);
              } else {
                  $number = (($temp[0] % 32) * 64) + ($temp[1] % 64);
              }

              $out .= '&#' . $number . ';';
              $count = 1;
              $temp = array();
          }
      }

      return $out;
  }
  123. // ------------------------------------------------------------------------
  /**
   * Entities to ASCII
   *
   * Converts decimal numeric character entities ("&#NNN;") back to their
   * UTF-8 byte sequences. Entities above U+10FFFF are left untouched.
   * When $all is true, the named entities &amp;, &lt;, &gt;, &quot; and
   * &apos; (plus &#45;) are decoded as well, in a single pass.
   *
   * @access public
   * @param string the text containing entities
   * @param bool   whether to also decode the named entities listed above
   * @return string the decoded text
   */
  public static function entities_to_ascii($str, $all = true)
  {
      if (preg_match_all('/\&#(\d+)\;/', $str, $matches)) {
          for ($i = 0, $s = count($matches[0]); $i < $s; $i++) {
              $code = (int) $matches[1][$i];

              // Not a Unicode code point: leave the entity as it is.
              if ($code > 0x10FFFF) {
                  continue;
              }

              if ($code < 128) {
                  $out = chr($code);
              } elseif ($code < 2048) {
                  $out = chr(0xC0 | ($code >> 6))
                      . chr(0x80 | ($code & 0x3F));
              } elseif ($code < 65536) {
                  $out = chr(0xE0 | ($code >> 12))
                      . chr(0x80 | (($code >> 6) & 0x3F))
                      . chr(0x80 | ($code & 0x3F));
              } else {
                  // Code points above U+FFFF need four bytes.
                  $out = chr(0xF0 | ($code >> 18))
                      . chr(0x80 | (($code >> 12) & 0x3F))
                      . chr(0x80 | (($code >> 6) & 0x3F))
                      . chr(0x80 | ($code & 0x3F));
              }

              $str = str_replace($matches[0][$i], $out, $str);
          }
      }

      if ($all) {
          // strtr() scans the text once, so the "&" produced from "&amp;" is
          // never re-read as the start of another entity ("&amp;lt;" becomes
          // "&lt;", not "<").
          $str = strtr(
              $str,
              array(
                  '&amp;'  => '&',
                  '&lt;'   => '<',
                  '&gt;'   => '>',
                  '&quot;' => '"',
                  '&apos;' => "'",
                  '&#45;'  => '-',
              )
          );
      }

      return $str;
  }
  162. // ------------------------------------------------------------------------
  /**
   * Word Censoring Function
   *
   * Supply a string and an array of disallowed words and any
   * matched words will be converted to #### or to the replacement
   * word you've submitted. A "*" inside a disallowed word acts as a
   * wildcard for further word characters. Matching is case-insensitive.
   *
   * @access public
   * @param string the text string
   * @param array  the array of censored words; any non-array value leaves the text untouched
   * @param string the optional replacement value; when empty, each match is
   *               masked with one "#" per byte
   * @return string the censored text, trimmed
   */
  public static function word_censor($str, $censored, $replacement = '')
  {
      if (!is_array($censored)) {
          return $str;
      }

      // Pad with spaces so words at the very start or end have a delimiter.
      $str = ' ' . $str . ' ';

      // \w, \b and a few others do not match on a unicode character
      // set for performance reasons. As a result words like über
      // will not match on a word boundary. Instead, we'll assume that
      // a bad word will be bookended by any of these characters.
      $delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';

      foreach ($censored as $badword) {
          $pattern = "/({$delim})(" . str_replace('\*', '\w*?', preg_quote($badword, '/')) . ")({$delim})/i";

          // Collect the matched words with their byte offsets instead of
          // using preg_replace(): the "e" modifier no longer exists in PHP 7+,
          // and a replacement string would be scanned for backreferences.
          if (!preg_match_all($pattern, $str, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)) {
              continue;
          }

          // Walk backwards so earlier offsets stay valid while we splice.
          $hits = $matches[2];
          for ($i = count($hits) - 1; $i >= 0; $i--) {
              $length = strlen($hits[$i][0]);
              $mask = ($replacement != '') ? $replacement : str_repeat('#', $length);
              $str = substr_replace($str, $mask, $hits[$i][1], $length);
          }
      }

      return trim($str);
  }
  204. // ------------------------------------------------------------------------
  /**
   * Code Highlighter
   *
   * Colorizes code strings using PHP's highlight_string(). The input is
   * wrapped in artificial PHP tags for the highlighter; those tags and their
   * markup are stripped from the result again.
   *
   * @access public
   * @param string the text string
   * @return string HTML markup as produced by highlight_string()
   */
  public static function highlight_code($str)
  {
      // highlight_string() encodes and highlights brackets itself,
      // so we need them to start raw.
      $str = str_replace(array('&lt;', '&gt;'), array('<', '>'), $str);

      // Replace any existing PHP tags with temporary markers so they don't
      // accidentally break the string out of PHP and thwart the highlighting.
      $str = str_replace(
          array('<?', '?>', '<%', '%>', '\\', '</script>'),
          array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
          $str
      );

      // highlight_string() requires the text to be surrounded by PHP tags,
      // which we remove again further down.
      $str = '<?php ' . $str . ' ?>';
      $str = highlight_string($str, true);

      // Prior to PHP 5 the highlighter emitted <font> tags; convert them to
      // <span> tags. version_compare() is used because PHP_VERSION is a
      // string such as "8.2.1", not a number.
      if (version_compare(PHP_VERSION, '5', '<')) {
          $str = str_replace(array('<font ', '</font>'), array('<span ', '</span>'), $str);
          $str = preg_replace('#color="(.*?)"#', 'style="color: \\1"', $str);
      }

      // Remove our artificially added PHP tags and the highlighting that came with them.
      $str = preg_replace(
          '/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i',
          '<span style="color: #$1">',
          $str
      );
      $str = preg_replace(
          '/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is',
          "$1</span>\n</span>\n</code>",
          $str
      );
      // Drop spans that were left empty by the two removals above.
      $str = preg_replace('/<span style="color: #[A-Z0-9]+"\><\/span>/i', '', $str);

      // Turn our markers back into (HTML-encoded) tags.
      $str = str_replace(
          array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
          array('&lt;?', '?&gt;', '&lt;%', '%&gt;', '\\', '&lt;/script&gt;'),
          $str
      );

      return $str;
  }
  257. // ------------------------------------------------------------------------
  /**
   * Phrase Highlighter
   *
   * Highlights a phrase within a text string. Every case-insensitive
   * occurrence of the phrase is wrapped in the given tags; the tags are
   * inserted literally.
   *
   * @access public
   * @param string the text string
   * @param string the phrase you'd like to highlight
   * @param string the opening tag to precede the phrase with
   * @param string the closing tag to end the phrase with
   * @return string the text with the phrase wrapped, or the text unchanged
   *                when the phrase is empty
   */
  public static function highlight_phrase($str, $phrase, $tag_open = '<strong>', $tag_close = '</strong>')
  {
      if ($str == '') {
          return '';
      }
      if ($phrase == '') {
          return $str;
      }

      // Split on the phrase and keep the matched text: with the whole pattern
      // captured, odd indexes hold the matches and even indexes the text in
      // between. Concatenating (rather than using a regex replacement string)
      // keeps "$1", "\1" or a tag starting with a digit from being read as a
      // backreference.
      $parts = preg_split('/(' . preg_quote($phrase, '/') . ')/i', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
      if ($parts === false) {
          return $str;
      }

      $out = '';
      foreach ($parts as $index => $part) {
          $out .= ($index % 2 === 1) ? $tag_open . $part . $tag_close : $part;
      }

      return $out;
  }
  280. // ------------------------------------------------------------------------
  /**
   * Word Wrap
   *
   * Wraps text at the specified character. Maintains the integrity of words.
   * Anything placed between {unwrap}{/unwrap} will not be word wrapped, nor
   * will URLs. Every output line, including the last, ends with "\n".
   *
   * @access public
   * @param string  the text string
   * @param integer the number of characters to wrap at; non-numeric values fall back to 76
   * @return string the wrapped text
   */
  public static function word_wrap($str, $charlim = '76')
  {
      // Set the character limit
      $charlim = is_numeric($charlim) ? (int) $charlim : 76;

      // Reduce multiple spaces
      $str = preg_replace('| +|', ' ', $str);

      // Standardize newlines
      if (strpos($str, "\r") !== false) {
          $str = str_replace(array("\r\n", "\r"), "\n", $str);
      }

      // Chunks surrounded by {unwrap} tags are pulled out and replaced
      // with a marker so the wrapping below cannot touch them.
      $unwrap = array();
      if (preg_match_all('|(\{unwrap\}.+?\{/unwrap\})|s', $str, $matches)) {
          foreach ($matches[1] as $i => $chunk) {
              $unwrap[] = $chunk;
              $str = str_replace($chunk, '{{unwrapped' . $i . '}}', $str);
          }
      }

      // Use PHP's native function to do the initial wordwrap. The cut flag
      // is FALSE so that individual words that are too long get left alone;
      // they are dealt with in the loop below.
      $str = wordwrap($str, $charlim, "\n", false);

      // Split the string into individual lines of text and cycle through them
      $output = '';
      foreach (explode("\n", $str) as $line) {
          // Lines within the allowed character count are taken over as they are.
          if (strlen($line) <= $charlim) {
              $output .= $line . "\n";
              continue;
          }

          // Each pass moves ($charlim - 1) characters from $line to $temp, so
          // a limit below 2 could never shorten $line; skip the loop then
          // instead of spinning forever.
          // NOTE(review): the moved pieces are appended to $temp without a
          // separator, so only the final remainder ends up on its own line.
          $temp = '';
          while ($charlim > 1 && strlen($line) > $charlim) {
              // If the over-length word is a URL we won't wrap it
              if (preg_match('!\[url.+\]|://|www\.!', $line)) {
                  break;
              }

              // Trim the word down
              $temp .= substr($line, 0, $charlim - 1);
              $line = substr($line, $charlim - 1);
          }

          // If $temp contains data we had to split up an over-length word,
          // so put it in front of what is left of the line.
          if ($temp != '') {
              $output .= $temp . "\n" . $line;
          } else {
              $output .= $line;
          }
          $output .= "\n";
      }

      // Put our markers back
      foreach ($unwrap as $key => $val) {
          $output = str_replace('{{unwrapped' . $key . '}}', $val, $output);
      }

      // Remove the unwrap tags
      $output = str_replace(array('{unwrap}', '{/unwrap}'), '', $output);

      return $output;
  }
  356. // ------------------------------------------------------------------------
  /**
   * Ellipsize String
   *
   * Strips tags from a string and, when it is longer than $max_length,
   * keeps $max_length characters of it with the ellipsis inserted at the
   * given relative position. Lengths are measured with strlen(), i.e. in bytes.
   *
   * @param string  string to ellipsize
   * @param integer max length of string
   * @param mixed   int (1|0) or float, .5, .2, etc for position to split;
   *                values outside 0..1 are clamped (1 = ellipsis at the end,
   *                0 = ellipsis at the start)
   * @param string  ellipsis ; Default '&hellip;'
   * @return string ellipsized string
   */
  public static function ellipsize($str, $max_length, $position = 1, $ellipsis = '&hellip;')
  {
      // Strip tags
      $str = trim(strip_tags($str));

      // Is the string long enough to ellipsize?
      if (strlen($str) <= $max_length) {
          return $str;
      }

      $max_length = max(0, (int) $max_length);

      // Clamp before anything is cut, so an out-of-range position cannot make
      // the leading part longer than $max_length.
      $position = max(0, min(1, (float) $position));

      $beg = substr($str, 0, (int) floor($max_length * $position));

      // Whatever length is left over is taken from the end of the string.
      // A zero remainder must yield an empty tail: substr($str, -0) would
      // return the whole string.
      $remaining = $max_length - strlen($beg);
      $end = ($remaining > 0) ? substr($str, -$remaining) : '';

      return $beg . $ellipsis . $end;
  }
  /**
   * Convert accented Vietnamese characters into their plain ASCII letters.
   *
   * Only the precomposed UTF-8 characters listed in the table below are
   * replaced; all other characters are left as they are.
   *
   * @param string the UTF-8 text to convert
   * @param bool   whether to also replace every space character
   * @param string the text each space is replaced with when $replacespace is true
   * @return string the converted text
   */
  public static function utf2ascii($str, $replacespace = false, $by = '')
  {
      // ASCII letter => accented variants that collapse onto it.
      $chars = array(
          'a' => array('ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'á', 'à', 'ả', 'ã', 'ạ', 'â', 'ă'),
          'A' => array('Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'Á', 'À', 'Ả', 'Ã', 'Ạ', 'Â', 'Ă'),
          'e' => array('ế', 'ề', 'ể', 'ễ', 'ệ', 'é', 'è', 'ẻ', 'ẽ', 'ẹ', 'ê'),
          'E' => array('Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'É', 'È', 'Ẻ', 'Ẽ', 'Ẹ', 'Ê'),
          'i' => array('í', 'ì', 'ỉ', 'ĩ', 'ị'),
          'I' => array('Í', 'Ì', 'Ỉ', 'Ĩ', 'Ị'),
          'o' => array('ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ó', 'ò', 'ỏ', 'õ', 'ọ', 'ô', 'ơ'),
          'O' => array('Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ó', 'Ò', 'Ỏ', 'Õ', 'Ọ', 'Ô', 'Ơ'),
          'u' => array('ứ', 'ừ', 'ử', 'ữ', 'ự', 'ú', 'ù', 'ủ', 'ũ', 'ụ', 'ư'),
          'U' => array('Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Ú', 'Ù', 'Ủ', 'Ũ', 'Ụ', 'Ư'),
          'y' => array('ý', 'ỳ', 'ỷ', 'ỹ', 'ỵ'),
          'Y' => array('Ý', 'Ỳ', 'Ỷ', 'Ỹ', 'Ỵ'),
          'd' => array('đ'),
          'D' => array('Đ'),
      );

      // str_replace() accepts the whole list of variants as the search value.
      foreach ($chars as $ascii => $variants) {
          $str = str_replace($variants, $ascii, $str);
      }

      if ($replacespace) {
          $str = str_replace(' ', $by, $str);
      }

      return $str;
  }
  416. }
  417. /* End of file text_helper.php */
  418. /* Location: ./system/helpers/text_helper.php */