PageRenderTime 42ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Arabic/Glyphs.php

https://bitbucket.org/swergroup/wp-uploadplus
PHP | 665 lines | 372 code | 87 blank | 206 comment | 87 complexity | d122bbc80ccc0beb34d657543bbfa676 MD5 | raw file
Possible License(s): LGPL-3.0, GPL-3.0
  1. <?php
  2. /**
  3. * ----------------------------------------------------------------------
  4. *
  5. * Copyright (c) 2006-2013 Khaled Al-Sham'aa.
  6. *
  7. * http://www.ar-php.org
  8. *
  9. * PHP Version 5
  10. *
  11. * ----------------------------------------------------------------------
  12. *
  13. * LICENSE
  14. *
  15. * This program is open source product; you can redistribute it and/or
  16. * modify it under the terms of the GNU Lesser General Public License (LGPL)
  17. * as published by the Free Software Foundation; either version 3
  18. * of the License, or (at your option) any later version.
  19. *
  20. * This program is distributed in the hope that it will be useful,
  21. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23. * GNU Lesser General Public License for more details.
  24. *
  25. * You should have received a copy of the GNU Lesser General Public License
  26. * along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
  27. *
  28. * ----------------------------------------------------------------------
  29. *
  30. * Class Name: Arabic Glyphs is a simple class to render Arabic text
  31. *
  32. * Filename: Glyphs.php
  33. *
  34. * Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
  35. *
  36. * Purpose: This class takes Arabic text (encoded in Windows-1256 character
  37. * set) as input and performs Arabic glyph joining on it and outputs
  38. * a UTF-8 hexadecimals stream that is no longer logically arranged
  39. * but in a visual order which gives readable results when formatted
  40. * with a simple Unicode rendering just like GD and UFPDF libraries
  41. * that does not handle basic connecting glyphs of Arabic language
  42. * yet but simply outputs all stand alone glyphs in left-to-right
  43. * order.
  44. *
  45. * ----------------------------------------------------------------------
  46. *
  47. * Arabic Glyphs is class to render Arabic text
  48. *
  49. * PHP class that renders Arabic text by performing Arabic glyph joining on
  50. * it, then outputs a UTF-8 stream that gives readable results in PHP
  51. * libraries that support UTF-8 but do no Arabic shaping themselves.
  52. *
  53. * Example:
  54. * <code>
  55. * include('./I18N/Arabic.php');
  56. * $obj = new I18N_Arabic('Glyphs');
  57. *
  58. * $text = $obj->utf8Glyphs($text);
  59. *
  60. * imagettftext($im, 20, 0, 200, 100, $black, $font, $text);
  61. * </code>
  62. *
  63. * @category I18N
  64. * @package I18N_Arabic
  65. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  66. * @copyright 2006-2013 Khaled Al-Sham'aa
  67. *
  68. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  69. * @link http://www.ar-php.org
  70. */
  71. // New in PHP V5.3: Namespaces
  72. // namespace I18N\Arabic;
  73. //
  74. // $obj = new I18N\Arabic\Glyphs();
  75. //
  76. // use I18N\Arabic;
  77. // $obj = new Arabic\Glyphs();
  78. //
  79. // use I18N\Arabic\Glyphs as Glyphs;
  80. // $obj = new Glyphs();
  81. /**
  82. * This PHP class render Arabic text by performs Arabic glyph joining on it
  83. *
  84. * @category I18N
  85. * @package I18N_Arabic
  86. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  87. * @copyright 2006-2013 Khaled Al-Sham'aa
  88. *
  89. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  90. * @link http://www.ar-php.org
  91. */
  92. class I18N_Arabic_Glyphs
  93. {
  94. private $_glyphs = null;
  95. private $_hex = null;
  96. private $_prevLink = null;
  97. private $_nextLink = null;
  98. private $_vowel = null;
  99. /**
  100. * Loads initialize values
  101. *
  102. * @ignore
  103. */
  104. public function __construct()
  105. {
  106. $this->_prevLink = '???????????????????????????';
  107. $this->_nextLink = '????????????????????????????????????';
  108. $this->_vowel = '????????';
  109. /*
  110. $this->_glyphs['?'] = array('FE70','FE71');
  111. $this->_glyphs['?'] = array('FE72','FE72');
  112. $this->_glyphs['?'] = array('FE74','FE74');
  113. $this->_glyphs['?'] = array('FE76','FE77');
  114. $this->_glyphs['?'] = array('FE78','FE79');
  115. $this->_glyphs['?'] = array('FE7A','FE7B');
  116. $this->_glyphs['?'] = array('FE7C','FE7D');
  117. $this->_glyphs['?'] = array('FE7E','FE7E');
  118. */
  119. $this->_glyphs = '?????????';
  120. $this->_hex = '064B064B064B064B064C064C064C064C064D064D064D064D064E064E';
  121. $this->_hex .= '064E064E064F064F064F064F06500650065006500651065106510651';
  122. $this->_hex .= '06520652065206520670067006700670';
  123. $this->_glyphs .= '????????';
  124. $this->_hex .= 'FE80FE80FE80FE80FE81FE82FE81FE82FE83FE84FE83FE84FE85FE86';
  125. $this->_hex .= 'FE85FE86FE87FE88FE87FE88FE89FE8AFE8BFE8CFE8DFE8EFE8DFE8E';
  126. $this->_hex .= 'FE8FFE90FE91FE92';
  127. $this->_glyphs .= '????????';
  128. $this->_hex .= 'FE93FE94FE93FE94FE95FE96FE97FE98FE99FE9AFE9BFE9CFE9DFE9E';
  129. $this->_hex .= 'FE9FFEA0FEA1FEA2FEA3FEA4FEA5FEA6FEA7FEA8FEA9FEAAFEA9FEAA';
  130. $this->_hex .= 'FEABFEACFEABFEAC';
  131. $this->_glyphs .= '????????';
  132. $this->_hex .= 'FEADFEAEFEADFEAEFEAFFEB0FEAFFEB0FEB1FEB2FEB3FEB4FEB5FEB6';
  133. $this->_hex .= 'FEB7FEB8FEB9FEBAFEBBFEBCFEBDFEBEFEBFFEC0FEC1FEC2FEC3FEC4';
  134. $this->_hex .= 'FEC5FEC6FEC7FEC8';
  135. $this->_glyphs .= '????????';
  136. $this->_hex .= 'FEC9FECAFECBFECCFECDFECEFECFFED0FED1FED2FED3FED4FED5FED6';
  137. $this->_hex .= 'FED7FED8FED9FEDAFEDBFEDCFEDDFEDEFEDFFEE0FEE1FEE2FEE3FEE4';
  138. $this->_hex .= 'FEE5FEE6FEE7FEE8';
  139. $this->_glyphs .= '????????';
  140. $this->_hex .= 'FEE9FEEAFEEBFEECFEEDFEEEFEEDFEEEFEEFFEF0FEEFFEF0FEF1FEF2';
  141. $this->_hex .= 'FEF3FEF40640064006400640060C060C060C060C061F061F061F061F';
  142. $this->_hex .= '061B061B061B061B';
  143. // Support the extra 4 Persian letters (p), (ch), (zh) and (g)
  144. // This needs value in getGlyphs function to be 52 instead of 48
  145. // $this->_glyphs .= chr(129).chr(141).chr(142).chr(144);
  146. // $this->_hex .= 'FB56FB57FB58FB59FB7AFB7BFB7CFB7DFB8AFB8BFB8AFB8BFB92';
  147. // $this->_hex .= 'FB93FB94FB95';
  148. //
  149. // $this->_prevLink .= chr(129).chr(141).chr(142).chr(144);
  150. // $this->_nextLink .= chr(129).chr(141).chr(142).chr(144);
  151. //
  152. // Example: $text = '????? ???: ???? ????';
  153. // Email Yossi Beck <yosbeck@gmail.com> ask him to save that example
  154. // string using ANSI encoding in Notepad
  155. $this->_glyphs .= '';
  156. $this->_hex .= '';
  157. $this->_glyphs .= '????????';
  158. $this->_hex .= 'FEF5FEF6FEF5FEF6FEF7FEF8FEF7FEF8FEF9FEFAFEF9FEFAFEFBFEFC';
  159. $this->_hex .= 'FEFBFEFC';
  160. }
  161. /**
  162. * Get glyphs
  163. *
  164. * @param string $char Char
  165. * @param integer $type Type
  166. *
  167. * @return string
  168. */
  169. protected function getGlyphs($char, $type)
  170. {
  171. $pos = mb_strpos($this->_glyphs, $char);
  172. if ($pos > 49) {
  173. $pos = ($pos-49)/2 + 49;
  174. }
  175. $pos = $pos*16 + $type*4;
  176. return substr($this->_hex, $pos, 4);
  177. }
  178. /**
  179. * Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
  180. * hexadecimals stream
  181. *
  182. * @param string $str Arabic string in Windows-1256 charset
  183. *
  184. * @return string Arabic glyph joining in UTF-8 hexadecimals stream
  185. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  186. */
  187. protected function preConvert($str)
  188. {
  189. $crntChar = null;
  190. $prevChar = null;
  191. $nextChar = null;
  192. $output = '';
  193. $_temp = mb_strlen($str);
  194. for ($i = 0; $i < $_temp; $i++) {
  195. $chars[] = mb_substr($str, $i, 1);
  196. }
  197. $max = count($chars);
  198. for ($i = $max - 1; $i >= 0; $i--) {
  199. $crntChar = $chars[$i];
  200. $prevChar = ' ';
  201. if ($i > 0) {
  202. $prevChar = $chars[$i - 1];
  203. }
  204. if ($prevChar && mb_strpos($this->_vowel, $prevChar) !== false) {
  205. $prevChar = $chars[$i - 2];
  206. if ($prevChar && mb_strpos($this->_vowel, $prevChar) !== false) {
  207. $prevChar = $chars[$i - 3];
  208. }
  209. }
  210. $Reversed = false;
  211. $flip_arr = ')]>}';
  212. $ReversedChr = '([<{';
  213. if ($crntChar && mb_strpos($flip_arr, $crntChar) !== false) {
  214. $crntChar = $ReversedChr[mb_strpos($flip_arr, $crntChar)];
  215. $Reversed = true;
  216. } else {
  217. $Reversed = false;
  218. }
  219. if ($crntChar && !$Reversed
  220. && (mb_strpos($ReversedChr, $crntChar) !== false)
  221. ) {
  222. $crntChar = $flip_arr[mb_strpos($ReversedChr, $crntChar)];
  223. }
  224. if (ord($crntChar) < 128) {
  225. $output .= $crntChar;
  226. $nextChar = $crntChar;
  227. continue;
  228. }
  229. if ($crntChar == '?' && isset($chars[$i + 1])
  230. && (mb_strpos('????', $chars[$i + 1]) !== false)
  231. ) {
  232. continue;
  233. }
  234. if ($crntChar && mb_strpos($this->_vowel, $crntChar) !== false) {
  235. if ((mb_strpos($this->_nextLink, $chars[$i + 1]) !== false)
  236. && (mb_strpos($this->_prevLink, $prevChar) !== false)
  237. ) {
  238. $output .= '&#x' . $this->getGlyphs($crntChar, 1) . ';';
  239. } else {
  240. $output .= '&#x' . $this->getGlyphs($crntChar, 0) . ';';
  241. }
  242. continue;
  243. }
  244. $form = 0;
  245. if (($prevChar == '??' || $prevChar == '??' || $prevChar == '??'
  246. || $prevChar == '??' || $prevChar == '?')
  247. && (mb_strpos('????', $crntChar) !== false)
  248. ) {
  249. if (mb_strpos($this->_prevLink, $chars[$i - 2]) !== false) {
  250. $form++;
  251. }
  252. if (mb_strpos($this->_vowel, $chars[$i - 1])) {
  253. $output .= '&#x';
  254. $output .= $this->getGlyphs($crntChar, $form).';';
  255. } else {
  256. $output .= '&#x';
  257. $output .= $this->getGlyphs($prevChar.$crntChar, $form).';';
  258. }
  259. $nextChar = $prevChar;
  260. continue;
  261. }
  262. if ($prevChar && mb_strpos($this->_prevLink, $prevChar) !== false) {
  263. $form++;
  264. }
  265. if ($nextChar && mb_strpos($this->_nextLink, $nextChar) !== false) {
  266. $form += 2;
  267. }
  268. $output .= '&#x' . $this->getGlyphs($crntChar, $form) . ';';
  269. $nextChar = $crntChar;
  270. }
  271. // from Arabic Presentation Forms-B, Range: FE70-FEFF,
  272. // file "UFE70.pdf" (in reversed order)
  273. // into Arabic Presentation Forms-A, Range: FB50-FDFF, file "UFB50.pdf"
  274. // Example: $output = str_replace('&#xFEA0;&#xFEDF;', '&#xFCC9;', $output);
  275. // Lam Jeem
  276. $output = $this->decodeEntities($output, $exclude = array('&'));
  277. return $output;
  278. }
  279. /**
  280. * Regression analysis calculate roughly the max number of character fit in
  281. * one A4 page line for a given font size.
  282. *
  283. * @param integer $font Font size
  284. *
  285. * @return integer Maximum number of characters per line
  286. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  287. */
  288. public function a4MaxChars($font)
  289. {
  290. $x = 381.6 - 31.57 * $font + 1.182 * pow($font, 2) - 0.02052 *
  291. pow($font, 3) + 0.0001342 * pow($font, 4);
  292. return floor($x - 2);
  293. }
  294. /**
  295. * Calculate the lines number of given Arabic text and font size that will
  296. * fit in A4 page size
  297. *
  298. * @param string $str Arabic string you would like to split it into lines
  299. * @param integer $font Font size
  300. *
  301. * @return integer Number of lines for a given Arabic string in A4 page size
  302. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  303. */
  304. public function a4Lines($str, $font)
  305. {
  306. $str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);
  307. $lines = 0;
  308. $chars = 0;
  309. $words = explode(' ', $str);
  310. $w_count = count($words);
  311. $max_chars = $this->a4MaxChars($font);
  312. for ($i = 0; $i < $w_count; $i++) {
  313. $w_len = mb_strlen($words[$i]) + 1;
  314. if ($chars + $w_len < $max_chars) {
  315. if (mb_strpos($words[$i], "\n") !== false) {
  316. $words_nl = explode("\n", $words[$i]);
  317. $nl_num = count($words_nl) - 1;
  318. for ($j = 1; $j < $nl_num; $j++) {
  319. $lines++;
  320. }
  321. $chars = mb_strlen($words_nl[$nl_num]) + 1;
  322. } else {
  323. $chars += $w_len;
  324. }
  325. } else {
  326. $lines++;
  327. $chars = $w_len;
  328. }
  329. }
  330. $lines++;
  331. return $lines;
  332. }
  333. /**
  334. * Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
  335. * hexadecimals stream (take care of whole the document including English
  336. * sections as well as numbers and arcs etc...)
  337. *
  338. * @param string $str Arabic string in Windows-1256 charset
  339. * @param integer $max_chars Max number of chars you can fit in one line
  340. * @param boolean $hindo If true use Hindo digits else use Arabic digits
  341. *
  342. * @return string Arabic glyph joining in UTF-8 hexadecimals stream (take
  343. * care of whole document including English sections as well
  344. * as numbers and arcs etc...)
  345. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  346. */
  347. public function utf8Glyphs($str, $max_chars = 50, $hindo = true)
  348. {
  349. $str = str_replace(array("\r\n", "\n", "\r"), " \n ", $str);
  350. $str = str_replace("\t", " ", $str);
  351. $lines = array();
  352. $words = explode(' ', $str);
  353. $w_count = count($words);
  354. $c_chars = 0;
  355. $c_words = array();
  356. $english = array();
  357. $en_index = -1;
  358. $en_words = array();
  359. $en_stack = array();
  360. for ($i = 0; $i < $w_count; $i++) {
  361. $pattern = '/^(\n?)';
  362. $pattern .= '[a-z\d\\/\@\#\$\%\^\&\*\(\)\_\~\"\'\[\]\{\}\;\,\|\-\.\:!]*';
  363. $pattern .= '([\.\:\+\=\-\!??]?)$/i';
  364. if (preg_match($pattern, $words[$i], $matches)) {
  365. if ($matches[1]) {
  366. $words[$i] = mb_substr($words[$i], 1).$matches[1];
  367. }
  368. if ($matches[2]) {
  369. $words[$i] = $matches[2].mb_substr($words[$i], 0, -1);
  370. }
  371. $words[$i] = strrev($words[$i]);
  372. array_push($english, $words[$i]);
  373. if ($en_index == -1) {
  374. $en_index = $i;
  375. }
  376. $en_words[] = true;
  377. } elseif ($en_index != -1) {
  378. $en_count = count($english);
  379. for ($j = 0; $j < $en_count; $j++) {
  380. $words[$en_index + $j] = $english[$en_count - 1 - $j];
  381. }
  382. $en_index = -1;
  383. $english = array();
  384. $en_words[] = false;
  385. } else {
  386. $en_words[] = false;
  387. }
  388. }
  389. if ($en_index != -1) {
  390. $en_count = count($english);
  391. for ($j = 0; $j < $en_count; $j++) {
  392. $words[$en_index + $j] = $english[$en_count - 1 - $j];
  393. }
  394. }
  395. // need more work to fix lines starts by English words
  396. if (isset($en_start)) {
  397. $last = true;
  398. $from = 0;
  399. foreach ($en_words as $key => $value) {
  400. if ($last !== $value) {
  401. $to = $key - 1;
  402. array_push($en_stack, array($from, $to));
  403. $from = $key;
  404. }
  405. $last = $value;
  406. }
  407. array_push($en_stack, array($from, $key));
  408. $new_words = array();
  409. while (list($from, $to) = array_pop($en_stack)) {
  410. for ($i = $from; $i <= $to; $i++) {
  411. $new_words[] = $words[$i];
  412. }
  413. }
  414. $words = $new_words;
  415. }
  416. for ($i = 0; $i < $w_count; $i++) {
  417. $w_len = mb_strlen($words[$i]) + 1;
  418. if ($c_chars + $w_len < $max_chars) {
  419. if (mb_strpos($words[$i], "\n") !== false) {
  420. $words_nl = explode("\n", $words[$i]);
  421. array_push($c_words, $words_nl[0]);
  422. array_push($lines, implode(' ', $c_words));
  423. $nl_num = count($words_nl) - 1;
  424. for ($j = 1; $j < $nl_num; $j++) {
  425. array_push($lines, $words_nl[$j]);
  426. }
  427. $c_words = array($words_nl[$nl_num]);
  428. $c_chars = mb_strlen($words_nl[$nl_num]) + 1;
  429. } else {
  430. array_push($c_words, $words[$i]);
  431. $c_chars += $w_len;
  432. }
  433. } else {
  434. array_push($lines, implode(' ', $c_words));
  435. $c_words = array($words[$i]);
  436. $c_chars = $w_len;
  437. }
  438. }
  439. array_push($lines, implode(' ', $c_words));
  440. $maxLine = count($lines);
  441. $output = '';
  442. for ($j = $maxLine - 1; $j >= 0; $j--) {
  443. $output .= $lines[$j] . "\n";
  444. }
  445. $output = rtrim($output);
  446. $output = $this->preConvert($output);
  447. if ($hindo) {
  448. $nums = array(
  449. '0', '1', '2', '3', '4',
  450. '5', '6', '7', '8', '9'
  451. );
  452. $arNums = array(
  453. '?', '?', '?', '?', '?',
  454. '?', '?', '?', '?', '?'
  455. );
  456. foreach ($nums as $k => $v) {
  457. $p_nums[$k] = '/'.$v.'/ui';
  458. }
  459. $output = preg_replace($p_nums, $arNums, $output);
  460. foreach ($arNums as $k => $v) {
  461. $p_arNums[$k] = '/([a-z-\d]+)'.$v.'/ui';
  462. }
  463. foreach ($nums as $k => $v) {
  464. $r_nums[$k] = '${1}'.$v;
  465. }
  466. $output = preg_replace($p_arNums, $r_nums, $output);
  467. foreach ($arNums as $k => $v) {
  468. $p_arNums[$k] = '/'.$v.'([a-z-\d]+)/ui';
  469. }
  470. foreach ($nums as $k => $v) {
  471. $r_nums[$k] = $v.'${1}';
  472. }
  473. $output = preg_replace($p_arNums, $r_nums, $output);
  474. }
  475. return $output;
  476. }
  477. /**
  478. * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
  479. * Double-escaped entities will only be decoded once
  480. * ("&amp;lt;" becomes "&lt;", not "<").
  481. *
  482. * @param string $text The text to decode entities in.
  483. * @param array $exclude An array of characters which should not be decoded.
  484. * For example, array('<', '&', '"'). This affects
  485. * both named and numerical entities.
  486. *
  487. * @return string
  488. */
  489. protected function decodeEntities($text, $exclude = array())
  490. {
  491. static $table;
  492. // We store named entities in a table for quick processing.
  493. if (!isset($table)) {
  494. // Get all named HTML entities.
  495. $table = array_flip(get_html_translation_table(HTML_ENTITIES));
  496. // PHP gives us ISO-8859-1 data, we need UTF-8.
  497. $table = array_map('utf8_encode', $table);
  498. // Add apostrophe (XML)
  499. $table['&apos;'] = "'";
  500. }
  501. $newtable = array_diff($table, $exclude);
  502. // Use a regexp to select all entities in one pass, to avoid decoding
  503. // double-escaped entities twice.
  504. //return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e',
  505. // '$this->decodeEntities2("$1", "$2", "$0", $newtable,
  506. // $exclude)', $text);
  507. $pieces = explode('&', $text);
  508. $text = array_shift($pieces);
  509. foreach ($pieces as $piece) {
  510. if ($piece[0] == '#') {
  511. if ($piece[1] == 'x') {
  512. $one = '#x';
  513. } else {
  514. $one = '#';
  515. }
  516. } else {
  517. $one = '';
  518. }
  519. $end = mb_strpos($piece, ';');
  520. $start = mb_strlen($one);
  521. $two = mb_substr($piece, $start, $end - $start);
  522. $zero = '&'.$one.$two.';';
  523. $text .= $this->decodeEntities2($one, $two, $zero, $newtable, $exclude).
  524. mb_substr($piece, $end+1);
  525. }
  526. return $text;
  527. }
  528. /**
  529. * Helper function for decodeEntities
  530. *
  531. * @param string $prefix Prefix
  532. * @param string $codepoint Codepoint
  533. * @param string $original Original
  534. * @param array &$table Store named entities in a table
  535. * @param array &$exclude An array of characters which should not be decoded
  536. *
  537. * @return string
  538. */
  539. protected function decodeEntities2(
  540. $prefix, $codepoint, $original, &$table, &$exclude
  541. ) {
  542. // Named entity
  543. if (!$prefix) {
  544. if (isset($table[$original])) {
  545. return $table[$original];
  546. } else {
  547. return $original;
  548. }
  549. }
  550. // Hexadecimal numerical entity
  551. if ($prefix == '#x') {
  552. $codepoint = base_convert($codepoint, 16, 10);
  553. }
  554. // Encode codepoint as UTF-8 bytes
  555. if ($codepoint < 0x80) {
  556. $str = chr($codepoint);
  557. } elseif ($codepoint < 0x800) {
  558. $str = chr(0xC0 | ($codepoint >> 6)) .
  559. chr(0x80 | ($codepoint & 0x3F));
  560. } elseif ($codepoint < 0x10000) {
  561. $str = chr(0xE0 | ($codepoint >> 12)) .
  562. chr(0x80 | (($codepoint >> 6) & 0x3F)) .
  563. chr(0x80 | ($codepoint & 0x3F));
  564. } elseif ($codepoint < 0x200000) {
  565. $str = chr(0xF0 | ($codepoint >> 18)) .
  566. chr(0x80 | (($codepoint >> 12) & 0x3F)) .
  567. chr(0x80 | (($codepoint >> 6) & 0x3F)) .
  568. chr(0x80 | ($codepoint & 0x3F));
  569. }
  570. // Check for excluded characters
  571. if (in_array($str, $exclude)) {
  572. return $original;
  573. } else {
  574. return $str;
  575. }
  576. }
  577. }