PageRenderTime 56ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/application/third_party/ar-php/Arabic/Glyphs.php

https://bitbucket.org/machaven/limesurvey
PHP | 635 lines | 341 code | 87 blank | 207 comment | 77 complexity | e950a3e731070b257a6a133395604f17 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, GPL-3.0, LGPL-3.0
  1. <?php
  2. /**
  3. * ----------------------------------------------------------------------
  4. *
  5. * Copyright (c) 2006-2012 Khaled Al-Sham'aa.
  6. *
  7. * http://www.ar-php.org
  8. *
  9. * PHP Version 5
  10. *
  11. * ----------------------------------------------------------------------
  12. *
  13. * LICENSE
  14. *
  15. * This program is open source product; you can redistribute it and/or
  16. * modify it under the terms of the GNU Lesser General Public License (LGPL)
  17. * as published by the Free Software Foundation; either version 3
  18. * of the License, or (at your option) any later version.
  19. *
  20. * This program is distributed in the hope that it will be useful,
  21. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23. * GNU Lesser General Public License for more details.
  24. *
  25. * You should have received a copy of the GNU Lesser General Public License
  26. * along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
  27. *
  28. * ----------------------------------------------------------------------
  29. *
  30. * Class Name: Arabic Glyphs is a simple class to render Arabic text
  31. *
  32. * Filename: Glyphs.php
  33. *
  34. * Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
  35. *
  36. * Purpose: This class takes Arabic text (encoded in Windows-1256 character
  37. * set) as input, performs Arabic glyph joining on it, and outputs
  38. * a UTF-8 hexadecimal stream that is no longer logically arranged
  39. * but in visual order, which gives readable results when formatted
  40. * with simple Unicode renderers such as the GD and UFPDF libraries,
  41. * which do not yet handle the basic connecting glyphs of the Arabic
  42. * language but simply output all stand-alone glyphs in left-to-right
  43. * order.
  44. *
  45. * ----------------------------------------------------------------------
  46. *
  47. * Arabic Glyphs is a class to render Arabic text
  48. *
  49. * PHP class to render Arabic text by performing Arabic glyph joining on it,
  50. * then outputting a UTF-8 stream that gives readable results with PHP
  51. * libraries that support UTF-8.
  52. *
  53. * Example:
  54. * <code>
  55. * include('./I18N/Arabic.php');
  56. * $obj = new I18N_Arabic('Glyphs');
  57. *
  58. * $text = $obj->utf8Glyphs($text);
  59. *
  60. * imagettftext($im, 20, 0, 200, 100, $black, $font, $text);
  61. * </code>
  62. *
  63. * @category I18N
  64. * @package I18N_Arabic
  65. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  66. * @copyright 2006-2012 Khaled Al-Sham'aa
  67. *
  68. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  69. * @link http://www.ar-php.org
  70. */
  71. // New in PHP V5.3: Namespaces
  72. // namespace I18N\Arabic;
  73. //
  74. // $obj = new I18N\Arabic\Glyphs();
  75. //
  76. // use I18N\Arabic;
  77. // $obj = new Arabic\Glyphs();
  78. //
  79. // use I18N\Arabic\Glyphs as Glyphs;
  80. // $obj = new Glyphs();
  /**
   * Renders Arabic text for engines that cannot join Arabic letters themselves.
   *
   * The class replaces every Arabic letter by its contextual presentation form
   * (isolated / final / initial / medial, Unicode block FE70-FEFF) and emits the
   * characters in visual (left-to-right) order, so that a renderer which simply
   * draws code points from left to right (GD's imagettftext, UFPDF, ...) shows
   * readable, connected text.
   *
   * All text handled by this class is treated as UTF-8; the multibyte calls
   * pass the encoding explicitly instead of relying on mb_internal_encoding().
   *
   * The code is deliberately kept free of PHP 5.3+ syntax.
   *
   * @category  I18N
   * @package   I18N_Arabic
   * @author    Khaled Al-Sham'aa <khaled@ar-php.org>
   * @copyright 2006-2012 Khaled Al-Sham'aa
   *
   * @license   LGPL <http://www.gnu.org/licenses/lgpl.txt>
   * @link      http://www.ar-php.org
   */
  class I18N_Arabic_Glyphs
  {
      /** @var string Every character that has an entry in $_hex, in table order */
      private $_glyphs = null;

      /** @var string Four 4-digit hex code points per entry of $_glyphs */
      private $_hex = null;

      /** @var string Characters that connect to the character following them */
      private $_prevLink = null;

      /** @var string Characters that connect to the character preceding them */
      private $_nextLink = null;

      /** @var string The eight diacritics (U+064B .. U+0652) */
      private $_vowel = null;

      /**
       * Builds the joining sets and the glyph lookup table.
       *
       * Each entry of $_glyphs owns 16 characters of $_hex: the code points of
       * its isolated, final, initial and medial forms, in that order.
       *
       * @ignore
       */
      public function __construct()
      {
          $this->_prevLink = '،؟؛ـئبتثجحخسشصضطظعغفقكلمنهي';
          $this->_nextLink = 'ـآأؤإائبةتثجحخدذرزسشصضطظعغفقكلمنهوىي';

          // U+064B .. U+0652, written as UTF-8 byte escapes: combining marks
          // are easily reordered or dropped by editors when typed literally.
          $this->_vowel = "\xD9\x8B\xD9\x8C\xD9\x8D\xD9\x8E"
              . "\xD9\x8F\xD9\x90\xD9\x91\xD9\x92";

          // Entries 0-7: the diacritics (all four forms are the mark itself).
          $this->_glyphs = $this->_vowel;
          $this->_hex  = '064B064B064B064B064C064C064C064C064D064D064D064D064E064E';
          $this->_hex .= '064E064E064F064F064F064F06500650065006500651065106510651';
          $this->_hex .= '0652065206520652';

          // Entries 8-47: the letters, tatweel and Arabic punctuation.
          $this->_glyphs .= 'ءآأؤإئاب';
          $this->_hex .= 'FE80FE80FE80FE80FE81FE82FE81FE82FE83FE84FE83FE84FE85FE86';
          $this->_hex .= 'FE85FE86FE87FE88FE87FE88FE89FE8AFE8BFE8CFE8DFE8EFE8DFE8E';
          $this->_hex .= 'FE8FFE90FE91FE92';

          $this->_glyphs .= 'ةتثجحخدذ';
          $this->_hex .= 'FE93FE94FE93FE94FE95FE96FE97FE98FE99FE9AFE9BFE9CFE9DFE9E';
          $this->_hex .= 'FE9FFEA0FEA1FEA2FEA3FEA4FEA5FEA6FEA7FEA8FEA9FEAAFEA9FEAA';
          $this->_hex .= 'FEABFEACFEABFEAC';

          $this->_glyphs .= 'رزسشصضطظ';
          $this->_hex .= 'FEADFEAEFEADFEAEFEAFFEB0FEAFFEB0FEB1FEB2FEB3FEB4FEB5FEB6';
          $this->_hex .= 'FEB7FEB8FEB9FEBAFEBBFEBCFEBDFEBEFEBFFEC0FEC1FEC2FEC3FEC4';
          $this->_hex .= 'FEC5FEC6FEC7FEC8';

          $this->_glyphs .= 'عغفقكلمن';
          $this->_hex .= 'FEC9FECAFECBFECCFECDFECEFECFFED0FED1FED2FED3FED4FED5FED6';
          $this->_hex .= 'FED7FED8FED9FEDAFEDBFEDCFEDDFEDEFEDFFEE0FEE1FEE2FEE3FEE4';
          $this->_hex .= 'FEE5FEE6FEE7FEE8';

          $this->_glyphs .= 'هوىيـ،؟؛';
          $this->_hex .= 'FEE9FEEAFEEBFEECFEEDFEEEFEEDFEEEFEEFFEF0FEEFFEF0FEF1FEF2';
          $this->_hex .= 'FEF3FEF40640064006400640060C060C060C060C061F061F061F061F';
          $this->_hex .= '061B061B061B061B';

          // NOTE: the four extra Persian letters (p, ch, zh, g; forms
          // FB56-FB59, FB7A-FB7D, FB8A/FB8B, FB92-FB95) are not in the table.
          // Adding them before the ligatures would require the offset used in
          // getGlyphs() to become 52 instead of 48, and the letters would also
          // have to be appended to $_prevLink and $_nextLink.

          // Entries 48+: the lam-alef ligatures, two characters per entry.
          $this->_glyphs .= 'لآلألإلا';
          $this->_hex .= 'FEF5FEF6FEF5FEF6FEF7FEF8FEF7FEF8FEF9FEFAFEF9FEFAFEFBFEFC';
          $this->_hex .= 'FEFBFEFC';
      }

      /**
       * Looks up the presentation form of a character (or lam-alef pair).
       *
       * @param string  $char Character, or two-character lam-alef sequence
       * @param integer $type 0 isolated, 1 final, 2 initial, 3 medial
       *
       * @return string Four hex digits of the code point, or an empty string
       *                when $char has no entry in the table
       */
      protected function getGlyphs($char, $type)
      {
          if ($char === null || $char === '') {
              return '';
          }

          $pos = mb_strpos($this->_glyphs, $char, 0, 'UTF-8');
          if ($pos === false) {
              // Previously "not found" was silently used as index 0, which
              // turned every unknown character into U+064B.
              return '';
          }

          // From index 48 on every table entry spans two characters of
          // $_glyphs (the ligatures), so the character offset is halved.
          if ($pos > 48) {
              $pos = 48 + (int) (($pos - 48) / 2);
          }

          return substr($this->_hex, $pos * 16 + $type * 4, 4);
      }

      /**
       * Tells whether a single character occurs in a set of characters.
       *
       * @param string      $set  Characters forming the set
       * @param string|null $char Character to look for; null/empty never matches
       *
       * @return boolean
       */
      private function _contains($set, $char)
      {
          return $char !== null && $char !== ''
              && mb_strpos($set, $char, 0, 'UTF-8') !== false;
      }

      /**
       * Splits a UTF-8 string into an array of characters.
       *
       * @param string $str Text to split
       *
       * @return array List of characters (empty for an empty string)
       */
      private function _splitChars($str)
      {
          if ($str === '') {
              return array();
          }

          $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
          if ($chars !== false) {
              return $chars;
          }

          // preg_split() refuses malformed UTF-8; fall back to mbstring.
          $chars  = array();
          $length = mb_strlen($str, 'UTF-8');
          for ($i = 0; $i < $length; $i++) {
              $chars[] = mb_substr($str, $i, 1, 'UTF-8');
          }

          return $chars;
      }

      /**
       * Reverses a UTF-8 string character by character.
       *
       * @param string $str Text to reverse
       *
       * @return string
       */
      private function _reverse($str)
      {
          return implode('', array_reverse($this->_splitChars($str)));
      }

      /**
       * Converts logically ordered text into visually ordered, joined glyphs.
       *
       * The text is walked from its last character to its first, so the result
       * is reversed. ASCII characters are copied (brackets are mirrored), Arabic
       * letters are replaced by the presentation form matching their neighbours,
       * lam followed by an alef variant becomes one ligature, and characters
       * unknown to the glyph table are copied unchanged.
       *
       * @param string $str Text in logical order
       *
       * @return string Shaped text in visual order
       * @author Khaled Al-Sham'aa <khaled@ar-php.org>
       */
      protected function preConvert($str)
      {
          $alefs    = 'آأإا';
          $chars    = $this->_splitChars((string) $str);
          $output   = '';
          $nextChar = null;

          for ($i = count($chars) - 1; $i >= 0; $i--) {
              $crntChar = $chars[$i];

              // Nearest preceding character, looking past at most two
              // diacritics. It is recomputed for every position: reusing the
              // value of the previous iteration made the first letter of the
              // text join to itself.
              $prevChar = ' ';
              for ($back = 1; $back <= 3; $back++) {
                  if (!isset($chars[$i - $back])) {
                      $prevChar = ' ';
                      break;
                  }
                  $prevChar = $chars[$i - $back];
                  if (!$this->_contains($this->_vowel, $prevChar)) {
                      break;
                  }
              }

              // Mirror brackets, since the reading direction is flipped.
              if (strlen($crntChar) === 1) {
                  $crntChar = strtr($crntChar, ')]>}([<{', '([<{)]>}');
              }

              if (ord($crntChar) < 128) {
                  $output  .= $crntChar;
                  $nextChar = $crntChar;
                  continue;
              }

              // A lam directly before an alef was already emitted as part of
              // the ligature when the alef was processed.
              if ($crntChar === 'ل' && isset($chars[$i + 1])
                  && $this->_contains($alefs, $chars[$i + 1])
              ) {
                  continue;
              }

              if ($this->_contains($this->_vowel, $crntChar)) {
                  $following = isset($chars[$i + 1]) ? $chars[$i + 1] : null;
                  $joined    = $this->_contains($this->_nextLink, $following)
                      && $this->_contains($this->_prevLink, $prevChar);

                  $output .= '&#x'
                      . $this->getGlyphs($crntChar, $joined ? 1 : 0) . ';';
                  continue;
              }

              if ($prevChar === 'ل' && $this->_contains($alefs, $crntChar)) {
                  // Final form when the character before the lam connects.
                  $form = 0;
                  if (isset($chars[$i - 2])
                      && $this->_contains($this->_prevLink, $chars[$i - 2])
                  ) {
                      $form = 1;
                  }

                  $output .= '&#x'
                      . $this->getGlyphs($prevChar . $crntChar, $form) . ';';
                  $nextChar = $prevChar;
                  continue;
              }

              $form = 0;
              if ($this->_contains($this->_prevLink, $prevChar)) {
                  $form += 1;
              }
              if ($this->_contains($this->_nextLink, $nextChar)) {
                  $form += 2;
              }

              $hex = $this->getGlyphs($crntChar, $form);
              if ($hex === '') {
                  // Not an Arabic character we can shape: keep it as it is.
                  $output .= $crntChar;
              } else {
                  $output .= '&#x' . $hex . ';';
              }
              $nextChar = $crntChar;
          }

          return $this->decodeEntities($output, array('&'));
      }

      /**
       * Estimates (by a fitted polynomial) how many characters fit on one line
       * of an A4 page for a given font size.
       *
       * @param integer $font Font size
       *
       * @return integer Maximum number of characters per line
       * @author Khaled Al-Sham'aa <khaled@ar-php.org>
       */
      public function a4MaxChars($font)
      {
          $x = 381.6 - 31.57 * $font + 1.182 * pow($font, 2) - 0.02052 *
              pow($font, 3) + 0.0001342 * pow($font, 4);

          return floor($x - 2);
      }

      /**
       * Counts the lines a text occupies on an A4 page for a given font size.
       *
       * Words are separated by single spaces; a line is started whenever the
       * next word would reach the limit of a4MaxChars() and for every line
       * break found inside a word that still fits on the current line.
       *
       * @param string  $str  Text to measure
       * @param integer $font Font size
       *
       * @return integer Number of lines
       * @author Khaled Al-Sham'aa <khaled@ar-php.org>
       */
      public function a4Lines($str, $font)
      {
          $str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);

          $lines     = 0;
          $chars     = 0;
          $max_chars = $this->a4MaxChars($font);

          foreach (explode(' ', $str) as $word) {
              $w_len = mb_strlen($word, 'UTF-8') + 1;

              if ($chars + $w_len < $max_chars) {
                  if (strpos($word, "\n") !== false) {
                      $parts  = explode("\n", $word);
                      $breaks = count($parts) - 1;

                      // Every line break starts a new line (the first break
                      // used to be left out of the count).
                      $lines += $breaks;
                      $chars  = mb_strlen($parts[$breaks], 'UTF-8') + 1;
                  } else {
                      $chars += $w_len;
                  }
              } else {
                  $lines++;
                  $chars = $w_len;
              }
          }

          return $lines + 1;
      }

      /**
       * Shapes a whole document: wraps it into lines, keeps runs of Latin
       * words, numbers and punctuation readable, joins the Arabic glyphs and
       * optionally converts digits.
       *
       * Because preConvert() reverses the complete text, Latin words are
       * reversed beforehand (so they come out unchanged) and the lines are
       * emitted last-to-first.
       *
       * @param string  $str       Text in logical order
       * @param integer $max_chars Max number of chars you can fit in one line
       * @param boolean $hindo     If true, digits that are not attached to
       *                           Latin letters, digits or hyphens are output
       *                           as Arabic-Indic digits
       *
       * @return string Shaped text in visual order
       * @author Khaled Al-Sham'aa <khaled@ar-php.org>
       */
      public function utf8Glyphs($str, $max_chars = 50, $hindo = true)
      {
          $str = str_replace(array("\r\n", "\n", "\r"), " \n ", $str);
          $str = str_replace("\t", " ", $str);

          $words   = explode(' ', $str);
          $w_count = count($words);

          // A "Latin" word: optional leading newline, ASCII letters, digits
          // and symbols, optional trailing punctuation. The u modifier is
          // required for the Arabic comma / question mark in the last class
          // to be matched as characters rather than as single bytes.
          $pattern  = '/^(\n?)';
          $pattern .= '[a-z\d\\/\@\#\$\%\^\&\*\(\)\_\~\"\'\[\]\{\}\;\,\|\-\.\:!]*';
          $pattern .= '([\.\:\+\=\-\!،؟]?)$/iu';

          $english  = array();
          $en_index = -1;

          for ($i = 0; $i < $w_count; $i++) {
              if (preg_match($pattern, $words[$i], $matches)) {
                  if ($matches[1] !== '') {
                      // Move the leading newline behind the word.
                      $words[$i] = substr($words[$i], 1) . $matches[1];
                  }
                  if ($matches[2] !== '') {
                      // Move the trailing punctuation in front of the word.
                      $words[$i] = $matches[2]
                          . mb_substr($words[$i], 0, -1, 'UTF-8');
                  }

                  $words[$i] = $this->_reverse($words[$i]);
                  $english[] = $words[$i];

                  if ($en_index == -1) {
                      $en_index = $i;
                  }
              } elseif ($en_index != -1) {
                  // End of a Latin run: store its words in reverse order.
                  array_splice(
                      $words, $en_index, count($english), array_reverse($english)
                  );
                  $en_index = -1;
                  $english  = array();
              }
          }

          if ($en_index != -1) {
              array_splice(
                  $words, $en_index, count($english), array_reverse($english)
              );
          }

          // NOTE: lines that start with Latin words still need more work; the
          // former attempt at it was unreachable code and has been dropped.

          $lines   = array();
          $c_words = array();
          $c_chars = 0;

          foreach ($words as $word) {
              $w_len = mb_strlen($word, 'UTF-8') + 1;

              if ($c_chars + $w_len < $max_chars) {
                  if (strpos($word, "\n") !== false) {
                      $words_nl  = explode("\n", $word);
                      $c_words[] = $words_nl[0];
                      $lines[]   = implode(' ', $c_words);

                      $nl_num = count($words_nl) - 1;
                      for ($j = 1; $j < $nl_num; $j++) {
                          $lines[] = $words_nl[$j];
                      }

                      $c_words = array($words_nl[$nl_num]);
                      $c_chars = mb_strlen($words_nl[$nl_num], 'UTF-8') + 1;
                  } else {
                      $c_words[] = $word;
                      $c_chars  += $w_len;
                  }
              } else {
                  $lines[] = implode(' ', $c_words);
                  $c_words = array($word);
                  $c_chars = $w_len;
              }
          }
          $lines[] = implode(' ', $c_words);

          $output = rtrim(implode("\n", array_reverse($lines)));
          $output = $this->preConvert($output);

          if ($hindo) {
              $nums   = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
              $arNums = array('٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨', '٩');

              $p_nums   = array();
              $p_after  = array();
              $r_after  = array();
              $p_before = array();
              $r_before = array();

              foreach ($nums as $k => $v) {
                  // 1. every digit becomes Arabic-Indic ...
                  $p_nums[$k] = '/' . $v . '/ui';
                  // 2. ... then digits following Latin text are restored ...
                  $p_after[$k] = '/([a-z-\d]+)' . $arNums[$k] . '/ui';
                  $r_after[$k] = '${1}' . $v;
                  // 3. ... and so are digits preceding Latin text.
                  $p_before[$k] = '/' . $arNums[$k] . '([a-z-\d]+)/ui';
                  $r_before[$k] = $v . '${1}';
              }

              $steps = array(
                  array($p_nums, $arNums),
                  array($p_after, $r_after),
                  array($p_before, $r_before),
              );
              foreach ($steps as $step) {
                  $converted = preg_replace($step[0], $step[1], $output);
                  // preg_replace() yields null on malformed UTF-8; keep the
                  // text instead of returning null to the caller.
                  if ($converted !== null) {
                      $output = $converted;
                  }
              }
          }

          return $output;
      }

      /**
       * Decode all HTML entities (including numerical ones) to UTF-8.
       * Double-escaped entities are only decoded once
       * ("&amp;lt;" becomes "&lt;", not "<"). An ampersand that does not
       * start a well-formed entity is left untouched.
       *
       * @param string $text    The text to decode entities in.
       * @param array  $exclude An array of characters which should not be
       *                        decoded. For example, array('<', '&', '"').
       *                        This affects both named and numerical entities.
       *
       * @return string
       */
      protected function decodeEntities($text, $exclude = array())
      {
          static $table;

          // Named entities are kept in a table for quick processing.
          if (!isset($table)) {
              $table = array_flip(get_html_translation_table(HTML_ENTITIES));

              // Depending on the PHP version the table is ISO-8859-1 or
              // already UTF-8: convert only values that are not valid UTF-8.
              foreach ($table as $entity => $char) {
                  if (!mb_check_encoding($char, 'UTF-8')) {
                      $table[$entity] = mb_convert_encoding(
                          $char, 'UTF-8', 'ISO-8859-1'
                      );
                  }
              }

              // Add apostrophe (XML)
              $table['&apos;'] = "'";
          }

          $newtable = array_diff($table, $exclude);

          // Splitting on "&" handles every entity exactly once, so that
          // double-escaped entities are not decoded twice.
          $pieces = explode('&', $text);
          $text   = array_shift($pieces);

          foreach ($pieces as $piece) {
              if (preg_match('/^(#x?)?([A-Za-z0-9]+);/', $piece, $m)) {
                  $text .= $this->decodeEntities2(
                      $m[1], $m[2], '&' . $m[0], $newtable, $exclude
                  );
                  $text .= substr($piece, strlen($m[0]));
              } else {
                  // A plain ampersand, not an entity.
                  $text .= '&' . $piece;
              }
          }

          return $text;
      }

      /**
       * Helper function for decodeEntities: decodes one entity.
       *
       * @param string $prefix    '' for a named entity, '#' for a decimal and
       *                          '#x' for a hexadecimal numerical entity
       * @param string $codepoint Entity name or number (without prefix and ';')
       * @param string $original  The complete entity as found in the text
       * @param array  &$table    Named entities (entity => character)
       * @param array  &$exclude  Characters which should not be decoded
       *
       * @return string The decoded character, or $original when the entity is
       *                unknown, malformed, not a Unicode scalar value or
       *                excluded
       */
      protected function decodeEntities2($prefix, $codepoint, $original,
          &$table, &$exclude
      ) {
          // Named entity
          if (!$prefix) {
              return isset($table[$original]) ? $table[$original] : $original;
          }

          // Numerical entity: validate the digits before converting them.
          if ($prefix == '#x') {
              if (!preg_match('/^[0-9A-Fa-f]+$/', $codepoint)) {
                  return $original;
              }
              $codepoint = hexdec($codepoint);
          } else {
              if (!preg_match('/^[0-9]+$/', $codepoint)) {
                  return $original;
              }
              $codepoint = (float) $codepoint;
          }

          // Only Unicode scalar values can be encoded as UTF-8.
          if ($codepoint > 0x10FFFF
              || ($codepoint >= 0xD800 && $codepoint <= 0xDFFF)
          ) {
              return $original;
          }
          $codepoint = (int) $codepoint;

          // Encode codepoint as UTF-8 bytes
          if ($codepoint < 0x80) {
              $str = chr($codepoint);
          } elseif ($codepoint < 0x800) {
              $str = chr(0xC0 | ($codepoint >> 6)) .
                  chr(0x80 | ($codepoint & 0x3F));
          } elseif ($codepoint < 0x10000) {
              $str = chr(0xE0 | ($codepoint >> 12)) .
                  chr(0x80 | (($codepoint >> 6) & 0x3F)) .
                  chr(0x80 | ($codepoint & 0x3F));
          } else {
              $str = chr(0xF0 | ($codepoint >> 18)) .
                  chr(0x80 | (($codepoint >> 12) & 0x3F)) .
                  chr(0x80 | (($codepoint >> 6) & 0x3F)) .
                  chr(0x80 | ($codepoint & 0x3F));
          }

          // Check for excluded characters
          return in_array($str, $exclude, true) ? $original : $str;
      }
  }