PageRenderTime 58ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/application/libraries/sub/ArGlyphs.class.php

https://bitbucket.org/nadiasho/tako-3
PHP | 598 lines | 304 code | 75 blank | 219 comment | 68 complexity | 966f8904c7cf16059ccf01292bc226ce MD5 | raw file
  1. <?php
  2. /**
  3. * ----------------------------------------------------------------------
  4. *
  5. * Copyright (c) 2006-2011 Khaled Al-Sham'aa.
  6. *
  7. * http://www.ar-php.org
  8. *
  9. * PHP Version 5
  10. *
  11. * ----------------------------------------------------------------------
  12. *
  13. * LICENSE
  14. *
  15. * This program is open source product; you can redistribute it and/or
  16. * modify it under the terms of the GNU Lesser General Public License (LGPL)
  17. * as published by the Free Software Foundation; either version 3
  18. * of the License, or (at your option) any later version.
  19. *
  20. * This program is distributed in the hope that it will be useful,
  21. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23. * GNU Lesser General Public License for more details.
  24. *
  25. * You should have received a copy of the GNU Lesser General Public License
  26. * along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
  27. *
  28. * ----------------------------------------------------------------------
  29. *
  30. * Class Name: Arabic Glyphs is a simple class to render Arabic text
  31. *
  32. * Filename: ArGlyphs.class.php
  33. *
  34. * Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
  35. *
  36. * Purpose: This class takes Arabic text (encoded in the Windows-1256
  37. * character set) as input, performs Arabic glyph joining on it, and
  38. * outputs a UTF-8 hexadecimal stream that is no longer in logical order
  39. * but in visual order. This gives readable results when rendered by
  40. * simple Unicode renderers such as the GD and UFPDF libraries, which
  41. * do not yet handle the basic connecting glyphs of the Arabic language
  42. * and simply output all glyphs as stand-alone forms in left-to-right
  43. * order.
  44. *
  45. * ----------------------------------------------------------------------
  46. *
  47. * Arabic Glyphs is a class to render Arabic text
  48. *
  49. * PHP class that renders Arabic text by performing Arabic glyph joining on
  50. * it, then outputs a UTF-8 hexadecimal stream that gives readable results
  51. * in PHP libraries that support UTF-8.
  52. *
  53. * Example:
  54. * <code>
  55. * include('./Arabic.php');
  56. * $obj = new Arabic('ArGlyphs');
  57. *
  58. * $text = $obj->utf8Glyphs($text);
  59. *
  60. * imagettftext($im, 20, 0, 200, 100, $black, $font, $text);
  61. * </code>
  62. *
  63. * @category I18N
  64. * @package Arabic
  65. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  66. * @copyright 2006-2011 Khaled Al-Sham'aa
  67. *
  68. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  69. * @link http://www.ar-php.org
  70. */
  71. // New in PHP V5.3: Namespaces
  72. // namespace I18N\Arabic;
  73. //
  74. // $obj = new I18N\Arabic\ArGlyphs();
  75. //
  76. // use I18N\Arabic;
  77. // $obj = new Arabic\ArGlyphs();
  78. //
  79. // use I18N\Arabic\ArGlyphs as Glyphs;
  80. // $obj = new Glyphs();
  81. /**
  82. * This PHP class renders Arabic text by performing Arabic glyph joining on it
  83. *
  84. * @category I18N
  85. * @package Arabic
  86. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  87. * @copyright 2006-2011 Khaled Al-Sham'aa
  88. *
  89. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  90. * @link http://www.ar-php.org
  91. */
  92. class ArGlyphs
  93. {
  94. protected $glyphs = null;
  95. protected $hex = null;
  96. protected $prevLink = null;
  97. protected $nextLink = null;
  98. protected $vowel = null;
  99. /**
  100. * "a4Lines" method input charset
  101. * @var String
  102. */
  103. public $a4LinesInput = 'windows-1256';
  104. /**
  105. * Name of the textual "a4Lines" method parameters
  106. * @var Array
  107. */
  108. public $a4LinesVars = array('str');
  109. /**
  110. * "utf8Glyphs" method input charset
  111. * @var String
  112. */
  113. public $utf8GlyphsInput = 'windows-1256';
  114. /**
  115. * Name of the textual "utf8Glyphs" method parameters
  116. * @var Array
  117. */
  118. public $utf8GlyphsVars = array('str');
  119. /**
  120. * Loads the initial values used by the glyph-joining code.
       *
       * Resets the three character-set strings ($prevLink, $nextLink, $vowel)
       * that preConvert() searches with strpos(), and builds the two parallel
       * lookup tables read by getGlyphs(): $glyphs (the source characters) and
       * $hex (four 4-digit hexadecimal code points per $glyphs entry).
  121. */
  122. public function __construct()
  123. {
       // NOTE(review): all three sets are empty strings in this copy of the
       // file, so the strpos() membership tests in preConvert() can never
       // succeed. Presumably the original Windows-1256 literals were lost when
       // the file was re-encoded/extracted -- restore them from upstream.
  124. $this->prevLink = '';
  125. $this->nextLink = '';
  126. $this->vowel = '';
  127. /*
  128. $this->glyphs[''] = array('FE70','FE71');
  129. $this->glyphs[''] = array('FE72','FE72');
  130. $this->glyphs[''] = array('FE74','FE74');
  131. $this->glyphs[''] = array('FE76','FE77');
  132. $this->glyphs[''] = array('FE78','FE79');
  133. $this->glyphs[''] = array('FE7A','FE7B');
  134. $this->glyphs[''] = array('FE7C','FE7D');
  135. $this->glyphs[''] = array('FE7E','FE7E');
  136. */
       // Each "$this->glyphs .=" segment below is followed by the matching
       // "$this->hex .=" segment; getGlyphs() turns a character's offset in
       // $glyphs into an offset in $hex (16 hex digits per entry).
       // NOTE(review): most $glyphs literals appear empty here while the $hex
       // segments are populated, so the two tables are out of step in this
       // copy -- verify against the upstream Windows-1256 source file.
  137. $this->glyphs = '';
  138. $this->hex = '064B064B064B064B064C064C064C064C064D064D064D064D064E064E';
  139. $this->hex .= '064E064E064F064F064F064F06500650065006500651065106510651';
  140. $this->hex .= '0652065206520652';
  141. $this->glyphs .= '';
  142. $this->hex .= 'FE80FE80FE80FE80FE81FE82FE81FE82FE83FE84FE83FE84FE85FE86';
  143. $this->hex .= 'FE85FE86FE87FE88FE87FE88FE89FE8AFE8BFE8CFE8DFE8EFE8DFE8E';
  144. $this->hex .= 'FE8FFE90FE91FE92';
  145. $this->glyphs .= '';
  146. $this->hex .= 'FE93FE94FE93FE94FE95FE96FE97FE98FE99FE9AFE9BFE9CFE9DFE9E';
  147. $this->hex .= 'FE9FFEA0FEA1FEA2FEA3FEA4FEA5FEA6FEA7FEA8FEA9FEAAFEA9FEAA';
  148. $this->hex .= 'FEABFEACFEABFEAC';
  149. $this->glyphs .= '';
  150. $this->hex .= 'FEADFEAEFEADFEAEFEAFFEB0FEAFFEB0FEB1FEB2FEB3FEB4FEB5FEB6';
  151. $this->hex .= 'FEB7FEB8FEB9FEBAFEBBFEBCFEBDFEBEFEBFFEC0FEC1FEC2FEC3FEC4';
  152. $this->hex .= 'FEC5FEC6FEC7FEC8';
  153. $this->glyphs .= '';
  154. $this->hex .= 'FEC9FECAFECBFECCFECDFECEFECFFED0FED1FED2FED3FED4FED5FED6';
  155. $this->hex .= 'FED7FED8FED9FEDAFEDBFEDCFEDDFEDEFEDFFEE0FEE1FEE2FEE3FEE4';
  156. $this->hex .= 'FEE5FEE6FEE7FEE8';
  157. $this->glyphs .= 'ܡ';
  158. $this->hex .= 'FEE9FEEAFEEBFEECFEEDFEEEFEEDFEEEFEEFFEF0FEEFFEF0FEF1FEF2';
  159. $this->hex .= 'FEF3FEF40640064006400640060C060C060C060C061F061F061F061F';
  160. $this->hex .= '061B061B061B061B';
  161. // Support the extra 4 Persian letters (p), (ch), (zh) and (g)
  162. // This needs value in getGlyphs function to be 52 instead of 48
  163. // $this->glyphs .= chr(129).chr(141).chr(142).chr(144);
  164. // $this->hex .= 'FB56FB57FB58FB59FB7AFB7BFB7CFB7DFB8AFB8BFB8AFB8BFB92';
  165. // $this->hex .= 'FB93FB94FB95';
  166. //
  167. // $this->prevLink .= chr(129).chr(141).chr(142).chr(144);
  168. // $this->nextLink .= chr(129).chr(141).chr(142).chr(144);
  169. //
  170. // Example: $text = ' : ǐ ݁';
  171. // Email Yossi Beck <yosbeck@gmail.com> ask him to save that example
  172. // string using ANSI encoding in Notepad
       // Final segment: the FEF5-FEFC code points in the $hex lines below are
       // the lam-alef ligature forms; getGlyphs() halves offsets past 48,
       // which suggests these $glyphs entries are two bytes each
       // (NOTE(review): the literal is empty in this copy -- confirm).
  173. $this->glyphs .= '';
  174. $this->hex .= 'FEF5FEF6FEF5FEF6FEF7FEF8FEF7FEF8FEF9FEFAFEF9FEFAFEFBFEFC';
  175. $this->hex .= 'FEFBFEFC';
  176. }
  /**
   * Look up the presentation-form code point for a character.
   *
   * The character is located in the $glyphs table and its offset is mapped
   * onto the parallel $hex table, where every entry occupies 16 hexadecimal
   * digits (four forms of four digits each). Offsets beyond 48 are halved
   * before the mapping, which is consistent with the tail of $glyphs holding
   * two-byte sequences.
   *
   * @param string  $char Character (or character pair) to look up
   * @param integer $type Index of the wanted form within the entry (0-3)
   *
   * @return string Four hexadecimal digits taken from the $hex table
   */
  protected function getGlyphs($char, $type)
  {
      $index = strpos($this->glyphs, $char);

      // Entries after the first 48 are addressed at half their byte distance.
      $slot = ($index > 48) ? ($index - 48) / 2 + 48 : $index;

      $offset = $slot * 16 + $type * 4;

      return substr($this->hex, $offset, 4);
  }
  194. /**
  195. * Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
  196. * hexadecimals stream
       *
       * The string is split into single bytes and walked from the last byte to
       * the first, so the result is in reversed (visual) order. ASCII bytes are
       * copied through (with brackets mirrored); every other byte is emitted as
       * a "&#xHHHH;" entity chosen by getGlyphs(), and the whole buffer is
       * finally passed through decodeEntities().
  197. *
  198. * @param string $str Arabic string in Windows-1256 charset
  199. *
  200. * @return string Arabic glyph joining in UTF-8 hexadecimals stream
  201. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  202. */
  203. protected function preConvert($str)
  204. {
  205. $crntChar = null;
  206. $prevChar = null;
  207. $nextChar = null;
  208. $output = '';
  209. $chars = str_split($str);
  210. $max = count($chars);
  211. for ($i = $max - 1; $i >= 0; $i--) {
  212. $crntChar = $chars[$i];
       // NOTE(review): $prevChar is only refreshed while $i > 0, so on the
       // final iteration ($i == 0) it still holds the value from the previous
       // pass instead of "no previous character" -- confirm whether the first
       // character of the string is meant to see a neighbour here.
  213. if ($i > 0) {
  214. $prevChar = $chars[$i - 1];
  215. }
       // Skip over up to two vowel marks to find the previous base letter.
       // NOTE(review): $chars[$i - 2] / $chars[$i - 3] are read without a
       // bounds check and can address a negative offset near the string start.
  216. if ($prevChar && strpos($this->vowel, $prevChar) !== false) {
  217. $prevChar = $chars[$i - 2];
  218. if ($prevChar && strpos($this->vowel, $prevChar) !== false) {
  219. $prevChar = $chars[$i - 3];
  220. }
  221. }
       // Mirror brackets: because the text is emitted back-to-front, a closing
       // bracket is swapped for its opening counterpart and vice versa.
  222. $Reversed = false;
  223. $flip_arr = ')]>}';
  224. $ReversedChr = '([<{';
  225. if ($crntChar && strpos($flip_arr, $crntChar) !== false) {
  226. $crntChar = $ReversedChr[strpos($flip_arr, $crntChar)];
  227. $Reversed = true;
  228. } else {
  229. $Reversed = false;
  230. }
  231. if ($crntChar && !$Reversed &&
  232. (strpos($ReversedChr, $crntChar) !== false)) {
  233. $crntChar = $flip_arr[strpos($ReversedChr, $crntChar)];
  234. }
       // Bytes below 128 (ASCII) are copied to the output unchanged.
  235. if (ord($crntChar) < 128) {
  236. $output .= $crntChar;
  237. $nextChar = $crntChar;
  238. continue;
  239. }
       // NOTE(review): the two string literals in this condition are empty in
       // this copy of the file. Judging by the ligature handling further down
       // they presumably held Windows-1256 letters that were lost in
       // extraction -- restore from upstream before relying on this branch.
  240. if ($crntChar == '' && isset($chars[$i + 1]) &&
  241. (strpos('', $chars[$i + 1]) !== false)) {
  242. continue;
  243. }
       // Vowel marks: form 1 when both neighbours join, otherwise form 0.
       // NOTE(review): $chars[$i + 1] is read without isset() and is out of
       // range when the vowel is the last byte of the string.
  244. if ($crntChar && strpos($this->vowel, $crntChar) !== false) {
  245. if ((strpos($this->nextLink, $chars[$i + 1]) !== false) &&
  246. (strpos($this->prevLink, $prevChar) !== false)) {
  247. $output .= '&#x' . $this->getGlyphs($crntChar, 1) . ';';
  248. } else {
  249. $output .= '&#x' . $this->getGlyphs($crntChar, 0) . ';';
  250. }
  251. continue;
  252. }
  253. $form = 0;
       // Two-character ligature: the pair $prevChar.$crntChar is looked up as
       // one glyph. NOTE(review): both literals are empty in this copy (see
       // above), and $chars[$i - 2] is again read without a bounds check.
  254. if ($prevChar == '' && (strpos('', $crntChar) !== false)) {
  255. if (strpos($this->prevLink, $chars[$i - 2]) !== false) {
  256. $form++;
  257. }
  258. $output .= '&#x'.$this->getGlyphs($prevChar.$crntChar, $form).';';
  259. $nextChar = $prevChar;
  260. continue;
  261. }
       // Pick the form index: +1 when the previous letter joins to this one,
       // +2 when this one joins to the next; the sum (0-3) selects one of the
       // four code points stored per entry in the $hex table.
  262. if ($prevChar && strpos($this->prevLink, $prevChar) !== false) {
  263. $form++;
  264. }
  265. if ($nextChar && strpos($this->nextLink, $nextChar) !== false) {
  266. $form += 2;
  267. }
  268. $output .= '&#x' . $this->getGlyphs($crntChar, $form) . ';';
  269. $nextChar = $crntChar;
  270. }
  271. // from Arabic Presentation Forms-B, Range: FE70-FEFF,
  272. // file "UFE70.pdf" (in reversed order)
  273. // into Arabic Presentation Forms-A, Range: FB50-FDFF, file "UFB50.pdf"
  274. // Example: $output = str_replace('&#xFEA0;&#xFEDF;', '&#xFCC9;', $output);
  275. // Lam Jeem
       // Turn the "&#xHHHH;" entities into UTF-8 bytes, leaving "&" itself
       // encoded. The "$exclude = ..." inside the call is a plain assignment
       // to a local variable, not a named argument.
  276. $output = $this->decodeEntities($output, $exclude = array('&'));
  277. return $output;
  278. }
  /**
   * Estimate the maximum number of characters that fit on one A4 page line
   * for a given font size.
   *
   * The estimate is a fourth-degree polynomial in the font size (described
   * upstream as the result of a regression analysis), reduced by a margin of
   * two characters and rounded down.
   *
   * @param integer $font Font size
   *
   * @return integer Maximum number of characters per line
   * @author Khaled Al-Sham'aa <khaled@ar-php.org>
   */
  public function a4MaxChars($font)
  {
      $x = 381.6 - 31.57 * $font + 1.182 * pow($font, 2) - 0.02052 *
           pow($font, 3) + 0.0001342 * pow($font, 4);

      // floor() yields a float; cast so the documented integer contract holds.
      return (int) floor($x - 2);
  }
  /**
   * Calculate how many lines a given Arabic text needs on an A4 page at a
   * given font size.
   *
   * The text is split on spaces and the words are packed greedily into lines
   * of at most a4MaxChars($font) characters. Every explicit line break
   * embedded in a word starts a new line, and the characters after the last
   * break open the next line.
   *
   * @param string  $str  Arabic string you would like to split into lines
   * @param integer $font Font size
   *
   * @return integer Number of lines for a given Arabic string in A4 page size
   * @author Khaled Al-Sham'aa <khaled@ar-php.org>
   */
  public function a4Lines($str, $font)
  {
      $str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);

      $lines     = 0;
      $chars     = 0;
      $max_chars = $this->a4MaxChars($font);

      foreach (explode(' ', $str) as $word) {
          $w_len    = strlen($word) + 1;
          $segments = explode("\n", $word);
          $breaks   = count($segments) - 1;

          // The word does not fit on the current line: wrap first.
          if ($chars + $w_len >= $max_chars) {
              $lines++;
              $chars = 0;
          }

          if ($breaks > 0) {
              // One new line per embedded break (the previous code counted
              // one too few, and none at all when the word also wrapped).
              $lines += $breaks;
              $chars  = strlen($segments[$breaks]) + 1;
          } else {
              $chars += $w_len;
          }
      }

      // Account for the line that is still open at the end of the text.
      return $lines + 1;
  }
  /**
   * Convert an Arabic Windows-1256 string into joined glyphs in UTF-8,
   * handling the whole document: Latin sections, numbers, brackets and
   * line wrapping.
   *
   * Steps:
   *  1. Normalise line breaks and tabs, then split the text on spaces.
   *  2. Pre-reverse every run of Latin/numeric words (each word byte-wise,
   *     and the order of the words inside the run) so that the final
   *     right-to-left reversal in preConvert() restores them to readable
   *     left-to-right order.
   *  3. Pack the words into lines of fewer than $max_chars characters.
   *  4. Emit the lines in reverse order and pass the result to preConvert().
   *  5. Optionally replace the digits 0-9 with Arabic-Indic digits.
   *
   * @param string  $str       Arabic string in Windows-1256 charset
   * @param integer $max_chars Max number of chars you can fit in one line
   * @param boolean $hindo     If true use Hindo digits else use Arabic digits
   *
   * @return string Arabic glyph joining in UTF-8 hexadecimals stream (take
   *                care of whole document including English sections as well
   *                as numbers and arcs etc...)
   * @author Khaled Al-Sham'aa <khaled@ar-php.org>
   */
  public function utf8Glyphs($str, $max_chars = 50, $hindo = true)
  {
      $str = str_replace(array("\r\n", "\n", "\r"), " \n", $str);
      $str = str_replace("\t", " ", $str);

      $words   = explode(' ', $str);
      $w_count = count($words);

      // The pattern is loop-invariant, so build it once.
      $pattern  = '/^(\n?)';
      $pattern .= '[a-z\d\\/\@\#\$\%\^\&\*\(\)\_\~\"\'\[\]\{\}\;\,\|\-\.\:]*';
      $pattern .= '([\.\:\+\=\-\!]?)$/i';

      $english  = array();
      $en_index = -1;

      for ($i = 0; $i < $w_count; $i++) {
          if (preg_match($pattern, $words[$i], $matches)) {
              // Move a leading line break to the end and a trailing
              // punctuation mark to the front, so that the byte-wise
              // reversal below puts them back where they started.
              if ($matches[1]) {
                  $words[$i] = substr($words[$i], 1) . $matches[1];
              }
              if ($matches[2]) {
                  $words[$i] = $matches[2] . substr($words[$i], 0, -1);
              }

              $words[$i] = strrev($words[$i]);
              $english[] = $words[$i];

              if ($en_index == -1) {
                  $en_index = $i;
              }
          } elseif ($en_index != -1) {
              // A non-Latin word closes the run: reverse the word order.
              array_splice(
                  $words, $en_index, count($english), array_reverse($english)
              );
              $en_index = -1;
              $english  = array();
          }
      }

      // A run that reaches the end of the text has no closing word, so it
      // must be flushed here; otherwise its words come out in swapped order.
      if ($en_index != -1) {
          array_splice(
              $words, $en_index, count($english), array_reverse($english)
          );
      }

      $lines   = array();
      $c_chars = 0;
      $c_words = array();

      for ($i = 0; $i < $w_count; $i++) {
          $w_len = strlen($words[$i]) + 1;

          if ($c_chars + $w_len < $max_chars) {
              if (strpos($words[$i], "\n") !== false) {
                  // The part before the first break ends the current line,
                  // middle parts are lines of their own and the last part
                  // opens the next line.
                  $words_nl  = explode("\n", $words[$i]);
                  $c_words[] = $words_nl[0];
                  $lines[]   = implode(' ', $c_words);

                  $nl_num = count($words_nl) - 1;
                  for ($j = 1; $j < $nl_num; $j++) {
                      $lines[] = $words_nl[$j];
                  }

                  $c_words = array($words_nl[$nl_num]);
                  $c_chars = strlen($words_nl[$nl_num]) + 1;
              } else {
                  $c_words[] = $words[$i];
                  $c_chars  += $w_len;
              }
          } else {
              $lines[] = implode(' ', $c_words);
              $c_words = array($words[$i]);
              $c_chars = $w_len;
          }
      }
      $lines[] = implode(' ', $c_words);

      // Lines go out last-to-first because preConvert() reverses the whole
      // buffer, which puts them back in reading order.
      $output = rtrim(implode("\n", array_reverse($lines)));
      $output = $this->preConvert($output);

      if ($hindo) {
          $nums   = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
          $arNums = array('٠', '١', '٢', '٣', '٤',
                          '٥', '٦', '٧', '٨', '٩');
          $output = str_replace($nums, $arNums, $output);
      }

      return $output;
  }
  /**
   * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
   * Double-escaped entities will only be decoded once
   * ("&amp;lt;" becomes "&lt;", not "<").
   *
   * Text is split on "&" and each piece is decoded only when it starts with a
   * well-formed entity body ("#x" + hex digits, "#" + decimal digits, or an
   * alphanumeric name) terminated by ";". Anything else, such as a bare "&"
   * or an "&" with no closing ";", is copied through unchanged.
   *
   * @param string $text    The text to decode entities in.
   * @param array  $exclude An array of characters which should not be decoded.
   *                        For example, array('<', '&', '"'). This affects
   *                        both named and numerical entities.
   *
   * @return string
   */
  protected function decodeEntities($text, $exclude = array())
  {
      static $table;

      // We store named entities in a table for quick processing.
      if (!isset($table)) {
          // Request the table in UTF-8 explicitly. Since PHP 5.4 the default
          // charset is already UTF-8, so re-encoding the values with
          // utf8_encode() (as was done before) double-encodes them.
          $table = array_flip(
              get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8')
          );
          // Add apostrophe (XML)
          $table['&apos;'] = "'";
      }

      $newtable = array_diff($table, $exclude);

      // Entities are decoded in a single left-to-right pass so that
      // double-escaped entities are not decoded twice.
      $pieces = explode('&', $text);
      $text   = array_shift($pieces);

      foreach ($pieces as $piece) {
          $isEntity = preg_match(
              '/^(#x[0-9A-Fa-f]+|#[0-9]+|[A-Za-z0-9]+);/', $piece, $match
          );

          if (!$isEntity) {
              // Not an entity: put back the "&" that explode() removed.
              $text .= '&' . $piece;
              continue;
          }

          $body = $match[1];
          if (strncmp($body, '#x', 2) === 0) {
              $one = '#x';
          } elseif ($body[0] === '#') {
              $one = '#';
          } else {
              $one = '';
          }

          $two  = substr($body, strlen($one));
          $zero = '&' . $body . ';';

          $text .= $this->decodeEntities2($one, $two, $zero, $newtable, $exclude)
              . substr($piece, strlen($match[0]));
      }

      return $text;
  }
  /**
   * Helper function for decodeEntities: decode a single entity.
   *
   * Named entities are looked up in $table. Numerical entities are converted
   * to their UTF-8 byte sequence. The original entity text is returned
   * unchanged when the name is unknown, when the number is malformed or is
   * not a valid Unicode scalar value, or when the decoded character is
   * listed in $exclude.
   *
   * @param string $prefix    Prefix: '' (named), '#' (decimal) or '#x' (hex)
   * @param string $codepoint Entity name or number, without prefix and ";"
   * @param string $original  The complete entity as found in the text
   * @param array  &$table    Store named entities in a table
   * @param array  &$exclude  An array of characters which should not be decoded
   *
   * @return string
   */
  protected function decodeEntities2($prefix, $codepoint, $original,
      &$table, &$exclude
  ) {
      // Named entity
      if (!$prefix) {
          return isset($table[$original]) ? $table[$original] : $original;
      }

      // Numerical entity: validate the digits before converting, so that
      // garbage is passed through instead of being decoded as code point 0.
      if ($prefix == '#x') {
          if (!preg_match('/^[0-9A-Fa-f]+$/', $codepoint)) {
              return $original;
          }
          $codepoint = hexdec($codepoint);
      } else {
          if (!preg_match('/^[0-9]+$/', $codepoint)) {
              return $original;
          }
          $codepoint = (int) $codepoint;
      }

      // Reject values that cannot be encoded as well-formed UTF-8: anything
      // above U+10FFFF and the surrogate range U+D800-U+DFFF.
      if ($codepoint > 0x10FFFF
          || ($codepoint >= 0xD800 && $codepoint <= 0xDFFF)
      ) {
          return $original;
      }
      $codepoint = (int) $codepoint;

      // Encode codepoint as UTF-8 bytes
      if ($codepoint < 0x80) {
          $str = chr($codepoint);
      } elseif ($codepoint < 0x800) {
          $str = chr(0xC0 | ($codepoint >> 6)) .
                 chr(0x80 | ($codepoint & 0x3F));
      } elseif ($codepoint < 0x10000) {
          $str = chr(0xE0 | ($codepoint >> 12)) .
                 chr(0x80 | (($codepoint >> 6) & 0x3F)) .
                 chr(0x80 | ($codepoint & 0x3F));
      } else {
          $str = chr(0xF0 | ($codepoint >> 18)) .
                 chr(0x80 | (($codepoint >> 12) & 0x3F)) .
                 chr(0x80 | (($codepoint >> 6) & 0x3F)) .
                 chr(0x80 | ($codepoint & 0x3F));
      }

      // Check for excluded characters (strict, so "0" never equals "00" etc.)
      return in_array($str, $exclude, true) ? $original : $str;
  }
  522. }