PageRenderTime 54ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/sondage/classes/core/ArGlyphs.class.php

https://bitbucket.org/bontiv/insomnia
PHP | 506 lines | 276 code | 64 blank | 166 comment | 64 complexity | 6cb6b18a8cea133448a3e5b2aa7a17c7 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, LGPL-2.1, GPL-3.0, BSD-3-Clause, GPL-2.0
  1. <?php
  2. /**
  3. * ----------------------------------------------------------------------
  4. *
  5. * Copyright (C) 2009 by Khaled Al-Shamaa.
  6. *
  7. * http://www.ar-php.org
  8. *
  9. * ----------------------------------------------------------------------
  10. *
  11. * LICENSE
  12. *
  13. * This program is open source product; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public License (LGPL)
  15. * as published by the Free Software Foundation; either version 3
  16. * of the License, or (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
  25. *
  26. * ----------------------------------------------------------------------
  27. *
  28. * Class Name: Arabic Glyphs is a simple class to render Arabic text
  29. *
  30. * Filename: ArGlyphs.class.php
  31. *
  32. * Original Author(s): Khaled Al-Sham'aa <khaled.alshamaa@gmail.com>
  33. *
  34. * Purpose: This class takes Arabic text (encoded in Windows-1256 character
  35. * set) as input and performs Arabic glyph joining on it and outputs
  36. * a UTF-8 hexadecimals stream that is no longer logically arranged
  37. * but in a visual order which gives readable results when formatted
  38. * with a simple Unicode rendering just like GD and UFPDF libraries
  39. * that does not handle basic connecting glyphs of Arabic language
  40. * yet but simply outputs all stand alone glyphs in left-to-right
  41. * order.
  42. *
  43. * ----------------------------------------------------------------------
  44. *
  45. * Arabic Glyphs is a class to render Arabic text
  46. *
  47. * PHP class to render Arabic text by performing Arabic glyph joining on it,
  48. * then outputting a UTF-8 hexadecimal stream that gives readable results in
  49. * PHP libraries that support UTF-8.
  50. *
  51. * Example:
  52. * <code>
  53. * include('./Arabic.php');
  54. * $Arabic = new Arabic('ArGlyphs');
  55. *
  56. * $text = $Arabic->utf8Glyphs($text);
  57. *
  58. * imagettftext($im, 20, 0, 200, 100, $black, $font, $text);
  59. * </code>
  60. *
  61. * @category Text
  62. * @package Arabic
  63. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  64. * @copyright 2009 Khaled Al-Shamaa
  65. *
  66. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  67. * @link http://www.ar-php.org
  68. */
  69. // New in PHP V5.3: Namespaces
  70. // namespace Arabic/ArGlyphs;
  71. /**
  72. * This PHP class render Arabic text by performs Arabic glyph joining on it
  73. *
  74. * @category Text
  75. * @package Arabic
  76. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  77. * @copyright 2009 Khaled Al-Shamaa
  78. *
  79. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  80. * @link http://www.ar-php.org
  81. */
  82. class ArGlyphs
  83. {
  84. protected $_glyphs = null;
  85. protected $_hex = null;
  86. protected $_prevLink;
  87. protected $_nextLink;
  88. /**
  89. * Loads initialize values
  90. */
  91. public function __construct()
  92. {
  93. $this->_prevLink = '���������������������������';
  94. $this->_nextLink = '������������������������������������';
  95. $this->vowel = '��������';
  96. /*
  97. $this->_glyphs['�'] = array('FE70','FE71');
  98. $this->_glyphs['�'] = array('FE72','FE72');
  99. $this->_glyphs['�'] = array('FE74','FE74');
  100. $this->_glyphs['�'] = array('FE76','FE77');
  101. $this->_glyphs['�'] = array('FE78','FE79');
  102. $this->_glyphs['�'] = array('FE7A','FE7B');
  103. $this->_glyphs['�'] = array('FE7C','FE7D');
  104. $this->_glyphs['�'] = array('FE7E','FE7E');
  105. */
  106. $this->_glyphs = '��������';
  107. $this->_hex = '064B064B064B064B064C064C064C064C064D064D064D064D064E064E064E064E064F064F064F064F065006500650065006510651065106510652065206520652';
  108. $this->_glyphs .= '��������';
  109. $this->_hex .= 'FE80FE80FE80FE80FE81FE82FE81FE82FE83FE84FE83FE84FE85FE86FE85FE86FE87FE88FE87FE88FE89FE8AFE8BFE8CFE8DFE8EFE8DFE8EFE8FFE90FE91FE92';
  110. $this->_glyphs .= '��������';
  111. $this->_hex .= 'FE93FE94FE93FE94FE95FE96FE97FE98FE99FE9AFE9BFE9CFE9DFE9EFE9FFEA0FEA1FEA2FEA3FEA4FEA5FEA6FEA7FEA8FEA9FEAAFEA9FEAAFEABFEACFEABFEAC';
  112. $this->_glyphs .= '��������';
  113. $this->_hex .= 'FEADFEAEFEADFEAEFEAFFEB0FEAFFEB0FEB1FEB2FEB3FEB4FEB5FEB6FEB7FEB8FEB9FEBAFEBBFEBCFEBDFEBEFEBFFEC0FEC1FEC2FEC3FEC4FEC5FEC6FEC7FEC8';
  114. $this->_glyphs .= '��������';
  115. $this->_hex .= 'FEC9FECAFECBFECCFECDFECEFECFFED0FED1FED2FED3FED4FED5FED6FED7FED8FED9FEDAFEDBFEDCFEDDFEDEFEDFFEE0FEE1FEE2FEE3FEE4FEE5FEE6FEE7FEE8';
  116. $this->_glyphs .= '����ܡ��';
  117. $this->_hex .= 'FEE9FEEAFEEBFEECFEEDFEEEFEEDFEEEFEEFFEF0FEEFFEF0FEF1FEF2FEF3FEF40640064006400640060C060C060C060C061F061F061F061F061B061B061B061B';
  118. $this->_glyphs .= '��������';
  119. $this->_hex .= 'FEF5FEF6FEF5FEF6FEF7FEF8FEF7FEF8FEF9FEFAFEF9FEFAFEFBFEFCFEFBFEFC';
  120. }
  121. /**
  122. * Get glyphs
  123. *
  124. * @param string $char Char
  125. * @param integer $type Type
  126. *
  127. * @return string
  128. */
  129. protected function _getGlyphs($char, $type)
  130. {
  131. $pos = strpos($this->_glyphs, $char);
  132. if ($pos > 48) {
  133. $pos = ($pos-48)/2 + 48;
  134. }
  135. $pos = $pos*16 + $type*4;
  136. return substr($this->_hex, $pos, 4);
  137. }
  138. /**
  139. * Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
  140. * hexadecimals stream
  141. *
  142. * @param string $str Arabic string in Windows-1256 charset
  143. *
  144. * @return string Arabic glyph joining in UTF-8 hexadecimals stream
  145. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  146. */
  147. protected function _preConvert($str)
  148. {
  149. $crntChar = null;
  150. $prevChar = null;
  151. $nextChar = null;
  152. $output = '';
  153. $chars = preg_split('//', $str);
  154. $max = count($chars);
  155. for ($i = $max - 1; $i >= 0; $i--) {
  156. $crntChar = $chars[$i];
  157. if ($i > 0) {
  158. $prevChar = $chars[$i - 1];
  159. } else {
  160. $prevChar = null;
  161. }
  162. if ($prevChar && strpos($this->vowel, $prevChar) !== false) {
  163. $prevChar = $chars[$i - 2];
  164. if ($prevChar && strpos($this->vowel, $prevChar) !== false) {
  165. $prevChar = $chars[$i - 3];
  166. }
  167. }
  168. $Reversed = false;
  169. $flip_arr = ')]>}';
  170. $ReversedChr = '([<{';
  171. if ($crntChar && strpos($flip_arr, $crntChar) !== false) {
  172. $crntChar = substr($ReversedChr, strpos($flip_arr, $crntChar), 1);
  173. $Reversed = true;
  174. } else {
  175. $Reversed = false;
  176. }
  177. if ($crntChar && (strpos($ReversedChr, $crntChar) !== false) && !$Reversed) {
  178. $crntChar = substr($flip_arr, strpos($ReversedChr, $crntChar), 1);
  179. }
  180. if ($crntChar && strpos($this->vowel, $crntChar) !== false) {
  181. if ((strpos($this->_nextLink, $chars[$i + 1]) !== false) && (strpos($this->_prevLink, $prevChar) !== false)) {
  182. $output .= '&#x' . $this->_getGlyphs($crntChar, 1) . ';';
  183. } else {
  184. $output .= '&#x' . $this->_getGlyphs($crntChar, 0) . ';';
  185. }
  186. continue;
  187. }
  188. if (isset($chars[$i + 1]) && in_array($chars[$i + 1], array('�', '�', '�', '�')) && $crntChar == '�') {
  189. continue;
  190. }
  191. if (ord($crntChar) < 128) {
  192. $output .= $crntChar;
  193. $nextChar = $crntChar;
  194. continue;
  195. }
  196. $form = 0;
  197. if (in_array($crntChar, array('�', '�', '�', '�')) && $prevChar == '�') {
  198. if (strpos($this->_prevLink, $chars[$i - 2]) !== false) {
  199. $form++;
  200. }
  201. $output .= '&#x' . $this->_getGlyphs($prevChar . $crntChar, $form) . ';';
  202. $nextChar = $prevChar;
  203. continue;
  204. }
  205. if ($prevChar && strpos($this->_prevLink, $prevChar) !== false) {
  206. $form++;
  207. }
  208. if ($nextChar && strpos($this->_nextLink, $nextChar) !== false) {
  209. $form += 2;
  210. }
  211. $output .= '&#x' . $this->_getGlyphs($crntChar, $form) . ';';
  212. $nextChar = $crntChar;
  213. }
  214. $output = $this->_decodeEntities($output, $exclude = array('&'));
  215. return $output;
  216. }
  217. /**
  218. * Regression analysis calculate roughly the max number of character fit in
  219. * one A4 page line for a given font size.
  220. *
  221. * @param integer $font Font size
  222. *
  223. * @return integer Maximum number of characters per line
  224. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  225. */
  226. public function a4_max_chars($font)
  227. {
  228. $x = 381.6 - 31.57 * $font + 1.182 * pow($font, 2) - 0.02052 * pow($font, 3) + 0.0001342 * pow($font, 4);
  229. return floor($x - 2);
  230. }
  231. /**
  232. * Calculate the lines number of given Arabic text and font size that will
  233. * fit in A4 page size
  234. *
  235. * @param string $str Arabic string you would like to split it into lines
  236. * @param integer $font Font size
  237. * @param string $inputCharset (optional) Input charset [utf-8|windows-1256|iso-8859-6]
  238. * default value is NULL (use set input charset)
  239. * @param object $main Main Ar-PHP object to access charset converter options
  240. *
  241. * @return integer Number of lines for a given Arabic string in A4 page size
  242. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  243. */
  244. public function a4_lines($str, $font, $inputCharset = null, $main = null)
  245. {
  246. if ($main) {
  247. if ($inputCharset == null) $inputCharset = $main->getInputCharset();
  248. $str = $main->coreConvert($str, $inputCharset, 'windows-1256');
  249. }
  250. $str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);
  251. $lines = 0;
  252. $chars = 0;
  253. $words = explode(' ', $str);
  254. $w_count = count($words);
  255. $max_chars = $this->a4_max_chars($font);
  256. for ($i = 0; $i < $w_count; $i++) {
  257. $w_len = strlen($words[$i]) + 1;
  258. if ($chars + $w_len < $max_chars) {
  259. if (preg_match("/\n/i", $words[$i])) {
  260. $words_nl = split("\n", $words[$i]);
  261. $nl_num = count($words_nl) - 1;
  262. for ($j = 1; $j < $nl_num; $j++) {
  263. $lines++;
  264. }
  265. $chars = strlen($words_nl[$nl_num]) + 1;
  266. } else {
  267. $chars += $w_len;
  268. }
  269. } else {
  270. $lines++;
  271. $chars = $w_len;
  272. }
  273. }
  274. $lines++;
  275. return $lines;
  276. }
  277. /**
  278. * Convert Arabic Windows-1256 charset string into glyph joining in UTF-8
  279. * hexadecimals stream (take care of whole the document including English
  280. * sections as well as numbers and arcs etc...)
  281. *
  282. * @param string $str Arabic string in Windows-1256 charset
  283. * @param integer $max_chars Max number of chars you can fit in one line
  284. * @param boolean $hindo If true use Hindo digits else use Arabic digits
  285. * @param string $inputCharset (optional) Input charset [utf-8|windows-1256|iso-8859-6]
  286. * default value is NULL (use set input charset)
  287. * @param object $main Main Ar-PHP object to access charset converter options
  288. *
  289. * @return string Arabic glyph joining in UTF-8 hexadecimals stream (take
  290. * care of whole document including English sections as well
  291. * as numbers and arcs etc...)
  292. * @author Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
  293. */
  294. public function utf8Glyphs($str, $max_chars = 50, $hindo = true, $inputCharset = null, $main = null)
  295. {
  296. if ($main) {
  297. if ($inputCharset == null) $inputCharset = $main->getInputCharset();
  298. $str = $main->coreConvert($str, $inputCharset, 'windows-1256');
  299. }
  300. $str = str_replace(array("\r\n", "\n", "\r"), "\n", $str);
  301. $lines = array();
  302. $words = explode(' ', $str);
  303. $w_count = count($words);
  304. $c_chars = 0;
  305. $c_words = array();
  306. $english = array();
  307. $en_index = -1;
  308. for ($i = 0; $i < $w_count; $i++) {
  309. if (preg_match("/^[a-z\d\\/\@\#\$\%\^\&\*\(\)\_\~\"\'\[\]\{\}\;\,\|]*([\.\:\+\=\-\!��]?)$/i", $words[$i], $matches)) {
  310. if ($matches[1]) $words[$i] = $matches[1].substr($words[$i], 0, -1);
  311. $words[$i] = strrev($words[$i]);
  312. array_push($english, $words[$i]);
  313. if ($en_index == -1) {
  314. $en_index = $i;
  315. }
  316. } elseif ($en_index != -1) {
  317. $en_count = count($english);
  318. for ($j = 0; $j < $en_count; $j++) {
  319. $words[$en_index + $j] = $english[$en_count - 1 - $j];
  320. }
  321. $en_index = -1;
  322. $english = array();
  323. }
  324. $en_count = count($english);
  325. for ($j = 0; $j < $en_count; $j++) {
  326. $words[$en_index + $j] = $english[$en_count - 1 - $j];
  327. }
  328. }
  329. for ($i = 0; $i < $w_count; $i++) {
  330. $w_len = strlen($words[$i]) + 1;
  331. if ($c_chars + $w_len < $max_chars) {
  332. if (preg_match("/\n/i", $words[$i])) {
  333. $words_nl = explode("\n", $words[$i]);
  334. array_push($c_words, $words_nl[0]);
  335. array_push($lines, implode(' ', $c_words));
  336. $nl_num = count($words_nl) - 1;
  337. for ($j = 1; $j < $nl_num; $j++) {
  338. array_push($lines, $words_nl[$j]);
  339. }
  340. $c_words = array($words_nl[$nl_num]);
  341. $c_chars = strlen($words_nl[$nl_num]) + 1;
  342. } else {
  343. array_push($c_words, $words[$i]);
  344. $c_chars += $w_len;
  345. }
  346. } else {
  347. array_push($lines, implode(' ', $c_words));
  348. $c_words = array($words[$i]);
  349. $c_chars = $w_len;
  350. }
  351. }
  352. array_push($lines, implode(' ', $c_words));
  353. $max_line = count($lines);
  354. $output = '';
  355. for ($j = $max_line - 1; $j >= 0; $j--) {
  356. $output .= $lines[$j] . "\n";
  357. }
  358. $output = rtrim($output);
  359. $output = $this->_preConvert($output);
  360. if ($hindo) {
  361. $Nums = array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
  362. $arNums = array('٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨', '٩');
  363. $output = str_replace($Nums, $arNums, $output);
  364. }
  365. return $output;
  366. }
  367. /**
  368. * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
  369. * Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
  370. *
  371. * @param string $text The text to decode entities in.
  372. * @param array $exclude An array of characters which should not be decoded.
  373. * For example, array('<', '&', '"'). This affects
  374. * both named and numerical entities.
  375. *
  376. * @return string
  377. */
  378. protected function _decodeEntities($text, $exclude = array())
  379. {
  380. static $table;
  381. // We store named entities in a table for quick processing.
  382. if (!isset($table)) {
  383. // Get all named HTML entities.
  384. $table = array_flip(get_html_translation_table(HTML_ENTITIES));
  385. // PHP gives us ISO-8859-1 data, we need UTF-8.
  386. $table = array_map('utf8_encode', $table);
  387. // Add apostrophe (XML)
  388. $table['&apos;'] = "'";
  389. }
  390. $newtable = array_diff($table, $exclude);
  391. // Use a regexp to select all entities in one pass, to avoid decoding
  392. // double-escaped entities twice.
  393. return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '$this
  394. ->_decodeEntities2("$1", "$2", "$0", $newtable, $exclude)', $text);
  395. }
  396. /**
  397. * Helper function for _decodeEntities
  398. *
  399. * @param string $prefix Prefix
  400. * @param string $codepoint Codepoint
  401. * @param string $original Original
  402. * @param array &$table Store named entities in a table
  403. * @param array &$exclude An array of characters which should not be decoded
  404. *
  405. * @return string
  406. */
  407. protected function _decodeEntities2($prefix, $codepoint, $original, &$table, &$exclude)
  408. {
  409. // Named entity
  410. if (!$prefix) {
  411. if (isset($table[$original])) {
  412. return $table[$original];
  413. } else {
  414. return $original;
  415. }
  416. }
  417. // Hexadecimal numerical entity
  418. if ($prefix == '#x') {
  419. $codepoint = base_convert($codepoint, 16, 10);
  420. }
  421. // Encode codepoint as UTF-8 bytes
  422. if ($codepoint < 0x80) {
  423. $str = chr($codepoint);
  424. } elseif ($codepoint < 0x800) {
  425. $str = chr(0xC0 | ($codepoint >> 6)) . chr(0x80 | ($codepoint & 0x3F));
  426. } elseif ($codepoint < 0x10000) {
  427. $str = chr(0xE0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x3F)) . chr(0x80 | ($codepoint & 0x3F));
  428. } elseif ($codepoint < 0x200000) {
  429. $str = chr(0xF0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3F)) . chr(0x80 | (($codepoint >> 6) & 0x3F)) . chr(0x80 | ($codepoint & 0x3F));
  430. }
  431. // Check for excluded characters
  432. if (in_array($str, $exclude)) {
  433. return $original;
  434. } else {
  435. return $str;
  436. }
  437. }
  438. }
  439. ?>