PageRenderTime 65ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 0ms

/application/third_party/ar-php/Arabic/Transliteration.php

https://gitlab.com/mariadb-corporation/LimeSurvey
PHP | 367 lines | 175 code | 40 blank | 152 comment | 17 complexity | f087208558a4cdf30ec98562a3945007 MD5 | raw file
  1. <?php
  2. /**
  3. * ----------------------------------------------------------------------
  4. *
  5. * Copyright (c) 2006-2013 Khaled Al-Sham'aa.
  6. *
  7. * http://www.ar-php.org
  8. *
  9. * PHP Version 5
  10. *
  11. * ----------------------------------------------------------------------
  12. *
  13. * LICENSE
  14. *
  15. * This program is open source product; you can redistribute it and/or
  16. * modify it under the terms of the GNU Lesser General Public License (LGPL)
  17. * as published by the Free Software Foundation; either version 3
  18. * of the License, or (at your option) any later version.
  19. *
  20. * This program is distributed in the hope that it will be useful,
  21. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23. * GNU Lesser General Public License for more details.
  24. *
  25. * You should have received a copy of the GNU Lesser General Public License
  26. * along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
  27. *
  28. * ----------------------------------------------------------------------
  29. *
  30. * Class Name: English-Arabic Transliteration
  31. *
  32. * Filename: Transliteration.php
  33. *
  34. * Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
  35. *
  36. * Purpose: Transliterate English words into Arabic by rendering them
  37. * in the orthography of the Arabic language and vice versa
  38. *
  39. * ----------------------------------------------------------------------
  40. *
  41. * English-Arabic Transliteration
  42. *
  43. * PHP class that transliterates English words into Arabic by rendering them
  44. * in the orthography of the Arabic language and vice versa.
  45. *
  46. * Out of vocabulary (OOV) words are a common source of errors in cross language
  47. * information retrieval. Bilingual dictionaries are often limited in their coverage
  48. * of named entities, numbers, technical terms and acronyms. There is a need to
  49. * generate translations for these "on-the-fly" or at query time.
  50. *
  51. * A significant proportion of OOV words are named entities and technical terms.
  52. * Typical analyses find around 50% of OOV words to be named entities. Yet these
  53. * can be the most important words in the queries. Cross language retrieval
  54. * performance (average precision) reduced more than 50% when named entities in the
  55. * queries were not translated.
  56. *
  57. * When the query language and the document language share the same alphabet it may
  58. * be sufficient to use the OOV word as its own translation. However, when the two
  59. * languages have different alphabets, the query term must somehow be rendered in
  60. * the orthography of the other language. The process of converting a word from one
  61. * orthography into another is called transliteration.
  62. *
  63. * Foreign words often occur in Arabic text as transliteration. This is the case for
  64. * many categories of foreign words, not just proper names but also technical terms
  65. * such as caviar, telephone and internet.
  66. *
  67. * Example:
  68. * <code>
  69. * include('./I18N/Arabic.php');
  70. * $obj = new I18N_Arabic('Transliteration');
  71. *
  72. * $ar_word_1 = $obj->en2ar($en_word_1);
  73. * $en_word_2 = $obj->ar2en($ar_word_2);
  74. * </code>
  75. *
  76. * @category I18N
  77. * @package I18N_Arabic
  78. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  79. * @copyright 2006-2013 Khaled Al-Sham'aa
  80. *
  81. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  82. * @link http://www.ar-php.org
  83. */
  84. // New in PHP V5.3: Namespaces
  85. // namespace I18N\Arabic;
  86. //
  87. // $obj = new I18N\Arabic\Transliteration();
  88. //
  89. // use I18N\Arabic;
  90. // $obj = new Arabic\Transliteration();
  91. //
  92. // use I18N\Arabic\Transliteration as Transliteration;
  93. // $obj = new Transliteration();
  94. /**
  95. * This PHP class transliterates English words into Arabic and vice versa
  96. *
  97. * @category I18N
  98. * @package I18N_Arabic
  99. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  100. * @copyright 2006-2013 Khaled Al-Sham'aa
  101. *
  102. * @license LGPL <http://www.gnu.org/licenses/lgpl.txt>
  103. * @link http://www.ar-php.org
  104. */
  105. class I18N_Arabic_Transliteration
  106. {
  107. private static $_arFinePatterns = array("/'+/u", "/([\- ])'/u", '/(.)#/u');
  108. private static $_arFineReplacements = array("'", '\\1', "\\1'\\1");
  109. private static $_en2arPregSearch = array();
  110. private static $_en2arPregReplace = array();
  111. private static $_en2arStrSearch = array();
  112. private static $_en2arStrReplace = array();
  113. private static $_ar2enPregSearch = array();
  114. private static $_ar2enPregReplace = array();
  115. private static $_ar2enStrSearch = array();
  116. private static $_ar2enStrReplace = array();
  117. private static $_diariticalSearch = array();
  118. private static $_diariticalReplace = array();
  119. private static $_iso233Search = array();
  120. private static $_iso233Replace = array();
  121. private static $_rjgcSearch = array();
  122. private static $_rjgcReplace = array();
  123. private static $_sesSearch = array();
  124. private static $_sesReplace = array();
  125. /**
  126. * Loads initialize values
  127. *
  128. * @ignore
  129. */
  130. public function __construct()
  131. {
  132. $xml = simplexml_load_file(dirname(__FILE__).'/data/Transliteration.xml');
  133. foreach ($xml->xpath("//preg_replace[@function='ar2en']/pair") as $pair) {
  134. array_push(self::$_ar2enPregSearch, (string)$pair->search);
  135. array_push(self::$_ar2enPregReplace, (string)$pair->replace);
  136. }
  137. foreach (
  138. $xml->xpath("//str_replace[@function='diaritical']/pair") as $pair
  139. ) {
  140. array_push(self::$_diariticalSearch, (string)$pair->search);
  141. array_push(self::$_diariticalReplace, (string)$pair->replace);
  142. }
  143. foreach ($xml->xpath("//str_replace[@function='ISO233']/pair") as $pair) {
  144. array_push(self::$_iso233Search, (string)$pair->search);
  145. array_push(self::$_iso233Replace, (string)$pair->replace);
  146. }
  147. foreach ($xml->xpath("//str_replace[@function='RJGC']/pair") as $pair) {
  148. array_push(self::$_rjgcSearch, (string)$pair->search);
  149. array_push(self::$_rjgcReplace, (string)$pair->replace);
  150. }
  151. foreach ($xml->xpath("//str_replace[@function='SES']/pair") as $pair) {
  152. array_push(self::$_sesSearch, (string)$pair->search);
  153. array_push(self::$_sesReplace, (string)$pair->replace);
  154. }
  155. foreach ($xml->xpath("//str_replace[@function='ar2en']/pair") as $pair) {
  156. array_push(self::$_ar2enStrSearch, (string)$pair->search);
  157. array_push(self::$_ar2enStrReplace, (string)$pair->replace);
  158. }
  159. foreach ($xml->xpath("//preg_replace[@function='en2ar']/pair") as $pair) {
  160. array_push(self::$_en2arPregSearch, (string)$pair->search);
  161. array_push(self::$_en2arPregReplace, (string)$pair->replace);
  162. }
  163. foreach ($xml->xpath("//str_replace[@function='en2ar']/pair") as $pair) {
  164. array_push(self::$_en2arStrSearch, (string)$pair->search);
  165. array_push(self::$_en2arStrReplace, (string)$pair->replace);
  166. }
  167. }
  168. /**
  169. * Transliterate English string into Arabic by render them in the
  170. * orthography of the Arabic language
  171. *
  172. * @param string $string English string you want to transliterate
  173. *
  174. * @return String Out of vocabulary English string in Arabic characters
  175. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  176. */
  177. public static function en2ar($string)
  178. {
  179. $string = strtolower($string);
  180. $words = explode(' ', $string);
  181. $string = '';
  182. foreach ($words as $word) {
  183. $word = preg_replace(
  184. self::$_en2arPregSearch,
  185. self::$_en2arPregReplace, $word
  186. );
  187. $word = str_replace(
  188. self::$_en2arStrSearch,
  189. self::$_en2arStrReplace,
  190. $word
  191. );
  192. $string .= ' ' . $word;
  193. }
  194. return $string;
  195. }
  196. /**
  197. * Transliterate Arabic string into English by render them in the
  198. * orthography of the English language
  199. *
  200. * @param string $string Arabic string you want to transliterate
  201. * @param string $standard Transliteration standard, default is UNGEGN
  202. * and possible values are [UNGEGN, UNGEGN+, RJGC,
  203. * SES, ISO233]
  204. *
  205. * @return String Out of vocabulary Arabic string in English characters
  206. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  207. */
  208. public static function ar2en($string, $standard='UNGEGN')
  209. {
  210. //$string = str_replace('ة ال', 'tul', $string);
  211. $words = explode(' ', $string);
  212. $string = '';
  213. for ($i=0; $i<count($words)-1; $i++) {
  214. $words[$i] = str_replace('ة', 'ت', $words[$i]);
  215. }
  216. foreach ($words as $word) {
  217. $temp = $word;
  218. if ($standard == 'UNGEGN+') {
  219. $temp = str_replace(
  220. self::$_diariticalSearch,
  221. self::$_diariticalReplace,
  222. $temp
  223. );
  224. } else if ($standard == 'RJGC') {
  225. $temp = str_replace(
  226. self::$_diariticalSearch,
  227. self::$_diariticalReplace,
  228. $temp
  229. );
  230. $temp = str_replace(
  231. self::$_rjgcSearch,
  232. self::$_rjgcReplace,
  233. $temp
  234. );
  235. } else if ($standard == 'SES') {
  236. $temp = str_replace(
  237. self::$_diariticalSearch,
  238. self::$_diariticalReplace,
  239. $temp
  240. );
  241. $temp = str_replace(
  242. self::$_sesSearch,
  243. self::$_sesReplace,
  244. $temp
  245. );
  246. } else if ($standard == 'ISO233') {
  247. $temp = str_replace(
  248. self::$_iso233Search,
  249. self::$_iso233Replace,
  250. $temp
  251. );
  252. }
  253. $temp = preg_replace(
  254. self::$_ar2enPregSearch,
  255. self::$_ar2enPregReplace,
  256. $temp
  257. );
  258. $temp = str_replace(
  259. self::$_ar2enStrSearch,
  260. self::$_ar2enStrReplace,
  261. $temp
  262. );
  263. $temp = preg_replace(
  264. self::$_arFinePatterns,
  265. self::$_arFineReplacements,
  266. $temp
  267. );
  268. if (preg_match('/[a-z]/', mb_substr($temp, 0, 1))) {
  269. $temp = ucwords($temp);
  270. }
  271. $pos = strpos($temp, '-');
  272. if ($pos > 0) {
  273. if (preg_match('/[a-z]/', mb_substr($temp, $pos+1, 1))) {
  274. $temp2 = substr($temp, 0, $pos);
  275. $temp2 .= '-'.strtoupper($temp[$pos+1]);
  276. $temp2 .= substr($temp, $pos+2);
  277. } else {
  278. $temp2 = $temp;
  279. }
  280. } else {
  281. $temp2 = $temp;
  282. }
  283. $string .= ' ' . $temp2;
  284. }
  285. return $string;
  286. }
  287. /**
  288. * Render numbers in given string using HTML entities that will show them as
  289. * Arabic digits (i.e. 1, 2, 3, etc.) whatever browser language settings are
  290. * (if browser supports UTF-8 character set).
  291. *
  292. * @param string $string String includes some digits here or there
  293. *
  294. * @return String Original string after replace digits by HTML entities that
  295. * will show given number using Indian digits
  296. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  297. */
  298. public static function enNum($string)
  299. {
  300. $html = '';
  301. $digits = str_split("$string");
  302. foreach ($digits as $digit) {
  303. $html .= preg_match('/\d/', $digit) ? "&#x3$digit;" : $digit;
  304. }
  305. return $html;
  306. }
  307. /**
  308. * Render numbers in given string using HTML entities that will show them as
  309. * Indian digits (i.e. ١, ٢, ٣, etc.) whatever browser language settings are
  310. * (if browser supports UTF-8 character set).
  311. *
  312. * @param string $string String includes some digits here or there
  313. *
  314. * @return String Original string after replace digits by HTML entities that
  315. * will show given number using Arabic digits
  316. * @author Khaled Al-Sham'aa <khaled@ar-php.org>
  317. */
  318. public static function arNum($string)
  319. {
  320. $html = '';
  321. $digits = str_split("$string");
  322. foreach ($digits as $digit) {
  323. $html .= preg_match('/\d/', $digit) ? "&#x066$digit;" : $digit;
  324. }
  325. return $html;
  326. }
  327. }