PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/build/commands/CldrCommand.php

https://bitbucket.org/penkoh/yii
PHP | 509 lines | 400 code | 56 blank | 53 comment | 60 complexity | a9cc0e1e20ef7faf6842581f5225cd64 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause
  1. <?php
  2. /**
  3. * CldrCommand class file.
  4. *
  5. * @author Qiang Xue <qiang.xue@gmail.com>
  6. * @link http://www.yiiframework.com/
  7. * @copyright Copyright &copy; 2008-2011 Yii Software LLC
  8. * @license http://www.yiiframework.com/license/
  9. */
  10. /**
  11. * CldrCommand converts the locale data from the {@link http://www.unicode.org/cldr/ CLDR project}
  12. * to PHP scripts so that they can be more easily used in PHP programming.
  13. *
  14. * The script respects locale inheritance so that the PHP data for a child locale
  15. * will contain all its parents' locale data if they are not specified in the child locale.
  16. * Therefore, to import the data for a locale, only the PHP script for that particular locale
  17. * needs to be included.
  18. *
  19. * Note, only the data relevant to number and date formatting are extracted.
  20. * Each PHP script file is named as the corresponding locale ID in lower case.
  21. *
  22. * @author Qiang Xue <qiang.xue@gmail.com>
  23. * @package system.build
  24. * @since 1.0
  25. */
  26. class CldrCommand extends CConsoleCommand
  27. {
  28. protected $pluralRules = array();
  /**
   * Provides the command description shown by the console help.
   *
   * The text describes what this command actually does: the data path
   * argument may be omitted (run() then falls back to a temporary directory
   * and downloads the CLDR archive) and the generated PHP scripts are
   * written to the framework i18n data directory used by process().
   *
   * @return string the usage, description and parameters of the command
   */
  public function getHelp()
  {
    return <<<EOD
USAGE
  build cldr [data-path]

DESCRIPTION
  This command converts the locale data from the CLDR project
  to PHP scripts so that they can be more easily used in PHP programming.

  The script respects locale inheritance so that the PHP data for
  a child locale will contain all its parent locale data if they are
  not specified in the child locale. Therefore, to import the data
  for a locale, only the PHP script for that particular locale needs
  to be included.

  Note, only the data relevant to number and date formatting are extracted.
  Each PHP script file is named as the corresponding locale ID in lower case.

  The resulting PHP scripts are written to the framework "i18n/data"
  directory ("../../framework/i18n/data" relative to this command).

PARAMETERS
 * data-path: optional, the original CLDR data directory. This
   directory should contain "main" subdirectory with hundreds of XML files
   and "supplemental" subdirectory with "plurals.xml". If omitted, the
   latest CLDR "core.zip" is downloaded from unicode.org into a temporary
   directory, which is removed once the conversion has finished.

EOD;
  }
  /**
   * Execute the action.
   *
   * With a data path argument the CLDR data is read from that directory,
   * which must already exist and is never modified. Without an argument the
   * data is placed in a "temp" directory next to this command: the CLDR
   * "core.zip" archive is downloaded and extracted there when needed, and
   * the directory is removed again after all locales have been processed.
   *
   * Errors are reported through usageError(), which the original code
   * already relies on to terminate the script.
   *
   * @param array $args command line parameters specific for this command
   */
  public function run($args)
  {
    // Stays null for a user supplied data path so that nothing outside the
    // directory created by this command can ever be deleted during cleanup.
    $tempPath=null;

    if(isset($args[0]))
    {
      $basePath=$args[0];
      if(!is_dir($basePath))
        $this->usageError("Directory '$basePath' does not exist.");
    }
    else
    {
      $tempPath=dirname(__FILE__).'/../temp';
      $basePath=$tempPath.'/common';
      if(!is_dir($basePath) && !mkdir($basePath,0777,true))
        $this->usageError("Directory '$basePath' can not be created.");
      if(!is_dir($basePath.DIRECTORY_SEPARATOR.'main'))
      {
        // look for zip file, download the latest core.zip if it is missing
        $zipFile=$tempPath.'/core.zip';
        if(!is_file($zipFile))
          $this->downloadCldrArchive('http://www.unicode.org/Public/cldr/latest/core.zip',$zipFile);
        $this->extractCldrArchive($zipFile,$tempPath);
      }
    }

    $path=$basePath.DIRECTORY_SEPARATOR.'main';
    if(!is_dir($path))
      $this->usageError("Directory '$path' does not exist.");

    $pluralFile=$basePath.DIRECTORY_SEPARATOR.'supplemental'.DIRECTORY_SEPARATOR.'plurals.xml';
    if(!is_file($pluralFile))
      $this->usageError("File '$pluralFile' does not exist.");

    // parse plural.xml before locale files
    $pluralXml=simplexml_load_file($pluralFile);
    if($pluralXml===false)
      $this->usageError("File '$pluralFile' can not be parsed.");
    $this->parsePluralRules($pluralXml);

    // collect XML files to be processed
    $options=array(
      'exclude'=>array('.gitignore'),
      'fileTypes'=>array('xml'),
      'level'=>0,
    );
    $sourceFiles=array();
    foreach(CFileHelper::findFiles(realpath($path),$options) as $file)
      $sourceFiles[basename($file)]=$file;

    // sort by file name so that inheritances can be processed properly
    ksort($sourceFiles);

    if(!isset($sourceFiles['root.xml']))
      $this->usageError('Unable to find the required root.xml under CLDR "main" data directory.');

    // process root first because it is inherited by all
    $this->process($sourceFiles['root.xml']);
    unset($sourceFiles['root.xml']);
    foreach($sourceFiles as $sourceFile)
      $this->process($sourceFile);

    // clean up temporary files (only the directory this command manages)
    if($tempPath!==null)
      $this->removeTempDirectory($tempPath);
  }

  /**
   * Downloads a file with cURL and stores it at the given location.
   * A partially written target file is removed when the transfer fails so
   * that a later run does not mistake it for a complete archive.
   *
   * @param string $url the URL to download from
   * @param string $target the path of the file to be written
   */
  private function downloadCldrArchive($url,$target)
  {
    $fp=fopen($target,'wb');
    if($fp===false)
      $this->usageError("File '$target' can not be created.");

    $ch=curl_init($url);
    curl_setopt($ch,CURLOPT_FILE,$fp);
    curl_setopt($ch,CURLOPT_FOLLOWLOCATION,true);
    curl_setopt($ch,CURLOPT_HEADER,false);
    // treat HTTP error responses as failures instead of saving the error page
    curl_setopt($ch,CURLOPT_FAILONERROR,true);
    $succeeded=curl_exec($ch)!==false;
    curl_close($ch);
    fclose($fp);

    if(!$succeeded)
    {
      if(is_file($target))
        unlink($target);
      $this->usageError("Failed to download from '$url'.");
    }
  }

  /**
   * Extracts a zip archive into a directory.
   *
   * @param string $zipFile the path of the zip archive
   * @param string $destination the directory to extract the archive into
   */
  private function extractCldrArchive($zipFile,$destination)
  {
    $zip=new ZipArchive;
    if($zip->open($zipFile)!==true)
      $this->usageError("Failed to unzip '$zipFile'.");
    $extracted=$zip->extractTo($destination);
    $zip->close();
    if(!$extracted)
      $this->usageError("Failed to unzip '$zipFile'.");
  }

  /**
   * Recursively removes a file or directory, including hidden entries.
   * Removal is best effort: failures on single entries are ignored.
   *
   * @param string $path the file or directory to remove
   * @return boolean whether the given path itself was removed
   */
  private function removeTempDirectory($path)
  {
    if(is_link($path) || is_file($path))
      return @unlink($path);
    if(!is_dir($path))
      return false;

    $entries=scandir($path);
    if($entries!==false)
    {
      foreach($entries as $entry)
      {
        if($entry!=='.' && $entry!=='..')
          $this->removeTempDirectory($path.DIRECTORY_SEPARATOR.$entry);
      }
    }
    return @rmdir($path);
  }
  /**
   * Converts a single CLDR locale XML file into a PHP data script.
   *
   * The data of the parent locale (the locale ID up to its last underscore,
   * or "root" when there is none) is loaded from the already generated PHP
   * script and then overridden by the values found in the XML file. The
   * result is written as "<lower-cased locale ID>.php" into the framework
   * i18n data directory.
   *
   * @param string $path the path of the locale XML file; its base name
   * without the ".xml" extension is used as the locale ID
   */
  protected function process($path)
  {
    $source=basename($path);
    echo "processing $source...";
    $locale=substr($source,0,-4);
    $i18nDataPath=dirname(__FILE__).'/../../framework/i18n/data';

    // retrieve parent data first
    if(($pos=strrpos($locale,'_'))!==false)
      $data=require($i18nDataPath.DIRECTORY_SEPARATOR.strtolower(substr($locale,0,$pos)).'.php');
    else if($locale!=='root')
      $data=require($i18nDataPath.DIRECTORY_SEPARATOR.'root.php');
    else
      $data=array();

    $xml=simplexml_load_file($path);
    if($xml===false)
      $this->usageError("File '$path' can not be parsed.");

    $this->parseVersion($xml,$data);
    $this->parseNumberSymbols($xml,$data);
    $this->parseNumberFormats($xml,$data);
    $this->parseCurrencySymbols($xml,$data);
    $this->parseLanguages($xml,$data);
    $this->parseScripts($xml,$data);
    $this->parseTerritories($xml,$data);
    $this->parseMonthNames($xml,$data);
    $this->parseWeekDayNames($xml,$data);
    $this->parseEraNames($xml,$data);
    $this->parseDateFormats($xml,$data);
    $this->parseTimeFormats($xml,$data);
    $this->parseDateTimeFormat($xml,$data);
    $this->parsePeriodNames($xml,$data);
    $this->parseOrientation($xml,$data);
    $this->addPluralRules($data, $locale);

    // export as PHP source; carriage returns are stripped from the export
    $data=str_replace("\r",'',var_export($data,true));
    $content=<<<EOD
/**
* Locale data for '$locale'.
*
* This file is automatically generated by yiic cldr command.
*
* Copyright © 1991-2007 Unicode, Inc. All rights reserved.
* Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
*
* Copyright © 2008-2011 Yii Software LLC (http://www.yiiframework.com/license/)
*/
return $data;
EOD;
    $targetFile=$i18nDataPath.DIRECTORY_SEPARATOR.strtolower($locale).'.php';
    if(file_put_contents($targetFile,"<?php\n".$content."\n")===false)
      $this->usageError("File '$targetFile' can not be written.");
    echo "done.\n";
  }
  /**
   * Extracts the CLDR version number of a locale file.
   *
   * The first run of digits and dots found in the "number" attribute of
   * the identity/version element is stored in $data['version']. When the
   * attribute holds no such value, a version already present in $data
   * (inherited from the parent locale) is kept instead of being overwritten.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseVersion($xml,&$data)
  {
    if(preg_match('/[\d\.]+/',(string)$xml->identity->version['number'],$matches))
      $data['version']=$matches[0];
    else if(!isset($data['version']))
      $data['version']=null; // keep the key present even without a usable value
  }
  /**
   * Collects the number symbols of a locale.
   *
   * Every child element of /ldml/numbers/symbols is stored under its element
   * name. An element carrying a non-empty "draft" attribute only fills a
   * symbol that has no value yet; it never replaces an existing one.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseNumberSymbols($xml,&$data)
  {
    foreach($xml->xpath('/ldml/numbers/symbols/*') as $node)
    {
      $key=$node->getName();
      $isDraft=(string)$node['draft']!=='';
      if($isDraft && isset($data['numberSymbols'][$key]))
        continue;
      $data['numberSymbols'][$key]=(string)$node;
    }
  }
  /**
   * Collects the decimal, scientific, percent and currency format patterns.
   *
   * For each kind the first pattern found in the XML is stored under
   * "<kind>Format"; kinds without a pattern leave $data untouched.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseNumberFormats($xml,&$data)
  {
    foreach(array('decimal','scientific','percent','currency') as $kind)
    {
      $found=$xml->xpath("/ldml/numbers/{$kind}Formats/{$kind}FormatLength/{$kind}Format/pattern");
      if(!isset($found[0]))
        continue;
      $data[$kind.'Format']=(string)$found[0];
    }
  }
  /**
   * Collects currency symbols indexed by the currency "type" attribute.
   * Currencies whose symbol is empty are skipped.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseCurrencySymbols($xml,&$data)
  {
    $nodes=$xml->xpath('/ldml/numbers/currencies/currency');
    foreach($nodes as $node)
    {
      $symbol=(string)$node->symbol;
      if($symbol==='')
        continue;
      $code=(string)$node['type'];
      $data['currencySymbols'][$code]=$symbol;
    }
  }
  /**
   * Collects the display names of languages.
   *
   * Names are indexed by the "type" attribute, lower-cased and with dashes
   * turned into underscores. Empty names are skipped.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseLanguages($xml,&$data)
  {
    $nodes=$xml->xpath('/ldml/localeDisplayNames/languages/language');
    foreach($nodes as $node)
    {
      $name=(string)$node;
      if($name==='')
        continue;
      $id=strtolower(str_replace('-','_',(string)$node['type']));
      $data['languages'][$id]=$name;
    }
  }
  /**
   * Collects the display names of scripts.
   *
   * Names are indexed by the "type" attribute, lower-cased and with dashes
   * turned into underscores. Empty names are skipped.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseScripts($xml,&$data)
  {
    $nodes=$xml->xpath('/ldml/localeDisplayNames/scripts/script');
    foreach($nodes as $node)
    {
      $name=(string)$node;
      if($name==='')
        continue;
      $id=strtolower(str_replace('-','_',(string)$node['type']));
      $data['scripts'][$id]=$name;
    }
  }
  /**
   * Collects the display names of territories.
   *
   * Names are indexed by the "type" attribute, lower-cased and with dashes
   * turned into underscores. Empty names are skipped.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseTerritories($xml,&$data)
  {
    $nodes=$xml->xpath('/ldml/localeDisplayNames/territories/territory');
    foreach($nodes as $node)
    {
      $name=(string)$node;
      if($name==='')
        continue;
      $id=strtolower(str_replace('-','_',(string)$node['type']));
      $data['territories'][$id]=$name;
    }
  }
  /**
   * Collects gregorian month names for the "format" and "stand-alone"
   * contexts.
   *
   * Names are merged entry by entry into $data['monthNames'][width] and
   * $data['monthNamesSA'][width], indexed by the month "type" attribute, so
   * entries that the XML does not mention keep their current value. When no
   * abbreviated format names exist, the wide ones are used for them.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseMonthNames($xml,&$data)
  {
    $contextPath='/ldml/dates/calendars/calendar[@type=\'gregorian\']/months/monthContext';

    $widths=$xml->xpath($contextPath.'[@type=\'format\']/monthWidth');
    if(is_array($widths))
    {
      foreach($widths as $width)
      {
        $widthName=(string)$width['type'];
        foreach($width->xpath('month') as $month)
          $data['monthNames'][$widthName][(string)$month['type']]=(string)$month;
      }
    }

    // fall back to the wide names when no abbreviated ones are available
    if(!isset($data['monthNames']['abbreviated']))
      $data['monthNames']['abbreviated']=$data['monthNames']['wide'];

    $widths=$xml->xpath($contextPath.'[@type=\'stand-alone\']/monthWidth');
    if(is_array($widths))
    {
      foreach($widths as $width)
      {
        $widthName=(string)$width['type'];
        foreach($width->xpath('month') as $month)
          $data['monthNamesSA'][$widthName][(string)$month['type']]=(string)$month;
      }
    }
  }
  /**
   * Collects gregorian week day names for the "format" and "stand-alone"
   * contexts.
   *
   * Day types ("sun" .. "sat") are mapped to the indexes 0 .. 6. Names are
   * merged entry by entry into $data['weekDayNames'][width] and
   * $data['weekDayNamesSA'][width], in the same way parseMonthNames() merges
   * month names, so that a locale overriding only some days keeps the
   * remaining days inherited from its parent. Day types that are not part of
   * the mapping are ignored. When no abbreviated format names exist, the
   * wide ones (if any) are used for them.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseWeekDayNames($xml,&$data)
  {
    static $mapping=array(
      'sun'=>0,
      'mon'=>1,
      'tue'=>2,
      'wed'=>3,
      'thu'=>4,
      'fri'=>5,
      'sat'=>6,
    );
    $contextPath='/ldml/dates/calendars/calendar[@type=\'gregorian\']/days/dayContext';

    $dayTypes=$xml->xpath($contextPath.'[@type=\'format\']/dayWidth');
    if(is_array($dayTypes))
    {
      foreach($dayTypes as $dayType)
      {
        $width=(string)$dayType['type'];
        foreach($dayType->xpath('day') as $day)
        {
          $type=(string)$day['type'];
          if(isset($mapping[$type]))
            $data['weekDayNames'][$width][$mapping[$type]]=(string)$day;
        }
      }
    }

    // fall back to the wide names when no abbreviated ones are available
    if(!isset($data['weekDayNames']['abbreviated']) && isset($data['weekDayNames']['wide']))
      $data['weekDayNames']['abbreviated']=$data['weekDayNames']['wide'];

    $dayTypes=$xml->xpath($contextPath.'[@type=\'stand-alone\']/dayWidth');
    if(is_array($dayTypes))
    {
      foreach($dayTypes as $dayType)
      {
        $width=(string)$dayType['type'];
        foreach($dayType->xpath('day') as $day)
        {
          $type=(string)$day['type'];
          if(isset($mapping[$type]))
            $data['weekDayNamesSA'][$width][$mapping[$type]]=(string)$day;
        }
      }
    }
  }
  /**
   * Collects the wide, format-context AM and PM names of the gregorian
   * calendar into $data['amName'] and $data['pmName'].
   * A name that is missing in the XML leaves its entry untouched.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parsePeriodNames($xml,&$data)
  {
    $periodPath='/ldml/dates/calendars/calendar[@type=\'gregorian\']/dayPeriods/dayPeriodContext[@type=\'format\']/dayPeriodWidth[@type=\'wide\']/dayPeriod';
    foreach(array('am'=>'amName','pm'=>'pmName') as $period=>$key)
    {
      $found=$xml->xpath($periodPath.'[@type=\''.$period.'\']');
      if(!is_array($found) || !isset($found[0]))
        continue;
      $data[$key]=(string)$found[0];
    }
  }
  /**
   * Collects the gregorian era names in abbreviated, wide and narrow width.
   *
   * Names are indexed by the era "type" attribute. When the XML has no wide
   * (or narrow) names and none are present yet, the abbreviated names are
   * used in their place.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseEraNames($xml,&$data)
  {
    $erasPath='/ldml/dates/calendars/calendar[@type=\'gregorian\']/eras/';
    // XML element name => key below $data['eraNames']; order matters because
    // the later widths may fall back to the abbreviated names
    $widths=array(
      'eraAbbr'=>'abbreviated',
      'eraNames'=>'wide',
      'eraNarrow'=>'narrow',
    );
    foreach($widths as $element=>$width)
    {
      $found=$xml->xpath($erasPath.$element);
      if(is_array($found) && isset($found[0]))
      {
        foreach($found[0]->xpath('era') as $era)
          $data['eraNames'][$width][(string)$era['type']]=(string)$era;
      }
      else if($width!=='abbreviated' && !isset($data['eraNames'][$width]))
        $data['eraNames'][$width]=$data['eraNames']['abbreviated'];
    }
  }
  /**
   * Collects the gregorian date format patterns indexed by format length
   * (the "type" attribute of each dateFormatLength element).
   *
   * A format length without a pattern is skipped, as parseNumberFormats()
   * does for number patterns, so that a value already present in $data is
   * not replaced by an empty string.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseDateFormats($xml,&$data)
  {
    $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dateFormats/dateFormatLength');
    if(!is_array($types))
      return;
    foreach($types as $type)
    {
      $pattern=$type->xpath('dateFormat/pattern');
      if(is_array($pattern) && isset($pattern[0]))
        $data['dateFormats'][(string)$type['type']]=(string)$pattern[0];
    }
  }
  /**
   * Collects the gregorian time format patterns indexed by format length
   * (the "type" attribute of each timeFormatLength element).
   *
   * A format length without a pattern is skipped, as parseNumberFormats()
   * does for number patterns, so that a value already present in $data is
   * not replaced by an empty string.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseTimeFormats($xml,&$data)
  {
    $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/timeFormats/timeFormatLength');
    if(!is_array($types))
      return;
    foreach($types as $type)
    {
      $pattern=$type->xpath('timeFormat/pattern');
      if(is_array($pattern) && isset($pattern[0]))
        $data['timeFormats'][(string)$type['type']]=(string)$pattern[0];
    }
  }
  /**
   * Collects the gregorian date-time format pattern.
   *
   * The pattern of the "medium" format length is preferred; when there is no
   * such length, the first one found is used. If the chosen length has no
   * pattern, $data['dateTimeFormat'] keeps its current value instead of
   * being replaced by an empty string.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseDateTimeFormat($xml,&$data)
  {
    $types=$xml->xpath('/ldml/dates/calendars/calendar[@type=\'gregorian\']/dateTimeFormats/dateTimeFormatLength');
    if(!is_array($types) || !isset($types[0]))
      return;

    $picked=$types[0];
    foreach($types as $element)
    {
      // compare as strings: the attribute itself is a SimpleXMLElement
      if((string)$element['type']==='medium')
      {
        $picked=$element;
        break;
      }
    }

    $pattern=$picked->xpath('dateTimeFormat/pattern');
    if(is_array($pattern) && isset($pattern[0]))
      $data['dateTimeFormat']=(string)$pattern[0];
  }
  /**
   * Determines the text orientation ("rtl" or "ltr") of a locale.
   *
   * A right-to-left character order is recognised in two notations: the
   * "characters" attribute of the orientation element and a "characterOrder"
   * child element of it. NOTE(review): the child element form is believed to
   * be the one used by current CLDR releases (the attribute form being
   * deprecated) - confirm against the LDML specification.
   *
   * When the XML does not declare right-to-left, an orientation already
   * present in $data is kept; otherwise "ltr" is used.
   *
   * @param SimpleXMLElement $xml the parsed locale XML
   * @param array $data the locale data being built (modified in place)
   */
  protected function parseOrientation($xml,&$data)
  {
    $byAttribute=$xml->xpath('/ldml/layout/orientation[@characters=\'right-to-left\']');
    $byElement=$xml->xpath('/ldml/layout/orientation/characterOrder[normalize-space(.)=\'right-to-left\']');
    if(!empty($byAttribute) || !empty($byElement))
      $data['orientation']='rtl';
    else if(!isset($data['orientation']))
      $data['orientation']='ltr';
  }
  /**
   * Translates the CLDR plural rules into expression strings and stores
   * them in {@link pluralRules}, indexed by locale ID.
   *
   * Each rule is split on "or" and then on "and"; every resulting condition
   * is rewritten with a fixed list of regular expressions (applied in the
   * listed order) and the pieces are joined again with "||" and "&&". A
   * final 'true' expression is appended to every rule set to match "other".
   * Rule sets without any rule are skipped.
   *
   * @param SimpleXMLElement $xml the parsed plurals XML
   * @see http://cldr.unicode.org/index/cldr-spec/plural-rules
   */
  protected function parsePluralRules($xml)
  {
    echo "Processing plural.xml...";

    // parallel lists: $search[i] is replaced by $replace[i], in this order
    $search=array(
      '/\s+is\s+not\s+/i', //is not
      '/\s+is\s+/i', //is
      '/n\s+mod\s+(\d+)/i', //mod (CLDR's "mod" is "fmod()", not "%")
      '/^(.*?)\s+not\s+(?:in|within)\s+(\d+)\.\.(\d+)/i', //not in, not within
      '/^(.*?)\s+within\s+(\d+)\.\.(\d+)/i', //within
      '/^(.*?)\s+in\s+(\d+)\.\.(\d+)/i', //in
    );
    $replace=array(
      '!=',
      '==',
      'fmod(n,$1)',
      '($1<$2||$1>$3)',
      '($1>=$2&&$1<=$3)',
      '($1>=$2&&$1<=$3&&fmod($1,1)==0)',
    );

    foreach($xml->plurals->pluralRules as $ruleSet)
    {
      if(empty($ruleSet->pluralRule))
        continue;

      $expressions=array();
      foreach($ruleSet->pluralRule as $rule)
      {
        $alternatives=array();
        foreach(preg_split('/\s+or\s+/i', $rule) as $alternative)
        {
          $conditions=preg_split('/\s+and\s+/i', $alternative);
          $conditions=preg_replace($search, $replace, $conditions);
          $alternatives[]=implode('&&', $conditions);
        }
        $expressions[]=implode('||', $alternatives);
      }
      //append last rule to match "other"
      $expressions[]='true';

      // the "locales" attribute lists the locale IDs separated by spaces
      $attributes=$ruleSet->attributes();
      foreach(explode(' ',$attributes['locales']) as $locale)
        $this->pluralRules[$locale]=$expressions;
    }

    echo "Done.\n";
  }
  /**
   * Copies the plural rules collected for a locale into its data.
   * Nothing happens when no (or only empty) rules are known for the locale,
   * so rules already present in $data are kept.
   *
   * @param array $data the locale data being built (modified in place)
   * @param string $locale the locale ID used as index into {@link pluralRules}
   */
  protected function addPluralRules(&$data, $locale)
  {
    if(empty($this->pluralRules[$locale]))
      return;
    $data['pluralRules']=$this->pluralRules[$locale];
  }
  453. }