PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/php-lithium/libraries/lithium/g11n/catalog/adapter/Gettext.php

https://github.com/yasuhiroki/FrameworkBenchmarks
PHP | 519 lines | 295 code | 47 blank | 177 comment | 24 complexity | 18bdf6ae878f58584115230ed0cc866b MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0, CC0-1.0, BSD-3-Clause, MIT, Apache-2.0
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2013, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/bsd-license.php The BSD License
  7. */
  8. namespace lithium\g11n\catalog\adapter;
  9. use RangeException;
  10. use lithium\core\ConfigException;
  11. /**
  12. * The `Gettext` class is an adapter for reading and writing PO and MO files without the
  13. * requirement of having the gettext extension enabled or installed. Moreover it doesn't
  14. * require the usage of the non thread safe `setlocale()`.
  15. *
  16. * The adapter works with the directory structure below. The example shows the structure
  17. * for the directory as given by the `'path'` configuration setting. It closely resembles
  18. * the standard gettext directory structure with a few slight adjustments to the way
  19. * templates are being named.
  20. *
  21. * {{{
  22. * resources/g11n/po
  23. * ├── <locale>
  24. * | ├── LC_MESSAGES
  25. * | | ├── default.po
  26. * | | ├── default.mo
  27. * | | ├── <scope>.po
  28. * | | └── <scope>.mo
  29. * | ├── LC_VALIDATION
  30. * | | └── ...
  31. * | └── ...
  32. * ├── <locale>
  33. * | └── ...
  34. * ├── message_default.pot
  35. * ├── message_<scope>.pot
  36. * ├── validation_default.pot
  37. * ├── validation_<scope>.pot
  38. * └── ...
  39. * }}}
  40. *
  41. * @see lithium\g11n\Locale
  42. * @link http://php.net/setlocale PHP Manual: setlocale()
  43. * @link http://www.gnu.org/software/gettext/manual/gettext.html GNU Gettext Utilities
  44. */
  45. class Gettext extends \lithium\g11n\catalog\Adapter {
  46. /**
  47. * Magic used for validating the format of a MO file as well as
  48. * detecting if the machine used to create that file was little endian.
  49. *
  50. * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
  51. * @var integer|float Integer on 64-bit builds; the literal overflows to float on 32-bit builds.
  52. */
  53. const MO_LITTLE_ENDIAN_MAGIC = 0x950412de;
  54. /**
  55. * Magic used for validating the format of a MO file as well as
  56. * detecting if the machine used to create that file was big endian.
  57. *
  58. * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
  59. * @var integer|float Integer on 64-bit builds; the literal overflows to float on 32-bit builds.
  60. */
  61. const MO_BIG_ENDIAN_MAGIC = 0xde120495;
  62. /**
  63. * The size of the header of a MO file in bytes.
  64. *
  65. * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
  66. * @var integer Number of bytes.
  67. */
  68. const MO_HEADER_SIZE = 28;
  /**
   * Creates the adapter and guarantees that a `'path'` key is present in the
   * configuration handed to the parent constructor.
   *
   * @param array $config Available configuration options are:
   *        - `'path'`: The path to the directory holding the data. Defaults to `null`.
   */
  public function __construct(array $config = array()) {
    // Array union keeps any caller supplied value and only fills in the missing key.
    $config += array('path' => null);
    parent::__construct($config);
  }
  /**
   * Initializer. Runs the parent initializer, then verifies that the configured
   * `'path'` points to an existing directory.
   *
   * @return void
   * @throws ConfigException If the configured path is not a directory.
   */
  protected function _init() {
    parent::_init();
    $path = $this->_config['path'];

    if (is_dir($path)) {
      return;
    }
    throw new ConfigException("Gettext directory does not exist at path `{$path}`.");
  }
  /**
   * Reads data.
   *
   * Candidate files are tried in the order returned by `_files()`. The first file
   * that exists, is readable and yields data wins. A `'pluralRule'` item is added
   * to the returned data.
   *
   * @param string $category A category.
   * @param string $locale A locale identifier.
   * @param string $scope The scope for the current operation.
   * @return array|null The parsed data, or `null` if no candidate file yielded any data.
   */
  public function read($category, $locale, $scope) {
    foreach ($this->_files($category, $locale, $scope) as $file) {
      if (!file_exists($file) || !is_readable($file)) {
        continue;
      }
      // The file may disappear or become unreadable between the check above and
      // the open call; treat that like a missing file instead of parsing `false`.
      if (!$stream = fopen($file, 'rb')) {
        continue;
      }
      $method = '_parse' . ucfirst(pathinfo($file, PATHINFO_EXTENSION));

      // Parsers may throw (i.e. on malformed MO content); release the handle before
      // letting the exception propagate.
      try {
        $data = $this->invokeMethod($method, array($stream));
      } catch (\Exception $e) {
        fclose($stream);
        throw $e;
      }
      fclose($stream);

      if ($data) {
        $data['pluralRule'] = array(
          'id' => 'pluralRule',
          'translated' => function($count) {
            return $count !== 1;
          }
        );
        return $data;
      }
    }
    return null;
  }
  /**
   * Writes data.
   *
   * Every file returned by `_files()` is opened for writing and handed to the
   * `_compile*()` method matching its extension. If a compiler does not write
   * anything to the stream, the resulting zero-byte file is removed again.
   * Otherwise it would be picked up by `read()`, where an empty MO file is
   * rejected as having an invalid format.
   *
   * @param string $category A category.
   * @param string $locale A locale identifier.
   * @param string $scope The scope for the current operation.
   * @param array $data The data to write.
   * @return boolean `false` if one of the files could not be opened for writing,
   *         `true` otherwise.
   */
  public function write($category, $locale, $scope, array $data) {
    foreach ($this->_files($category, $locale, $scope) as $file) {
      $method = '_compile' . ucfirst(pathinfo($file, PATHINFO_EXTENSION));

      if (!$stream = fopen($file, 'wb')) {
        return false;
      }
      // Release the handle before letting a compiler exception propagate.
      try {
        $this->invokeMethod($method, array($stream, $data));
      } catch (\Exception $e) {
        fclose($stream);
        throw $e;
      }
      // A position of 0 after compiling means nothing has been written.
      $isEmpty = ftell($stream) === 0;
      fclose($stream);

      if ($isEmpty) {
        unlink($file);
      }
    }
    return true;
  }
  /**
   * Builds the paths of the files backing a category, locale and scope, rooted
   * at the configured `'path'`.
   *
   * Categories containing `Template` resolve to a single POT file named after the
   * part of the category preceding `Template`. All other categories resolve to a
   * MO and a PO file (in that order) below `<locale>/LC_<CATEGORY>`. The `message`
   * category maps to `LC_MESSAGES`. An empty scope falls back to `default`.
   *
   * @param string $category
   * @param string $locale
   * @param string $scope
   * @return array
   */
  protected function _files($category, $locale, $scope) {
    $root = $this->_config['path'];
    $name = $scope ? $scope : 'default';
    $templateAt = strpos($category, 'Template');

    if ($templateAt !== false) {
      $prefix = substr($category, 0, $templateAt);
      return array("{$root}/{$prefix}_{$name}.pot");
    }
    $section = strtoupper($category === 'message' ? 'messages' : $category);
    $base = $root . '/' . $locale . '/LC_' . $section . '/' . $name;

    return array($base . '.mo', $base . '.po');
  }
  /**
   * Parses portable object (PO) format.
   *
   * This parser sacrifices some features of the reference implementation. The
   * differences to that implementation are as follows.
   * - No support for comments spanning multiple lines.
   * - Translator and extracted comments are treated as being the same type.
   * - Message IDs are allowed to have encodings other than just US-ASCII.
   *
   * Flag comments (`#, `) may hold several comma separated flags; each flag is
   * stored under its own key. Items are separated by blank lines and are handed
   * to `_merge()`, which ignores items with an empty id.
   *
   * @param resource $stream
   * @return array
   */
  protected function _parsePo($stream) {
    $defaults = array(
      'ids' => array(),
      'translated' => null,
      'flags' => array(),
      'comments' => array(),
      'occurrences' => array()
    );
    $data = array();
    $item = $defaults;

    // Compare against `false` explicitly: a final line consisting of just `0`
    // (without a trailing newline) is falsy and would otherwise end the loop early.
    while (($line = fgets($stream)) !== false) {
      $line = trim($line);

      if ($line === '') {
        // A blank line terminates the current item.
        $data = $this->_merge($data, $item);
        $item = $defaults;
      } elseif (substr($line, 0, 3) === '#~ ') {
        $item['flags']['obsolete'] = true;
      } elseif (substr($line, 0, 3) === '#, ') {
        // Flags are comma separated, i.e. `#, fuzzy, php-format`.
        foreach (explode(',', substr($line, 3)) as $flag) {
          $flag = trim($flag);

          if ($flag !== '') {
            $item['flags'][$flag] = true;
          }
        }
      } elseif (substr($line, 0, 3) === '#: ') {
        $item['occurrences'][] = array(
          'file' => strtok(substr($line, 3), ':'),
          'line' => strtok(':')
        );
      } elseif (substr($line, 0, 3) === '#. ') {
        $item['comments'][] = substr($line, 3);
      } elseif ($line[0] === '#') {
        $item['comments'][] = ltrim(substr($line, 1));
      } elseif (substr($line, 0, 7) === 'msgid "') {
        $item['ids']['singular'] = substr($line, 7, -1);
      } elseif (substr($line, 0, 9) === 'msgctxt "') {
        $item['context'] = substr($line, 9, -1);
      } elseif (substr($line, 0, 8) === 'msgstr "') {
        $item['translated'] = substr($line, 8, -1);
      } elseif ($line[0] === '"') {
        // Continuation line: append to the translation once one has been started,
        // otherwise to the most recently seen id.
        $continues = isset($item['translated']) ? 'translated' : 'ids';

        if (is_array($item[$continues])) {
          end($item[$continues]);
          $item[$continues][key($item[$continues])] .= substr($line, 1, -1);
        } else {
          $item[$continues] .= substr($line, 1, -1);
        }
      } elseif (substr($line, 0, 14) === 'msgid_plural "') {
        $item['ids']['plural'] = substr($line, 14, -1);
      } elseif (substr($line, 0, 7) === 'msgstr[') {
        // `msgstr[N] "` is 11 characters long for a single digit index.
        $item['translated'][(int) substr($line, 7, 1)] = substr($line, 11, -1);
      }
    }
    // The last item is not necessarily followed by a blank line.
    return $this->_merge($data, $item);
  }
  /**
   * Parses portable object template (POT) format by delegating to the PO parser.
   *
   * @param resource $stream
   * @return array
   */
  protected function _parsePot($stream) {
    $parsed = $this->_parsePo($stream);
    return $parsed;
  }
  /**
   * Parses machine object (MO) format, independent of the machine's endian it
   * was created on. Both 32bit and 64bit systems are supported.
   *
   * Entries with an empty id are skipped. Ids and translations containing NUL
   * bytes are split into singular/plural ids and an array of translations
   * respectively. Entries with a zero-length translation yield an empty string.
   *
   * @param resource $stream
   * @return array
   * @throws RangeException If stream content has an invalid format.
   */
  protected function _parseMo($stream) {
    $stat = fstat($stream);

    if ($stat['size'] < self::MO_HEADER_SIZE) {
      throw new RangeException("MO stream content has an invalid format.");
    }
    // Normalize to the lower 32 bits so the comparison works on 32bit and 64bit builds.
    $magic = unpack('V1', fread($stream, 4));
    $magic = hexdec(substr(dechex(current($magic)), -8));

    if ($magic == self::MO_LITTLE_ENDIAN_MAGIC) {
      $isBigEndian = false;
    } elseif ($magic == self::MO_BIG_ENDIAN_MAGIC) {
      $isBigEndian = true;
    } else {
      throw new RangeException("MO stream content has an invalid format.");
    }
    // The remaining header fields follow the magic in exactly this order.
    $fields = array(
      'formatRevision', 'count', 'offsetId',
      'offsetTranslated', 'sizeHashes', 'offsetHashes'
    );
    $header = array();

    foreach ($fields as $field) {
      $header[$field] = $this->_readLong($stream, $isBigEndian);
    }
    $count = $header['count'];
    $offsetId = $header['offsetId'];
    $offsetTranslated = $header['offsetTranslated'];

    $data = array();

    for ($i = 0; $i < $count; $i++) {
      $pluralId = null;

      // Each table entry consists of two longs: length and offset of the string.
      fseek($stream, $offsetId + $i * 8);
      $length = $this->_readLong($stream, $isBigEndian);
      $offset = $this->_readLong($stream, $isBigEndian);

      if ($length < 1) {
        continue;
      }
      fseek($stream, $offset);
      $singularId = fread($stream, $length);

      if (strpos($singularId, "\000") !== false) {
        list($singularId, $pluralId) = explode("\000", $singularId);
      }

      fseek($stream, $offsetTranslated + $i * 8);
      $length = $this->_readLong($stream, $isBigEndian);
      $offset = $this->_readLong($stream, $isBigEndian);

      // `fread()` does not accept a length of zero, so empty translations
      // must not be read from the stream.
      $translated = '';

      if ($length > 0) {
        fseek($stream, $offset);
        $translated = fread($stream, $length);
      }
      if (strpos($translated, "\000") !== false) {
        $translated = explode("\000", $translated);
      }

      $ids = array('singular' => $singularId, 'plural' => $pluralId);
      $data = $this->_merge($data, compact('ids', 'translated'));
    }
    return $data;
  }
  /**
   * Reads an unsigned long (4 bytes) from stream respecting endianess.
   *
   * The value is normalized to its lower 32 bits through its hexadecimal
   * representation, the same way the magic number is handled in `_parseMo()`.
   * Cutting the decimal representation instead would corrupt any value of
   * 100000000 and above.
   *
   * @param resource $stream
   * @param boolean $isBigEndian
   * @return integer The value read. On 32bit builds values exceeding the integer
   *         range are returned as float.
   * @throws RangeException If less than 4 bytes could be read from the stream.
   */
  protected function _readLong($stream, $isBigEndian) {
    $bytes = fread($stream, 4);

    if ($bytes === false || strlen($bytes) < 4) {
      throw new RangeException("MO stream content has an invalid format.");
    }
    $result = unpack($isBigEndian ? 'N1' : 'V1', $bytes);

    return hexdec(substr(dechex(current($result)), -8));
  }
  /**
   * Compiles data into portable object (PO) format.
   *
   * To improve portability across libraries the header is generated according
   * to the format of the output of `xgettext`. This means using the same names for
   * placeholders as well as including an empty entry. The empty entry at the
   * beginning aids in parsing the file as it _attracts_ the preceding comments and
   * following metadata when parsed which could otherwise be mistaken as a continued
   * translation. The only difference in the header format is the initial header which
   * just features one line of text.
   *
   * Items lacking the optional `'occurrences'`, `'comments'`, `'flags'` or
   * `'translated'` keys are written as if those were empty.
   *
   * @param resource $stream
   * @param array $data
   * @return boolean `false` if writing to the stream failed, `true` otherwise.
   */
  protected function _compilePo($stream, array $data) {
    $header = array(
      '# This file is distributed under the same license as the PACKAGE package.',
      '#',
      'msgid ""',
      'msgstr ""',
      '"Project-Id-Version: PACKAGE VERSION\n"',
      '"POT-Creation-Date: YEAR-MO-DA HO:MI+ZONE\n"',
      '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"',
      '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"',
      '"Language-Team: LANGUAGE <EMAIL@ADDRESS>\n"',
      '"MIME-Version: 1.0\n"',
      '"Content-Type: text/plain; charset=UTF-8\n"',
      '"Content-Transfer-Encoding: 8bit\n"',
      '"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"',
      ''
    );
    if (fwrite($stream, implode("\n", $header) . "\n") === false) {
      return false;
    }
    // Optional keys; the union below only fills in those that are missing.
    $defaults = array(
      'occurrences' => array(),
      'comments' => array(),
      'flags' => array(),
      'translated' => null
    );

    foreach ($data as $item) {
      $item = $this->_prepareForWrite($item) + $defaults;
      $output = array();

      foreach ($item['occurrences'] as $occurrence) {
        $output[] = "#: {$occurrence['file']}:{$occurrence['line']}";
      }
      foreach ($item['comments'] as $comment) {
        $output[] = "#. {$comment}";
      }
      foreach ($item['flags'] as $flag => $value) {
        $output[] = "#, {$flag}";
      }
      $output[] = "msgid \"{$item['ids']['singular']}\"";

      if (isset($item['ids']['plural'])) {
        $output[] = "msgid_plural \"{$item['ids']['plural']}\"";

        // Untranslated plural items get two empty plural forms.
        $forms = (array) $item['translated'] ?: array(null, null);

        foreach ($forms as $index => $form) {
          $output[] = "msgstr[{$index}] \"{$form}\"";
        }
      } else {
        if (is_array($item['translated'])) {
          $item['translated'] = array_pop($item['translated']);
        }
        $output[] = "msgstr \"{$item['translated']}\"";
      }
      // The trailing empty element produces the blank line separating items.
      $output[] = '';

      if (fwrite($stream, implode("\n", $output) . "\n") === false) {
        return false;
      }
    }
    return true;
  }
  /**
   * Compiles data into portable object template (POT) format by delegating to
   * the PO compiler.
   *
   * @param resource $stream
   * @param array $data
   * @return boolean Success.
   */
  protected function _compilePot($stream, array $data) {
    $result = $this->_compilePo($stream, $data);
    return $result;
  }
  /**
   * Compiles data into machine object (MO) format.
   *
   * Currently a placeholder: nothing is written to the stream.
   *
   * @param resource $stream
   * @param array $data
   * @return void
   * @todo Determine if needed and implement compiler.
   */
  protected function _compileMo($stream, array $data) {
    // No MO compiler has been implemented yet.
  }
  /**
   * Prepares an item before it is being written and escapes fields.
   *
   * All characters from \000 to \037 (this includes new line and tab characters)
   * as well as the backslash (`\`) and the double quote (`"`) are escaped.
   *
   * Literal Windows CRLFs (`\r\n`) are converted to LFs (`\n`) to improve cross platform
   * compatibility. Escaped single quotes (`'`) are unescaped as they should not need to be.
   * Double escaped characters are maintained and not escaped once again.
   *
   * The singular id falls back to the item's `'id'` when it is not set, and the
   * value of `LITHIUM_APP_PATH` is removed from the file of each occurrence.
   *
   * @link http://www.asciitable.com
   * @see lithium\g11n\catalog\Adapter::_prepareForWrite()
   * @param array $item
   * @return array
   */
  protected function _prepareForWrite(array $item) {
    $filter = function ($value) use (&$filter) {
      if (is_array($value)) {
        return array_map($filter, $value);
      }
      // Nested `null` values (i.e. untranslated plural forms) are cast explicitly;
      // the result is the same empty string the implicit conversion produced.
      $value = strtr((string) $value, array("\\'" => "'", "\\\\" => "\\", "\r\n" => "\n"));
      return addcslashes($value, "\0..\37\\\"");
    };

    foreach (array('id', 'ids', 'translated') as $field) {
      if (isset($item[$field])) {
        $item[$field] = $filter($item[$field]);
      }
    }
    if (!isset($item['ids']['singular'])) {
      // Always define the key, even if there is no id to fall back to.
      $item['ids']['singular'] = isset($item['id']) ? $item['id'] : null;
    }
    if (isset($item['occurrences'])) {
      // Assign by key instead of iterating by reference, so no reference is
      // left behind inside the array that is passed on.
      foreach ($item['occurrences'] as $key => $occurrence) {
        $file = str_replace(LITHIUM_APP_PATH, '', $occurrence['file']);
        $item['occurrences'][$key]['file'] = $file;
      }
    }
    return parent::_prepareForWrite($item);
  }
  /**
   * Merges an item into given data and unescapes fields.
   *
   * Please note that items with an id containing exclusively whitespace characters
   * or are empty are **not** being merged. Whitespace characters are space, tab, vertical
   * tab, line feed, carriage return and form feed. An id of `"0"` is a regular id
   * and is merged.
   *
   * @see lithium\g11n\catalog\Adapter::_merge()
   * @param array $data Data to merge item into.
   * @param array $item Item to merge into $data.
   * @return array The merged data.
   */
  protected function _merge(array $data, array $item) {
    $filter = function ($value) use (&$filter) {
      if (is_array($value)) {
        return array_map($filter, $value);
      }
      // Nested `null` values (i.e. a missing plural id) are cast explicitly; the
      // result is the same empty string the implicit conversion produced.
      return stripcslashes((string) $value);
    };

    foreach (array('id', 'ids', 'translated') as $field) {
      if (isset($item[$field])) {
        $item[$field] = $filter($item[$field]);
      }
    }
    if (isset($item['ids']['singular'])) {
      $item['id'] = $item['ids']['singular'];
    }
    // Do not use `empty()` here: it treats the valid id `"0"` as empty.
    $id = isset($item['id']) ? (string) $item['id'] : '';

    if ($id === '' || ctype_space($id)) {
      return $data;
    }
    return parent::_merge($data, $item);
  }
  467. }
  468. ?>