PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/g11n/Locale.php

https://bitbucket.org/d1rk/lithium
PHP | 304 lines | 134 code | 23 blank | 147 comment | 23 complexity | 83628487c8b090188b8b39b9d3ce69e3 MD5 | raw file
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2012, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/bsd-license.php The BSD License
  7. */
  8. namespace lithium\g11n;
  9. use BadMethodCallException;
  10. use InvalidArgumentException;
  11. use lithium\action\Request as ActionRequest;
  12. use lithium\console\Request as ConsoleRequest;
  13. /**
  14. * The `Locale` class provides methods to deal with locale identifiers. The locale
  15. * (here: _locale identifier_) is used to distinguish among different sets of common
  16. * preferences.
  17. *
  18. * In order to avoid unnecessary overhead all methods throughout the framework accepting
  19. * a locale require it to be well-formed according to the structure laid out below. For
  20. * assuring the correct format use `Locale::canonicalize()` once on the locale.
  21. *
  22. * However the methods within this class will also work with not-so-well-formed locales.
  23. * They accept both underscores and hyphens as separators between and don't care about the
  24. * case of the individual tags.
  25. *
  26. * The identifier used by Lithium is based in its structure upon Unicode's
  27. * language identifier and is compliant to BCP 47.
  28. *
  29. * `language[_Script][_TERRITORY][_VARIANT]`
  30. * - `language` The spoken language, here represented by an ISO 639-1 code,
  31. * where not available ISO 639-3 and ISO 639-5 codes are allowed too) tag.
  32. * The tag should be lower-cased and is required.
  33. * - `Script` The tag should have it's first character capitalized, all others
  34. * lower-cased. The tag is optional.
  35. * - `TERRITORY` A geographical area, here represented by an ISO 3166-1 code.
  36. * Should be all upper-cased and is optional.
  37. * - `VARIANT` Should be all upper-cased and is optional.
  38. *
  39. * @link http://www.unicode.org/reports/tr35/tr35-12.html#Identifiers
  40. * @link http://www.rfc-editor.org/rfc/bcp/bcp47.txt
  41. * @link http://www.iana.org/assignments/language-subtag-registry
  42. */
  43. class Locale extends \lithium\core\StaticObject {
  44. /**
  45. * Properties for locale tags.
  46. *
  47. * @var array
  48. */
  49. protected static $_tags = array(
  50. 'language' => array('formatter' => 'strtolower'),
  51. 'script' => array('formatter' => array('strtolower', 'ucfirst')),
  52. 'territory' => array('formatter' => 'strtoupper'),
  53. 'variant' => array('formatter' => 'strtoupper')
  54. );
  55. /**
  56. * Magic method enabling `language`, `script`, `territory` and `variant`
  57. * methods to parse and retrieve individual tags from a locale.
  58. *
  59. * {{{
  60. * Locale::language('en_US'); // returns 'en'
  61. * Locale::territory('en_US'); // returns 'US'
  62. * }}}
  63. *
  64. * @see lithium\g11n\Locale::$_tags
  65. * @see lithium\g11n\Locale::decompose()
  66. * @param string $method
  67. * @param array $params
  68. * @return mixed
  69. */
  70. public static function __callStatic($method, $params = array()) {
  71. $tags = static::invokeMethod('decompose', $params);
  72. if (!isset(static::$_tags[$method])) {
  73. throw new BadMethodCallException("Invalid locale tag `{$method}`.");
  74. }
  75. return isset($tags[$method]) ? $tags[$method] : null;
  76. }
  77. /**
  78. * Composes a locale from locale tags. This is the pendant to `Locale::decompose()`.
  79. *
  80. * @param array $tags An array as obtained from `Locale::decompose()`.
  81. * @return string A locale with tags separated by underscores or `null`
  82. * if none of the passed tags could be used to compose a locale.
  83. */
  84. public static function compose($tags) {
  85. $result = array();
  86. foreach (static::$_tags as $name => $tag) {
  87. if (isset($tags[$name])) {
  88. $result[] = $tags[$name];
  89. }
  90. }
  91. if ($result) {
  92. return implode('_', $result);
  93. }
  94. }
  95. /**
  96. * Parses a locale into locale tags. This is the pendant to `Locale::compose()``.
  97. *
  98. * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
  99. * @return array Parsed language, script, territory and variant tags.
  100. * @throws InvalidArgumentException
  101. */
  102. public static function decompose($locale) {
  103. $regex = '(?P<language>[a-z]{2,3})';
  104. $regex .= '(?:[_-](?P<script>[a-z]{4}))?';
  105. $regex .= '(?:[_-](?P<territory>[a-z]{2}))?';
  106. $regex .= '(?:[_-](?P<variant>[a-z]{5,}))?';
  107. if (!preg_match("/^{$regex}$/i", $locale, $matches)) {
  108. throw new InvalidArgumentException("Locale `{$locale}` could not be parsed.");
  109. }
  110. return array_filter(array_intersect_key($matches, static::$_tags));
  111. }
  112. /**
  113. * Returns a locale in its canonical form with tags formatted properly.
  114. *
  115. * @param string $locale A locale in an arbitrary form (i.e. `'ZH-HANS-HK_REVISED'`).
  116. * @return string A locale in it's canonical form (i.e. `'zh_Hans_HK_REVISED'`).
  117. */
  118. public static function canonicalize($locale) {
  119. $tags = static::decompose($locale);
  120. foreach ($tags as $name => &$tag) {
  121. foreach ((array) static::$_tags[$name]['formatter'] as $formatter) {
  122. $tag = $formatter($tag);
  123. }
  124. }
  125. return static::compose($tags);
  126. }
  127. /**
  128. * Cascades a locale.
  129. *
  130. * Usage:
  131. * {{{
  132. * Locale::cascade('en_US');
  133. * // returns array('en_US', 'en', 'root')
  134. *
  135. * Locale::cascade('zh_Hans_HK_REVISED');
  136. * // returns array('zh_Hans_HK_REVISED', 'zh_Hans_HK', 'zh_Hans', 'zh', 'root')
  137. * }}}
  138. *
  139. * @link http://www.unicode.org/reports/tr35/tr35-13.html#Locale_Inheritance
  140. * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
  141. * @return array Indexed array of locales (starting with the most specific one).
  142. */
  143. public static function cascade($locale) {
  144. $locales[] = $locale;
  145. if ($locale === 'root') {
  146. return $locales;
  147. }
  148. $tags = static::decompose($locale);
  149. while (count($tags) > 1) {
  150. array_pop($tags);
  151. $locales[] = static::compose($tags);
  152. }
  153. $locales[] = 'root';
  154. return $locales;
  155. }
  156. /**
  157. * Searches an array of locales for the best match to a locale. The locale
  158. * is iteratively simplified until either it matches one of the locales
  159. * in the list or the locale can't be further simplified.
  160. *
  161. * This method partially implements the lookup matching scheme as described
  162. * in RFC 4647, section 3.4 and thus does not strictly conform to the
  163. * specification.
  164. *
  165. * Differences to specification:
  166. * - No support for wildcards in the to-be-matched locales.
  167. * - No support for locales with private subtags.
  168. * - No support for a default return value.
  169. * - Passed locales are required to be in canonical form (i.e. `'ja_JP'`).
  170. *
  171. * @link http://www.ietf.org/rfc/rfc4647.txt
  172. * @param array $locales Locales to match against `$locale`.
  173. * @param string $locale A locale in it's canonical form (i.e. `'zh_Hans_HK_REVISED'`).
  174. * @return string The matched locale.
  175. */
  176. public static function lookup($locales, $locale) {
  177. $tags = static::decompose($locale);
  178. $count = count($tags);
  179. while ($count > 0) {
  180. if (($key = array_search(static::compose($tags), $locales)) !== false) {
  181. return $locales[$key];
  182. } elseif ($count == 1) {
  183. foreach ($locales as $currentLocale) {
  184. if (strpos($currentLocale, current($tags) . '_') === 0) {
  185. return $currentLocale;
  186. }
  187. }
  188. }
  189. if (($key = array_search(static::compose($tags), $locales)) !== false) {
  190. return $locales[$key];
  191. }
  192. array_pop($tags);
  193. $count = count($tags);
  194. }
  195. }
  196. /**
  197. * Determines the preferred locale from a request or array. Optionally negotiates
  198. * the preferred locale with available locales.
  199. *
  200. * @see lithium\g11n\Locale::_preferredAction()
  201. * @see lithium\g11n\Locale::_preferredConsole()
  202. * @see lithium\g11n\Locale::lookup()
  203. * @param object|array $request An action or console request object or an array of locales.
  204. * @param array $available A list of locales to negotiate the preferred locale with.
  205. * @return string The preferred locale in it's canonical form (i.e. `'fr_CA'`).
  206. * @todo Rewrite this to remove hard-coded class names.
  207. */
  208. public static function preferred($request, $available = null) {
  209. if (is_array($request)) {
  210. $result = $request;
  211. } elseif ($request instanceof ActionRequest) {
  212. $result = static::_preferredAction($request);
  213. } elseif ($request instanceof ConsoleRequest) {
  214. $result = static::_preferredConsole($request);
  215. } else {
  216. return null;
  217. }
  218. if (!$available) {
  219. return array_shift($result);
  220. }
  221. foreach ((array) $result as $locale) {
  222. if ($match = static::lookup($available, $locale)) {
  223. return $match;
  224. }
  225. }
  226. }
  227. /**
  228. * Detects preferred locales from an action request by looking at the
  229. * `'Accept-Language'` header as described by RFC 2616, section 14.4.
  230. *
  231. * @link http://www.ietf.org/rfc/rfc2616.txt
  232. * @param object $request An instance of `lithium\action\Request`.
  233. * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`).
  234. */
  235. protected static function _preferredAction($request) {
  236. $regex = '/^\s*(?P<locale>\w\w(?:[-]\w\w)?)(?:;q=(?P<quality>[0-9]+\.[0-9]+))?\s*$/';
  237. $result = array();
  238. foreach (explode(',', $request->env('HTTP_ACCEPT_LANGUAGE')) as $part) {
  239. if (preg_match($regex, $part, $matches)) {
  240. $locale = static::canonicalize($matches['locale']);
  241. $quality = isset($matches['quality']) ? $matches['quality'] : 1;
  242. $result[$locale] = $quality;
  243. }
  244. }
  245. arsort($result);
  246. return array_keys($result);
  247. }
  248. /**
  249. * Detects preferred locales from a console request by looking at certain
  250. * environment variables. The environment variables may be present or not
  251. * depending on your system. If multiple variables are present the following
  252. * hierarchy is used: `'LANGUAGE'`, `'LC_ALL'`, `'LANG'`.
  253. *
  254. * The locales of the `'LC_ALL'` and the `'LANG'` are formatted according
  255. * to the posix standard: `language(_territory)(.encoding)(@modifier)`.
  256. * Locales having such a format are automatically canonicalized and transformed
  257. * into the `Locale` class' format.
  258. *
  259. * @link http://www.linux.com/archive/feature/53781
  260. * @param object $request An instance of `lithium\console\Request`.
  261. * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`).
  262. */
  263. protected static function _preferredConsole($request) {
  264. $regex = '(?P<locale>[\w\_]+)(\.|@|$)+';
  265. $result = array();
  266. if ($value = $request->env('LANGUAGE')) {
  267. return explode(':', $value);
  268. }
  269. foreach (array('LC_ALL', 'LANG') as $variable) {
  270. $value = $request->env($variable);
  271. if (!$value || $value == 'C' || $value == 'POSIX') {
  272. continue;
  273. }
  274. if (preg_match("/{$regex}/", $value, $matches)) {
  275. return (array) $matches['locale'];
  276. }
  277. }
  278. return $result;
  279. }
  280. }
  281. ?>