PageRenderTime 26ms CodeModel.GetById 40ms RepoModel.GetById 0ms app.codeStats 0ms

/src/Symfony/Component/String/Slugger/AsciiSlugger.php

https://github.com/FabienD/symfony
PHP | 176 lines | 121 code | 23 blank | 32 comment | 18 complexity | 4c3b6f8f5460573947014bb63b34b4c0 MD5 | raw file
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String\Slugger;
  11. use Symfony\Component\String\AbstractUnicodeString;
  12. use Symfony\Component\String\UnicodeString;
  13. use Symfony\Contracts\Translation\LocaleAwareInterface;
  // Fail at load time with an actionable message: the class declared in this
  // file implements LocaleAwareInterface, which is shipped by a separate package.
  if (false === interface_exists(LocaleAwareInterface::class)) {
      throw new \LogicException('You cannot use the "Symfony\Component\String\Slugger\AsciiSlugger" as the "symfony/translation-contracts" package is not installed. Try running "composer require symfony/translation-contracts".');
  }
  /**
   * Builds ASCII-only slugs from arbitrary strings.
   *
   * A slug is produced by converting the input to ASCII (optionally with a
   * locale-specific transliterator), substituting configured symbols, and then
   * collapsing every run of non-alphanumeric characters into a separator.
   *
   * @author Titouan Galopin <galopintitouan@gmail.com>
   */
  class AsciiSlugger implements SluggerInterface, LocaleAwareInterface
  {
      /**
       * Transliterator id to use for each supported locale.
       */
      private const LOCALE_TO_TRANSLITERATOR_ID = [
          'am' => 'Amharic-Latin',
          'ar' => 'Arabic-Latin',
          'az' => 'Azerbaijani-Latin',
          'be' => 'Belarusian-Latin',
          'bg' => 'Bulgarian-Latin',
          'bn' => 'Bengali-Latin',
          'de' => 'de-ASCII',
          'el' => 'Greek-Latin',
          'fa' => 'Persian-Latin',
          'he' => 'Hebrew-Latin',
          'hy' => 'Armenian-Latin',
          'ka' => 'Georgian-Latin',
          'kk' => 'Kazakh-Latin',
          'ky' => 'Kirghiz-Latin',
          'ko' => 'Korean-Latin',
          'mk' => 'Macedonian-Latin',
          'mn' => 'Mongolian-Latin',
          'or' => 'Oriya-Latin',
          'ps' => 'Pashto-Latin',
          'ru' => 'Russian-Latin',
          'sr' => 'Serbian-Latin',
          'sr_Cyrl' => 'Serbian-Latin',
          'th' => 'Thai-Latin',
          'tk' => 'Turkmen-Latin',
          'uk' => 'Ukrainian-Latin',
          'uz' => 'Uzbek-Latin',
          'zh' => 'Han-Latin',
      ];

      /**
       * Locale used by slug() when the caller does not pass one; null when unset.
       */
      private ?string $defaultLocale;

      /**
       * Symbol substitutions: either an array of "symbol => replacement" maps
       * keyed by locale, or a closure called with the string and the locale.
       */
      private \Closure|array $symbolsMap = [
          'en' => ['@' => 'at', '&' => 'and'],
      ];

      /**
       * Cache of transliterators per locale; null marks a locale that has none.
       *
       * @var array<string, \Transliterator|null>
       */
      private array $transliterators = [];

      /**
       * @param string|null         $defaultLocale Locale used when slug() receives none
       * @param array|\Closure|null $symbolsMap    Replaces the built-in symbols map when not null
       */
      public function __construct(?string $defaultLocale = null, array|\Closure|null $symbolsMap = null)
      {
          $this->defaultLocale = $defaultLocale;
          $this->symbolsMap = $symbolsMap ?? $this->symbolsMap;
      }

      /**
       * {@inheritdoc}
       */
      public function setLocale(string $locale)
      {
          $this->defaultLocale = $locale;
      }

      /**
       * {@inheritdoc}
       *
       * Returns an empty string when no default locale has been configured:
       * the declared return type forbids null, and slug() handles an empty
       * locale exactly like a missing one.
       */
      public function getLocale(): string
      {
          return $this->defaultLocale ?? '';
      }

      /**
       * {@inheritdoc}
       *
       * @param string      $string    The text to turn into a slug
       * @param string      $separator Replaces each run of non-alphanumeric characters
       * @param string|null $locale    Overrides the default locale for this call
       */
      public function slug(string $string, string $separator = '-', ?string $locale = null): AbstractUnicodeString
      {
          $locale ??= $this->defaultLocale;

          $transliterator = [];
          if ($locale && ('de' === $locale || str_starts_with($locale, 'de_'))) {
              // Use the shortcut for German in UnicodeString::ascii() if possible (faster and no requirement on intl)
              $transliterator = ['de-ASCII'];
          } elseif (\function_exists('transliterator_transliterate') && $locale) {
              // A null result (no transliterator for this locale) casts to an empty rule list.
              $transliterator = (array) $this->createTransliterator($locale);
          }

          if ($this->symbolsMap instanceof \Closure) {
              // If the symbols map is passed as a closure, there is no need to fallback to the parent locale
              // as the closure can just provide substitutions for all locales of interest.
              $symbolsMap = $this->symbolsMap;
              // Prepended so that the closure is the first rule handed to ascii().
              array_unshift($transliterator, static function ($s) use ($symbolsMap, $locale) {
                  return $symbolsMap($s, $locale);
              });
          }

          $unicodeString = (new UnicodeString($string))->ascii($transliterator);

          if (\is_array($this->symbolsMap)) {
              // Prefer the exact locale, then fall back to its parent (e.g. "en" for "en_GB").
              $map = $this->symbolsMap[$locale] ?? null;
              if (null === $map) {
                  $parent = self::getParentLocale($locale);
                  if ($parent) {
                      $map = $this->symbolsMap[$parent] ?? null;
                  }
              }

              if ($map) {
                  foreach ($map as $char => $replace) {
                      // Surrounding spaces keep the replacement separated from adjacent words.
                      $unicodeString = $unicodeString->replace($char, ' '.$replace.' ');
                  }
              }
          }

          return $unicodeString
              ->replaceMatches('/[^A-Za-z0-9]++/', $separator)
              ->trim($separator)
          ;
      }

      /**
       * Returns the transliterator for a locale, or null when neither the locale
       * nor its direct parent has one. Results (including null) are cached.
       */
      private function createTransliterator(string $locale): ?\Transliterator
      {
          // array_key_exists() rather than isset(): a cached null is a valid answer.
          if (\array_key_exists($locale, $this->transliterators)) {
              return $this->transliterators[$locale];
          }

          // Exact locale supported, cache and return
          if ($id = self::LOCALE_TO_TRANSLITERATOR_ID[$locale] ?? null) {
              return $this->transliterators[$locale] = \Transliterator::create($id.'/BGN') ?? \Transliterator::create($id);
          }

          // Locale not supported and no parent: remember that there is no transliterator for it
          if (!$parent = self::getParentLocale($locale)) {
              return $this->transliterators[$locale] = null;
          }

          // The parent is not supported either. Cache the miss for the requested
          // locale only: the parent's own cache entry must stay untouched, because
          // the parent may itself resolve through its own parent (e.g. "zh_Hans"
          // resolves through "zh") and must not be overwritten with null.
          if (!$id = self::LOCALE_TO_TRANSLITERATOR_ID[$parent] ?? null) {
              return $this->transliterators[$locale] = null;
          }

          // Use the parent locale (ie. "ru" for "ru_RU") and cache both locales
          $transliterator = \Transliterator::create($id.'/BGN') ?? \Transliterator::create($id);

          return $this->transliterators[$locale] = $this->transliterators[$parent] = $transliterator;
      }

      /**
       * Returns the locale with its last "_"-separated segment removed
       * (e.g. "sr_Cyrl" for "sr_Cyrl_RS"), or null when there is no such segment
       * or the locale is empty.
       */
      private static function getParentLocale(?string $locale): ?string
      {
          if (!$locale) {
              return null;
          }

          // strrchr() returns the tail starting at the last "_", or false when there is none.
          if (false === $str = strrchr($locale, '_')) {
              // no parent locale
              return null;
          }

          return substr($locale, 0, -\strlen($str));
      }
  }