PageRenderTime 52ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/src/AliasCleaner.php

https://gitlab.com/Drulenium-bot/pathauto
PHP | 351 lines | 204 code | 47 blank | 100 comment | 21 complexity | bd8508569ce2b8104bec4e410122ab6f MD5 | raw file
  1. <?php
  2. namespace Drupal\pathauto;
  3. use Drupal\Component\Render\PlainTextOutput;
  4. use Drupal\Component\Transliteration\TransliterationInterface;
  5. use Drupal\Component\Utility\Html;
  6. use Drupal\Component\Utility\Unicode;
  7. use Drupal\Core\Cache\CacheBackendInterface;
  8. use Drupal\Core\Config\ConfigFactoryInterface;
  9. use Drupal\Core\Extension\ModuleHandlerInterface;
  10. use Drupal\Core\Language\LanguageManagerInterface;
  11. /**
  12. * Provides an alias cleaner.
  13. */
  14. class AliasCleaner implements AliasCleanerInterface {
  15. /**
  16. * The config factory.
  17. *
  18. * @var \Drupal\Core\Config\ConfigFactoryInterface
  19. */
  20. protected $configFactory;
  21. /**
  22. * The alias storage helper.
  23. *
  24. * @var AliasStorageHelperInterface
  25. */
  26. protected $aliasStorageHelper;
  27. /**
  28. * Language manager.
  29. *
  30. * @var \Drupal\Core\Language\LanguageManagerInterface
  31. */
  32. protected $languageManager;
  33. /**
  34. * Cache backend.
  35. *
  36. * @var \Drupal\Core\Cache\CacheBackendInterface
  37. */
  38. protected $cacheBackend;
  39. /**
  40. * Calculated settings cache.
  41. *
  42. * @todo Split this up into separate properties.
  43. *
  44. * @var array
  45. */
  46. protected $cleanStringCache = array();
  47. /**
  48. * Transliteration service.
  49. *
  50. * @var \Drupal\Component\Transliteration\TransliterationInterface
  51. */
  52. protected $transliteration;
  53. /**
  54. * The module handler.
  55. *
  56. * @var \Drupal\Core\Extension\ModuleHandlerInterface
  57. */
  58. protected $moduleHandler;
  59. /**
  60. * Creates a new AliasCleaner.
  61. *
  62. * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
  63. * The config factory.
  64. * @param \Drupal\pathauto\AliasStorageHelperInterface $alias_storage_helper
  65. * The alias storage helper.
  66. * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager
  67. * The language manager.
  68. * @param \Drupal\Core\Cache\CacheBackendInterface $cache_backend
  69. * The cache backend.
  70. * @param \Drupal\Component\Transliteration\TransliterationInterface $transliteration
  71. * The transliteration service.
  72. * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler
  73. * The module handler.
  74. */
  75. public function __construct(ConfigFactoryInterface $config_factory, AliasStorageHelperInterface $alias_storage_helper, LanguageManagerInterface $language_manager, CacheBackendInterface $cache_backend, TransliterationInterface $transliteration, ModuleHandlerInterface $module_handler) {
  76. $this->configFactory = $config_factory;
  77. $this->aliasStorageHelper = $alias_storage_helper;
  78. $this->languageManager = $language_manager;
  79. $this->cacheBackend = $cache_backend;
  80. $this->transliteration = $transliteration;
  81. $this->moduleHandler = $module_handler;
  82. }
  83. /**
  84. * {@inheritdoc}
  85. */
  86. public function cleanAlias($alias) {
  87. $config = $this->configFactory->get('pathauto.settings');
  88. $alias_max_length = min($config->get('max_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength());
  89. $output = $alias;
  90. // Trim duplicate, leading, and trailing separators. Do this before cleaning
  91. // backslashes since a pattern like "[token1]/[token2]-[token3]/[token4]"
  92. // could end up like "value1/-/value2" and if backslashes were cleaned first
  93. // this would result in a duplicate blackslash.
  94. $output = $this->getCleanSeparators($output);
  95. // Trim duplicate, leading, and trailing backslashes.
  96. $output = $this->getCleanSeparators($output, '/');
  97. // Shorten to a logical place based on word boundaries.
  98. $output = Unicode::truncate($output, $alias_max_length, TRUE);
  99. return $output;
  100. }
  101. /**
  102. * {@inheritdoc}
  103. */
  104. public function getCleanSeparators($string, $separator = NULL) {
  105. $config = $this->configFactory->get('pathauto.settings');
  106. if (!isset($separator)) {
  107. $separator = $config->get('separator');
  108. }
  109. $output = $string;
  110. if (strlen($separator)) {
  111. // Trim any leading or trailing separators.
  112. $output = trim($output, $separator);
  113. // Escape the separator for use in regular expressions.
  114. $seppattern = preg_quote($separator, '/');
  115. // Replace multiple separators with a single one.
  116. $output = preg_replace("/$seppattern+/", $separator, $output);
  117. // Replace trailing separators around slashes.
  118. if ($separator !== '/') {
  119. $output = preg_replace("/\/+$seppattern\/+|$seppattern\/+|\/+$seppattern/", "/", $output);
  120. }
  121. else {
  122. // If the separator is a slash, we need to re-add the leading slash
  123. // dropped by the trim function.
  124. $output = '/' . $output;
  125. }
  126. }
  127. return $output;
  128. }
  129. /**
  130. * {@inheritdoc}
  131. */
  132. public function cleanString($string, array $options = array()) {
  133. if (empty($this->cleanStringCache)) {
  134. // Generate and cache variables used in this method.
  135. $config = $this->configFactory->get('pathauto.settings');
  136. $this->cleanStringCache = array(
  137. 'separator' => $config->get('separator'),
  138. 'strings' => array(),
  139. 'transliterate' => $config->get('transliterate'),
  140. 'punctuation' => array(),
  141. 'reduce_ascii' => (bool) $config->get('reduce_ascii'),
  142. 'ignore_words_regex' => FALSE,
  143. 'lowercase' => (bool) $config->get('case'),
  144. 'maxlength' => min($config->get('max_component_length'), $this->aliasStorageHelper->getAliasSchemaMaxLength()),
  145. );
  146. // Generate and cache the punctuation replacements for strtr().
  147. $punctuation = $this->getPunctuationCharacters();
  148. foreach ($punctuation as $name => $details) {
  149. $action = $config->get('punctuation.' . $name);
  150. switch ($action) {
  151. case PathautoGeneratorInterface::PUNCTUATION_REMOVE:
  152. $this->cleanStringCache['punctuation'][$details['value']] = '';
  153. break;
  154. case PathautoGeneratorInterface::PUNCTUATION_REPLACE:
  155. $this->cleanStringCache['punctuation'][$details['value']] = $this->cleanStringCache['separator'];
  156. break;
  157. case PathautoGeneratorInterface::PUNCTUATION_DO_NOTHING:
  158. // Literally do nothing.
  159. break;
  160. }
  161. }
  162. // Generate and cache the ignored words regular expression.
  163. $ignore_words = $config->get('ignore_words');
  164. $ignore_words_regex = preg_replace(array('/^[,\s]+|[,\s]+$/', '/[,\s]+/'), array('', '\b|\b'), $ignore_words);
  165. if ($ignore_words_regex) {
  166. $this->cleanStringCache['ignore_words_regex'] = '\b' . $ignore_words_regex . '\b';
  167. if (function_exists('mb_eregi_replace')) {
  168. mb_regex_encoding('UTF-8');
  169. $this->cleanStringCache['ignore_words_callback'] = 'mb_eregi_replace';
  170. }
  171. else {
  172. $this->cleanStringCache['ignore_words_callback'] = 'preg_replace';
  173. $this->cleanStringCache['ignore_words_regex'] = '/' . $this->cleanStringCache['ignore_words_regex'] . '/i';
  174. }
  175. }
  176. }
  177. // Empty strings do not need any processing.
  178. if ($string === '' || $string === NULL) {
  179. return '';
  180. }
  181. $langcode = NULL;
  182. if (!empty($options['language'])) {
  183. $langcode = $options['language']->getId();
  184. }
  185. elseif (!empty($options['langcode'])) {
  186. $langcode = $options['langcode'];
  187. }
  188. // Check if the string has already been processed, and if so return the
  189. // cached result.
  190. if (isset($this->cleanStringCache['strings'][$langcode][(string) $string])) {
  191. return $this->cleanStringCache['strings'][$langcode][(string) $string];
  192. }
  193. // Remove all HTML tags from the string.
  194. $output = Html::decodeEntities($string);
  195. $output = PlainTextOutput::renderFromHtml($output);
  196. // Optionally transliterate.
  197. if ($this->cleanStringCache['transliterate']) {
  198. // If the reduce strings to letters and numbers is enabled, don't bother
  199. // replacing unknown characters with a question mark. Use an empty string
  200. // instead.
  201. $output = $this->transliteration->transliterate($output, $langcode, $this->cleanStringCache['reduce_ascii'] ? '' : '?');
  202. }
  203. // Replace or drop punctuation based on user settings.
  204. $output = strtr($output, $this->cleanStringCache['punctuation']);
  205. // Reduce strings to letters and numbers.
  206. if ($this->cleanStringCache['reduce_ascii']) {
  207. $output = preg_replace('/[^a-zA-Z0-9\/]+/', $this->cleanStringCache['separator'], $output);
  208. }
  209. // Get rid of words that are on the ignore list.
  210. if ($this->cleanStringCache['ignore_words_regex']) {
  211. $words_removed = $this->cleanStringCache['ignore_words_callback']($this->cleanStringCache['ignore_words_regex'], '', $output);
  212. if (Unicode::strlen(trim($words_removed)) > 0) {
  213. $output = $words_removed;
  214. }
  215. }
  216. // Always replace whitespace with the separator.
  217. $output = preg_replace('/\s+/', $this->cleanStringCache['separator'], $output);
  218. // Trim duplicates and remove trailing and leading separators.
  219. $output = $this->getCleanSeparators($this->getCleanSeparators($output, $this->cleanStringCache['separator']));
  220. // Optionally convert to lower case.
  221. if ($this->cleanStringCache['lowercase']) {
  222. $output = Unicode::strtolower($output);
  223. }
  224. // Shorten to a logical place based on word boundaries.
  225. $output = Unicode::truncate($output, $this->cleanStringCache['maxlength'], TRUE);
  226. // Cache this result in the static array.
  227. $this->cleanStringCache['strings'][$langcode][(string) $string] = $output;
  228. return $output;
  229. }
  230. /**
  231. * {@inheritdoc}
  232. */
  233. public function getPunctuationCharacters() {
  234. if (empty($this->punctuationCharacters)) {
  235. $langcode = $this->languageManager->getCurrentLanguage()->getId();
  236. $cid = 'pathauto:punctuation:' . $langcode;
  237. if ($cache = $this->cacheBackend->get($cid)) {
  238. $this->punctuationCharacters = $cache->data;
  239. }
  240. else {
  241. $punctuation = array();
  242. $punctuation['double_quotes'] = array('value' => '"', 'name' => t('Double quotation marks'));
  243. $punctuation['quotes'] = array('value' => '\'', 'name' => t("Single quotation marks (apostrophe)"));
  244. $punctuation['backtick'] = array('value' => '`', 'name' => t('Back tick'));
  245. $punctuation['comma'] = array('value' => ',', 'name' => t('Comma'));
  246. $punctuation['period'] = array('value' => '.', 'name' => t('Period'));
  247. $punctuation['hyphen'] = array('value' => '-', 'name' => t('Hyphen'));
  248. $punctuation['underscore'] = array('value' => '_', 'name' => t('Underscore'));
  249. $punctuation['colon'] = array('value' => ':', 'name' => t('Colon'));
  250. $punctuation['semicolon'] = array('value' => ';', 'name' => t('Semicolon'));
  251. $punctuation['pipe'] = array('value' => '|', 'name' => t('Vertical bar (pipe)'));
  252. $punctuation['left_curly'] = array('value' => '{', 'name' => t('Left curly bracket'));
  253. $punctuation['left_square'] = array('value' => '[', 'name' => t('Left square bracket'));
  254. $punctuation['right_curly'] = array('value' => '}', 'name' => t('Right curly bracket'));
  255. $punctuation['right_square'] = array('value' => ']', 'name' => t('Right square bracket'));
  256. $punctuation['plus'] = array('value' => '+', 'name' => t('Plus sign'));
  257. $punctuation['equal'] = array('value' => '=', 'name' => t('Equal sign'));
  258. $punctuation['asterisk'] = array('value' => '*', 'name' => t('Asterisk'));
  259. $punctuation['ampersand'] = array('value' => '&', 'name' => t('Ampersand'));
  260. $punctuation['percent'] = array('value' => '%', 'name' => t('Percent sign'));
  261. $punctuation['caret'] = array('value' => '^', 'name' => t('Caret'));
  262. $punctuation['dollar'] = array('value' => '$', 'name' => t('Dollar sign'));
  263. $punctuation['hash'] = array('value' => '#', 'name' => t('Number sign (pound sign, hash)'));
  264. $punctuation['at'] = array('value' => '@', 'name' => t('At sign'));
  265. $punctuation['exclamation'] = array('value' => '!', 'name' => t('Exclamation mark'));
  266. $punctuation['tilde'] = array('value' => '~', 'name' => t('Tilde'));
  267. $punctuation['left_parenthesis'] = array('value' => '(', 'name' => t('Left parenthesis'));
  268. $punctuation['right_parenthesis'] = array('value' => ')', 'name' => t('Right parenthesis'));
  269. $punctuation['question_mark'] = array('value' => '?', 'name' => t('Question mark'));
  270. $punctuation['less_than'] = array('value' => '<', 'name' => t('Less-than sign'));
  271. $punctuation['greater_than'] = array('value' => '>', 'name' => t('Greater-than sign'));
  272. $punctuation['slash'] = array('value' => '/', 'name' => t('Slash'));
  273. $punctuation['back_slash'] = array('value' => '\\', 'name' => t('Backslash'));
  274. // Allow modules to alter the punctuation list and cache the result.
  275. $this->moduleHandler->alter('pathauto_punctuation_chars', $punctuation);
  276. $this->cacheBackend->set($cid, $punctuation);
  277. $this->punctuationCharacters = $punctuation;
  278. }
  279. }
  280. return $this->punctuationCharacters;
  281. }
  282. /**
  283. * {@inheritdoc}
  284. */
  285. public function cleanTokenValues(&$replacements, $data = array(), $options = array()) {
  286. foreach ($replacements as $token => $value) {
  287. // Only clean non-path tokens.
  288. if (!preg_match('/(path|alias|url|url-brief)\]$/', $token)) {
  289. $replacements[$token] = $this->cleanString($value, $options);
  290. }
  291. }
  292. }
  293. /**
  294. * {@inheritdoc}
  295. */
  296. public function resetCaches() {
  297. $this->cleanStringCache = array();
  298. }
  299. }