PageRenderTime 45ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/craftcms/cms/src/helpers/Search.php

https://bitbucket.org/mojointeractive/market
PHP | 264 lines | 198 code | 19 blank | 47 comment | 6 complexity | 8634c1d28b69f0501bc90b15fbbe6e91 MD5 | raw file
Possible License(s): GPL-3.0, MIT, BSD-3-Clause, GPL-2.0, LGPL-2.1
  1. <?php
  2. /**
  3. * @link https://craftcms.com/
  4. * @copyright Copyright (c) Pixel & Tonic, Inc.
  5. * @license https://craftcms.github.io/license/
  6. */
  7. namespace craft\helpers;
  8. /**
  9. * Search helper.
  10. *
  11. * @author Pixel & Tonic, Inc. <support@pixelandtonic.com>
  12. * @since 3.0
  13. */
  14. class Search
  15. {
  16. // Public Methods
  17. // =========================================================================
  18. /**
  19. * Normalizes search keywords.
  20. *
  21. * @param string[]|string $str The dirty keywords
  22. * @param array $ignore Ignore words to strip out
  23. * @param bool $processCharMap Whether to remove punctuation and diacritics (default is true)
  24. * @return string The cleansed keywords.
  25. */
  26. public static function normalizeKeywords($str, array $ignore = [], bool $processCharMap = true): string
  27. {
  28. // Flatten
  29. if (is_array($str)) {
  30. $str = StringHelper::toString($str, ' ');
  31. }
  32. // Get rid of tags
  33. $str = strip_tags($str);
  34. // Convert non-breaking spaces entities to regular ones
  35. $str = str_replace(['&nbsp;', '&#160;', '&#xa0;'], ' ', $str);
  36. // Get rid of entities
  37. $str = preg_replace('/&#?[a-z0-9]{2,8};/i', '', $str);
  38. // Normalize to lowercase
  39. $str = StringHelper::toLowerCase($str);
  40. if ($processCharMap) {
  41. // Remove punctuation and diacritics
  42. $str = strtr($str, self::_getCharMap());
  43. }
  44. // Remove ignore-words?
  45. if (is_array($ignore) && !empty($ignore)) {
  46. foreach ($ignore as $word) {
  47. $word = preg_quote(static::normalizeKeywords($word), '/');
  48. $str = preg_replace("/\b{$word}\b/u", '', $str);
  49. }
  50. }
  51. // Strip out new lines and superfluous spaces
  52. $str = preg_replace('/[\n\r]+/u', ' ', $str);
  53. $str = preg_replace('/\s{2,}/u', ' ', $str);
  54. // Trim white space
  55. $str = trim($str);
  56. return $str;
  57. }
  58. // Private Methods
  59. // =========================================================================
  60. /**
  61. * Get array of chars to be used for conversion.
  62. *
  63. * @return array
  64. */
  65. private static function _getCharMap(): array
  66. {
  67. // Keep local copy
  68. static $map = [];
  69. if (empty($map)) {
  70. // This will replace accented chars with non-accented chars
  71. foreach (StringHelper::asciiCharMap() as $asciiChar => $charsArray) {
  72. foreach ($charsArray as $char) {
  73. $map[$char] = $asciiChar;
  74. }
  75. }
  76. // Replace punctuation with a space
  77. foreach (self::_getPunctuation() as $value) {
  78. $map[$value] = ' ';
  79. }
  80. }
  81. // Return the char map
  82. return $map;
  83. }
  84. /**
  85. * Returns the asciiPunctuation array.
  86. *
  87. * @return array
  88. */
  89. private static function _getPunctuation(): array
  90. {
  91. // Keep local copy
  92. static $asciiPunctuation = [];
  93. if (empty($asciiPunctuation)) {
  94. $asciiPunctuation = [
  95. '!',
  96. '"',
  97. '#',
  98. '&',
  99. '\'',
  100. '(',
  101. ')',
  102. '*',
  103. '+',
  104. ',',
  105. '-',
  106. '.',
  107. '/',
  108. ':',
  109. ';',
  110. '<',
  111. '>',
  112. '?',
  113. '@',
  114. '[',
  115. '\\',
  116. ']',
  117. '^',
  118. '{',
  119. '|',
  120. '}',
  121. '~',
  122. '¡',
  123. '¢',
  124. '£',
  125. '¤',
  126. '¥',
  127. '¦',
  128. '§',
  129. '¨',
  130. '©',
  131. 'ª',
  132. '«',
  133. '¬',
  134. '®',
  135. '¯',
  136. '°',
  137. '±',
  138. '²',
  139. '³',
  140. '´',
  141. 'µ',
  142. '¶',
  143. '·',
  144. '¸',
  145. '¹',
  146. 'º',
  147. '»',
  148. '¼',
  149. '½',
  150. '¾',
  151. '¿',
  152. '×',
  153. 'ƒ',
  154. 'ˆ',
  155. '˜',
  156. '–',
  157. '—',
  158. '―',
  159. '‘',
  160. '’',
  161. '‚',
  162. '“',
  163. '”',
  164. '„',
  165. '†',
  166. '‡',
  167. '•',
  168. '‣',
  169. '…',
  170. '‰',
  171. '′',
  172. '″',
  173. '‹',
  174. '›',
  175. '‼',
  176. '‾',
  177. '⁄',
  178. '€',
  179. '™',
  180. '←',
  181. '↑',
  182. '→',
  183. '↓',
  184. '↔',
  185. '↵',
  186. '⇐',
  187. '⇑',
  188. '⇒',
  189. '⇓',
  190. '⇔',
  191. '∀',
  192. '∂',
  193. '∃',
  194. '∅',
  195. '∇',
  196. '∈',
  197. '∉',
  198. '∋',
  199. '∏',
  200. '∑',
  201. '−',
  202. '∗',
  203. '√',
  204. '∝',
  205. '∞',
  206. '∠',
  207. '∧',
  208. '∨',
  209. '∩',
  210. '∪',
  211. '∫',
  212. '∴',
  213. '∼',
  214. '≅',
  215. '≈',
  216. '≠',
  217. '≡',
  218. '≤',
  219. '≥',
  220. '⊂',
  221. '⊃',
  222. '⊄',
  223. '⊆',
  224. '⊇',
  225. '⊕',
  226. '⊗',
  227. '⊥',
  228. '⋅',
  229. '⌈',
  230. '⌉',
  231. '⌊',
  232. '⌋',
  233. '〈',
  234. '〉',
  235. '◊',
  236. '♠',
  237. '♣',
  238. '♥',
  239. '♦'
  240. ];
  241. }
  242. return $asciiPunctuation;
  243. }
  244. }