PageRenderTime 43ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/core/modules/search/src/SearchQuery.php

https://gitlab.com/reasonat/test8
PHP | 648 lines | 256 code | 71 blank | 321 comment | 51 complexity | 32eb9ef08546f8ec03892f7b61ae643c MD5 | raw file
  1. <?php
  2. namespace Drupal\search;
  3. use Drupal\Component\Utility\Unicode;
  4. use Drupal\Core\Database\Query\SelectExtender;
  5. use Drupal\Core\Database\Query\SelectInterface;
  6. /**
  7. * Search query extender and helper functions.
  8. *
  9. * Performs a query on the full-text search index for a word or words.
  10. *
  11. * This query is used by search plugins that use the search index (not all
  12. * search plugins do, as some use a different searching mechanism). It
  13. * assumes you have set up a query on the {search_index} table with alias 'i',
  14. * and will only work if the user is searching for at least one "positive"
  15. * keyword or phrase.
  16. *
  17. * For efficiency, users of this query can run the prepareAndNormalize()
  18. * method to figure out if there are any search results, before fully setting
  19. * up and calling execute() to execute the query. The scoring expressions are
  20. * not needed until the execute() step. However, it's not really necessary
  21. * to do this, because this class's execute() method does that anyway.
  22. *
  23. * During both the prepareAndNormalize() and execute() steps, there can be
  24. * problems. Call getStatus() to figure out if the query is OK or not.
  25. *
  26. * The query object is given the tag 'search_$type' and can be further
  27. * extended with hook_query_alter().
  28. */
  29. class SearchQuery extends SelectExtender {
  30. /**
  31. * Indicates no positive keywords were in the search expression.
  32. *
  33. * Positive keywords are words that are searched for, as opposed to negative
  34. * keywords, which are words that are excluded. To count as a keyword, a
  35. * word must be at least
  36. * \Drupal::config('search.settings')->get('index.minimum_word_size')
  37. * characters.
  38. *
  39. * @see SearchQuery::getStatus()
  40. */
  41. const NO_POSITIVE_KEYWORDS = 1;
  42. /**
  43. * Indicates that part of the search expression was ignored.
  44. *
  45. * To prevent Denial of Service attacks, only
  46. * \Drupal::config('search.settings')->get('and_or_limit') expressions
  47. * (positive keywords, phrases, negative keywords) are allowed; this flag
  48. * indicates that expressions existed past that limit and they were removed.
  49. *
  50. * @see SearchQuery::getStatus()
  51. */
  52. const EXPRESSIONS_IGNORED = 2;
  53. /**
  54. * Indicates that lower-case "or" was in the search expression.
  55. *
  56. * The word "or" in lower case was found in the search expression. This
  57. * probably means someone was trying to do an OR search but used lower-case
  58. * instead of upper-case.
  59. *
  60. * @see SearchQuery::getStatus()
  61. */
  62. const LOWER_CASE_OR = 4;
  63. /**
  64. * Indicates that no positive keyword matches were found.
  65. *
  66. * @see SearchQuery::getStatus()
  67. */
  68. const NO_KEYWORD_MATCHES = 8;
  69. /**
  70. * The keywords and advanced search options that are entered by the user.
  71. *
  72. * @var string
  73. */
  74. protected $searchExpression;
  75. /**
  76. * The type of search (search type).
  77. *
  78. * This maps to the value of the type column in search_index, and is usually
  79. * equal to the machine-readable name of the plugin or the search page.
  80. *
  81. * @var string
  82. */
  83. protected $type;
  84. /**
  85. * Parsed-out positive and negative search keys.
  86. *
  87. * @var array
  88. */
  89. protected $keys = array('positive' => array(), 'negative' => array());
  90. /**
  91. * Indicates whether the query conditions are simple or complex (LIKE).
  92. *
  93. * @var bool
  94. */
  95. protected $simple = TRUE;
  96. /**
  97. * Conditions that are used for exact searches.
  98. *
  99. * This is always used for the second step in the query, but is not part of
  100. * the preparation step unless $this->simple is FALSE.
  101. *
  102. * @var \Drupal\Core\Database\Query\Condition
  103. */
  104. protected $conditions;
  105. /**
  106. * Indicates how many matches for a search query are necessary.
  107. *
  108. * @var int
  109. */
  110. protected $matches = 0;
  111. /**
  112. * Array of positive search words.
  113. *
  114. * These words have to match against {search_index}.word.
  115. *
  116. * @var array
  117. */
  118. protected $words = array();
  119. /**
  120. * Multiplier to normalize the keyword score.
  121. *
  122. * This value is calculated by the preparation step, and is used as a
  123. * multiplier of the word scores to make sure they are between 0 and 1.
  124. *
  125. * @var float
  126. */
  127. protected $normalize = 0;
  128. /**
  129. * Indicates whether the preparation step has been executed.
  130. *
  131. * @var bool
  132. */
  133. protected $executedPrepare = FALSE;
  134. /**
  135. * A bitmap of status conditions, described in getStatus().
  136. *
  137. * @var int
  138. *
  139. * @see SearchQuery::getStatus()
  140. */
  141. protected $status = 0;
  142. /**
  143. * The word score expressions.
  144. *
  145. * @var array
  146. *
  147. * @see SearchQuery::addScore()
  148. */
  149. protected $scores = array();
  150. /**
  151. * Arguments for the score expressions.
  152. *
  153. * @var array
  154. */
  155. protected $scoresArguments = array();
  156. /**
  157. * The number of 'i.relevance' occurrences in score expressions.
  158. *
  159. * @var int
  160. */
  161. protected $relevance_count = 0;
  162. /**
  163. * Multipliers for score expressions.
  164. *
  165. * @var array
  166. */
  167. protected $multiply = array();
  /**
   * Stores the search expression and type, and resets conditions and status.
   *
   * @param string $expression
   *   The raw search string, which may hold keywords and search options.
   * @param string $type
   *   The search type, matching the {search_index}.type column.
   *
   * @return $this
   */
  public function searchExpression($expression, $type) {
    // Start from a clean status bitmap and an empty AND condition group.
    $this->status = 0;
    $this->conditions = db_and();

    // Remember what is being searched for, and within which search type.
    $this->type = $type;
    $this->searchExpression = $expression;

    // Tag the query with 'search_' plus the search type.
    $this->addTag('search_' . $type);

    return $this;
  }
  /**
   * Parses the search query into SQL conditions.
   *
   * Sets up the following variables:
   * - $this->keys
   * - $this->words
   * - $this->conditions
   * - $this->simple
   * - $this->matches
   *
   * May also set the SearchQuery::EXPRESSIONS_IGNORED and
   * SearchQuery::LOWER_CASE_OR flags in $this->status.
   */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a - sign. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $in_or = FALSE;
    $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
    // The first search expression does not count as AND.
    $and_count = -1;
    $or_count = 0;
    foreach ($keywords as $match) {
      if ($or_count && $and_count + $or_count >= $limit_combinations) {
        // Ignore all further search expressions to prevent Denial-of-Service
        // attacks using a high number of AND/OR combinations.
        $this->status |= SearchQuery::EXPRESSIONS_IGNORED;
        break;
      }

      // Strip off phrase quotes. The regular expression guarantees that
      // $match[2] is never empty, so offset 0 always exists. Square brackets
      // are used because the curly-brace offset syntax was removed in PHP 8.
      $phrase = FALSE;
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }

      // Simplify keyword according to indexing rules and external
      // preprocessors. Use same process as during search indexing, so it
      // will match search index.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $in_or = TRUE;
        $or_count++;
        // Skip the reset of $in_or below so the next token joins this group.
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] == 'or') {
          // Lower-case "or" instead of "OR" is a warning condition.
          $this->status |= SearchQuery::LOWER_CASE_OR;
        }

        if ($in_or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
          $and_count++;
        }
      }
      $in_or = FALSE;
    }

    // Convert keywords into SQL statements.
    $has_and = FALSE;
    $has_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        // If we had already found one OR, this is another one AND-ed with the
        // first, meaning it is not a simple query.
        if ($has_or) {
          $this->simple = FALSE;
        }
        $has_or = TRUE;
        $has_new_scores = FALSE;
        $queryor = db_or();
        foreach ($key as $or) {
          list($num_new_scores) = $this->parseWord($or);
          $has_new_scores |= $num_new_scores;
          $queryor->condition('d.data', "% $or %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          $this->matches += ($has_new_scores > 0);
        }
      }
      // Single ANDed term.
      else {
        $has_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($has_and && $has_or) {
      $this->simple = FALSE;
    }

    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }
  }
  /**
   * Parses a word or phrase for parseSearchExpression().
   *
   * Splits the phrase on spaces. Each piece that is numeric, or that is at
   * least index.minimum_word_size characters long, counts as a valid word and
   * is added to $this->words if it is not already there.
   *
   * @param string $word
   *   The word or phrase to parse.
   *
   * @return array
   *   An array of two integers: the number of words newly added to
   *   $this->words, and the total number of valid words in the phrase.
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;

    // The minimum word size is the same for every piece, so read it once
    // instead of on each loop iteration.
    $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      // Numeric pieces are accepted regardless of their length.
      if (is_numeric($s) || Unicode::strlen($s) >= $minimum_word_size) {
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    // Return the number of added words and the number of valid words.
    return array($num_new_scores, $num_valid_words);
  }
  /**
   * Runs the preparation step and computes the normalization factor.
   *
   * Parses the search expression, restricts the query to the parsed positive
   * words, and runs a one-row query to find the highest keyword score, which
   * is stored in $this->normalize.
   *
   * Error and warning conditions can apply. Call getStatus() after calling
   * this method to retrieve them.
   *
   * @return bool
   *   TRUE if at least one keyword matched the search index; FALSE if not.
   */
  public function prepareAndNormalize() {
    $this->parseSearchExpression();
    $this->executedPrepare = TRUE;

    // With no positive words there is nothing to join or normalize against,
    // so flag the condition and stop early.
    if (!count($this->words)) {
      $this->status |= SearchQuery::NO_POSITIVE_KEYWORDS;
      return FALSE;
    }

    // Basic search query: the index word must be one of the entered keywords.
    $word_conditions = db_or();
    foreach ($this->words as $keyword) {
      $word_conditions->condition('i.word', $keyword);
    }
    $this->condition($word_conditions);

    // Join the keyword totals, restrict to this search type, and group per
    // indexed item.
    $this->join('search_total', 't', 'i.word = t.word');
    $this->condition('i.type', $this->type);
    $this->groupBy('i.type');
    $this->groupBy('i.sid');

    // Only simple queries have a reliable count of words that must match.
    // For non-simple queries, this criterion could lead to incorrectly
    // deciding not to continue with the full query, so it is skipped.
    if ($this->simple) {
      $this->having('COUNT(*) >= :matches', array(':matches' => $this->matches));
    }

    // The normalization runs on a copy of the underlying query.
    $normalize_query = clone $this->query;

    // Complex queries also need the LIKE conditions on the dataset table;
    // simple queries can be normalized without them.
    if (!$this->simple) {
      $normalize_query->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
      if (count($this->conditions)) {
        $normalize_query->condition($this->conditions);
      }
    }

    // The normalization value is the highest summed score among the matching
    // items. Note that the query could have other fields added to it by the
    // user of this extender.
    $normalize_query->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $normalize_query->range(0, 1);
    $normalize_query->orderBy('calculated_score', 'DESC');
    $top_row = $normalize_query->execute()->fetchObject();

    if (isset($top_row->calculated_score)) {
      $this->normalize = (float) $top_row->calculated_score;
    }

    // A zero normalization value indicates there were no matches to the
    // supplied positive keywords.
    if (!$this->normalize) {
      $this->status |= SearchQuery::NO_KEYWORD_MATCHES;
      return FALSE;
    }

    return TRUE;
  }
  /**
   * {@inheritdoc}
   */
  public function preExecute(SelectInterface $query = NULL) {
    // Run the preparation step now if the caller has not done so already.
    if (!$this->executedPrepare) {
      $this->prepareAndNormalize();
    }

    // Without a non-zero normalization value the query must not run; only
    // otherwise is the decision handed to the parent implementation.
    return $this->normalize ? parent::preExecute($query) : FALSE;
  }
  /**
   * Registers a score expression used to rank search results.
   *
   * If addScore() is never called, a default keyword relevance score is used
   * when the query is executed. Once addScore() has been called at least
   * once, the keyword relevance score is no longer added automatically.
   *
   * Ordering of search results must be set up through this method rather than
   * by calling orderBy() directly, because the SearchQuery extender uses a
   * two-pass system to normalize scores.
   *
   * @param string $score
   *   The score expression, which should evaluate to a number between 0 and 1.
   *   Each occurrence of the string 'i.relevance' in the expression is
   *   replaced by a measure of keyword relevance between 0 and 1.
   * @param array $arguments
   *   Query arguments needed to provide values to the score expression.
   * @param float $multiply
   *   If set, the score is multiplied with this value. All scores with
   *   multipliers are then divided by the total of all multipliers, so that
   *   overall, the normalization is maintained.
   *
   * @return $this
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      $index = count($this->multiply);
      // Wrap the expression so it is scaled by the multiplier and divided by
      // a total to renormalize. The ROUND calls are necessary for PostgreSQL
      // and SQLite to ensure that the :multiply_* and :total_* arguments are
      // treated as a numeric type, because the PostgreSQL PDO driver
      // sometimes puts values in as strings instead of numbers in complex
      // expressions like this.
      $score = "(ROUND(:multiply_$index, 4)) * COALESCE(($score), 0) / (ROUND(:total_$index, 4))";
      // Only the multiplier argument is supplied here. The :total_* argument
      // is added in execute(), once the total divisor can be calculated.
      $this->multiply[] = $multiply;
      $arguments[':multiply_' . $index] = $multiply;
    }

    // For historical reasons, keyword relevance is requested by putting
    // 'i.relevance' into the score expression. Swap each occurrence, from
    // left to right, for a calculated expression with its own numbered
    // placeholder, counting them so execute() can add the arguments.
    $pieces = explode('i.relevance', $score);
    $score = array_shift($pieces);
    foreach ($pieces as $piece) {
      $score .= '((ROUND(:normalization_' . $this->relevance_count . ', 4)) * i.score * t.count)' . $piece;
      $this->relevance_count++;
    }

    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }
  /**
   * Executes the search.
   *
   * Applies the complex conditions, the score expressions and the ordering to
   * the query, then runs it.
   *
   * Error and warning conditions can apply. Call getStatus() after calling
   * this method to retrieve them.
   *
   * @return \Drupal\Core\Database\StatementInterface|null
   *   A query result set containing the results of the query, or NULL if
   *   preExecute() indicated that the query should not run.
   */
  public function execute() {
    if (!$this->preExecute($this)) {
      return NULL;
    }

    // Join the dataset table and apply the exact-match conditions to it.
    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $this->condition($this->conditions);
    }

    // Fall back to plain keyword relevance when no score was registered.
    if (!$this->scores) {
      $this->addScore('i.relevance');
    }

    // The score expressions were already rewritten in addScore() to divide
    // by the total of all multipliers; only the :total_* arguments are still
    // missing, and each of them receives that same total.
    $multiplier_count = count($this->multiply);
    if ($multiplier_count) {
      $multiplier_total = array_sum($this->multiply);
      for ($index = 0; $index < $multiplier_count; $index++) {
        $this->scoresArguments[':total_' . $index] = $multiplier_total;
      }
    }

    // Every 'i.relevance' placeholder receives the same normalization factor.
    $factor = 1.0 / $this->normalize;
    for ($index = 0; $index < $this->relevance_count; $index++) {
      $this->scoresArguments[':normalization_' . $index] = $factor;
    }

    // Add all scores together to form a query field.
    $score_sum = 'SUM(' . implode(' + ', $this->scores) . ')';
    $this->addExpression($score_sum, 'calculated_score', $this->scoresArguments);

    // If an order has not yet been set for this query, sort by the
    // calculated sum of scores.
    if (count($this->getOrderBy()) == 0) {
      $this->orderBy('calculated_score', 'DESC');
    }

    // Add query metadata and the fields that identify each result.
    $this->addMetaData('normalize', $this->normalize);
    $this->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }
  /**
   * Builds the default count query for SearchQuery.
   *
   * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
   * also add the same conditions as execute() because countQuery() is called
   * first.
   *
   * @return mixed
   *   The count query object returned by db_select(), wrapping a copy of this
   *   query as a subquery and selecting COUNT(*) from it.
   */
  public function countQuery() {
    if (!$this->executedPrepare) {
      $this->prepareAndNormalize();
    }

    // Clone the inner query.
    $inner = clone $this->query;

    // Add conditions to query. The join must match on langcode as well, as
    // it does in execute(); otherwise an index row can be paired with the
    // dataset row of a different language and the count can disagree with
    // the results that execute() returns.
    $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $inner->condition($this->conditions);
    }

    // Remove existing fields and expressions, they are not needed for a count
    // query. The getters return references, so assigning empties them.
    $fields =& $inner->getFields();
    $fields = array();
    $expressions =& $inner->getExpressions();
    $expressions = array();

    // Add sid as the only field and count them as a subquery.
    $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'replica'));

    // Add the COUNT() expression.
    $count->addExpression('COUNT(*)');

    return $count;
  }
  /**
   * Gets the bitmap of status conditions recorded for this query.
   *
   * @return int
   *   Zero when no problems were recorded. Otherwise, a bitwise combination
   *   of one or more of these flags:
   *   - SearchQuery::NO_POSITIVE_KEYWORDS
   *   - SearchQuery::EXPRESSIONS_IGNORED
   *   - SearchQuery::LOWER_CASE_OR
   *   - SearchQuery::NO_KEYWORD_MATCHES
   */
  public function getStatus() {
    return $this->status;
  }
  577. }