PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/core/modules/search/src/SearchQuery.php

https://gitlab.com/geeta7/drupal
PHP | 653 lines | 256 code | 72 blank | 325 comment | 51 complexity | 4139e13490816962beff5275d8be1ce2 MD5 | raw file
  1. <?php
  2. /**
  3. * @file
  4. * Contains \Drupal\search\SearchQuery.
  5. *
  6. * Search query extender and helper functions.
  7. */
  8. namespace Drupal\search;
  9. use Drupal\Component\Utility\Unicode;
  10. use Drupal\Core\Database\Query\SelectExtender;
  11. use Drupal\Core\Database\Query\SelectInterface;
  12. /**
  13. * Performs a query on the full-text search index for a word or words.
  14. *
  15. * This query is used by search plugins that use the search index (not all
  16. * search plugins do, as some use a different searching mechanism). It
  17. * assumes you have set up a query on the {search_index} table with alias 'i',
  18. * and will only work if the user is searching for at least one "positive"
  19. * keyword or phrase.
  20. *
  21. * For efficiency, users of this query can run the prepareAndNormalize()
  22. * method to figure out if there are any search results, before fully setting
  23. * up and calling execute() to execute the query. The scoring expressions are
  24. * not needed until the execute() step. However, it's not really necessary
  25. * to do this, because this class's execute() method does that anyway.
  26. *
  27. * During both the prepareAndNormalize() and execute() steps, there can be
  28. * problems. Call getStatus() to figure out if the query is OK or not.
  29. *
  30. * The query object is given the tag 'search_$type' and can be further
  31. * extended with hook_query_alter().
  32. */
  33. class SearchQuery extends SelectExtender {
  34. /**
  35. * Indicates no positive keywords were in the search expression.
  36. *
  37. * Positive keywords are words that are searched for, as opposed to negative
  38. * keywords, which are words that are excluded. To count as a keyword, a
  39. * word must be at least
  40. * \Drupal::config('search.settings')->get('index.minimum_word_size')
  41. * characters.
  42. *
  43. * @see SearchQuery::getStatus()
  44. */
  45. const NO_POSITIVE_KEYWORDS = 1;
  46. /**
  47. * Indicates that part of the search expression was ignored.
  48. *
  49. * To prevent Denial of Service attacks, only
  50. * \Drupal::config('search.settings')->get('and_or_limit') expressions
  51. * (positive keywords, phrases, negative keywords) are allowed; this flag
  52. * indicates that expressions existed past that limit and they were removed.
  53. *
  54. * @see SearchQuery::getStatus()
  55. */
  56. const EXPRESSIONS_IGNORED = 2;
  57. /**
  58. * Indicates that lower-case "or" was in the search expression.
  59. *
  60. * The word "or" in lower case was found in the search expression. This
  61. * probably means someone was trying to do an OR search but used lower-case
  62. * instead of upper-case.
  63. *
  64. * @see SearchQuery::getStatus()
  65. */
  66. const LOWER_CASE_OR = 4;
  67. /**
  68. * Indicates that no positive keyword matches were found.
  69. *
  70. * @see SearchQuery::getStatus()
  71. */
  72. const NO_KEYWORD_MATCHES = 8;
  73. /**
  74. * The keywords and advanced search options that are entered by the user.
  75. *
  76. * @var string
  77. */
  78. protected $searchExpression;
  79. /**
  80. * The type of search (search type).
  81. *
  82. * This maps to the value of the type column in search_index, and is usually
  83. * equal to the machine-readable name of the plugin or the search page.
  84. *
  85. * @var string
  86. */
  87. protected $type;
  88. /**
  89. * Parsed-out positive and negative search keys.
  90. *
  91. * @var array
  92. */
  93. protected $keys = array('positive' => array(), 'negative' => array());
  94. /**
  95. * Indicates whether the query conditions are simple or complex (LIKE).
  96. *
  97. * @var bool
  98. */
  99. protected $simple = TRUE;
  100. /**
  101. * Conditions that are used for exact searches.
  102. *
  103. * This is always used for the second step in the query, but is not part of
  104. * the preparation step unless $this->simple is FALSE.
  105. *
  106. * @var \Drupal\Core\Database\Query\Condition
  107. */
  108. protected $conditions;
  109. /**
  110. * Indicates how many matches for a search query are necessary.
  111. *
  112. * @var int
  113. */
  114. protected $matches = 0;
  115. /**
  116. * Array of positive search words.
  117. *
  118. * These words have to match against {search_index}.word.
  119. *
  120. * @var array
  121. */
  122. protected $words = array();
  123. /**
  124. * Multiplier to normalize the keyword score.
  125. *
  126. * This value is calculated by the preparation step, and is used as a
  127. * multiplier of the word scores to make sure they are between 0 and 1.
  128. *
  129. * @var float
  130. */
  131. protected $normalize = 0;
  132. /**
  133. * Indicates whether the preparation step has been executed.
  134. *
  135. * @var bool
  136. */
  137. protected $executedPrepare = FALSE;
  138. /**
  139. * A bitmap of status conditions, described in getStatus().
  140. *
  141. * @var int
  142. *
  143. * @see SearchQuery::getStatus()
  144. */
  145. protected $status = 0;
  146. /**
  147. * The word score expressions.
  148. *
  149. * @var array
  150. *
  151. * @see SearchQuery::addScore()
  152. */
  153. protected $scores = array();
  154. /**
  155. * Arguments for the score expressions.
  156. *
  157. * @var array
  158. */
  159. protected $scoresArguments = array();
  160. /**
  161. * The number of 'i.relevance' occurrences in score expressions.
  162. *
  163. * @var int
  164. */
  165. protected $relevance_count = 0;
  166. /**
  167. * Multipliers for score expressions.
  168. *
  169. * @var array
  170. */
  171. protected $multiply = array();
  /**
   * Stores the search expression and search type for this query.
   *
   * Besides remembering both values, this tags the query with
   * 'search_' . $type and resets the exact-match conditions and the status
   * bitmap, so the query starts parsing from a clean state.
   *
   * @param string $expression
   *   A search string, which can contain keywords and options.
   * @param string $type
   *   The search type. This maps to {search_index}.type in the database.
   *
   * @return $this
   */
  public function searchExpression($expression, $type) {
    // Reset parsing state: no status flags and an empty AND condition group.
    $this->status = 0;
    $this->conditions = db_and();

    // Remember what to search for and in which search type.
    $this->type = $type;
    $this->searchExpression = $expression;

    // Tag the query with its search type.
    $this->addTag('search_' . $type);

    return $this;
  }
  /**
   * Parses the search expression into keyword lists and SQL conditions.
   *
   * The expression is split into words and quoted phrases, each optionally
   * prefixed with "-". Every token is classified as a negative keyword, an
   * upper-case "OR" operator, an "AND"/"and" operator (ignored, because AND is
   * implied) or a positive keyword. LIKE / NOT LIKE conditions on d.data are
   * then built from the classified keywords.
   *
   * Sets up the following variables:
   * - $this->keys
   * - $this->words
   * - $this->conditions
   * - $this->simple
   * - $this->matches
   *
   * May also set the SearchQuery::EXPRESSIONS_IGNORED and
   * SearchQuery::LOWER_CASE_OR flags in $this->status.
   */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a - sign. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $in_or = FALSE;
    $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
    // The first search expression does not count as AND.
    $and_count = -1;
    $or_count = 0;
    foreach ($keywords as $match) {
      if ($or_count && $and_count + $or_count >= $limit_combinations) {
        // Ignore all further search expressions to prevent Denial-of-Service
        // attacks using a high number of AND/OR combinations.
        $this->status |= SearchQuery::EXPRESSIONS_IGNORED;
        break;
      }

      // Strip off phrase quotes. Square-bracket offsets are used because the
      // curly-brace form ($string{0}) is no longer valid PHP.
      $phrase = FALSE;
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }

      // Simplify keyword according to indexing rules and external
      // preprocessors. Use same process as during search indexing, so it
      // will match search index.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $in_or = TRUE;
        $or_count++;
        // Skip the reset of $in_or below: the next keyword joins this group.
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] == 'or') {
          // Lower-case "or" instead of "OR" is a warning condition.
          $this->status |= SearchQuery::LOWER_CASE_OR;
        }

        if ($in_or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
          $and_count++;
        }
      }
      $in_or = FALSE;
    }

    // Convert keywords into SQL statements.
    $has_and = FALSE;
    $has_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        // If we had already found one OR, this is another one AND-ed with the
        // first, meaning it is not a simple query.
        if ($has_or) {
          $this->simple = FALSE;
        }
        $has_or = TRUE;
        $has_new_scores = FALSE;
        $queryor = db_or();
        foreach ($key as $or) {
          list($num_new_scores) = $this->parseWord($or);
          $has_new_scores |= $num_new_scores;
          $queryor->condition('d.data', "% $or %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          $this->matches += ($has_new_scores > 0);
        }
      }
      // Single ANDed term.
      else {
        $has_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          // No word of this term can be looked up in the index, so only the
          // LIKE condition can match it.
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    if ($has_and && $has_or) {
      $this->simple = FALSE;
    }

    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }
  }
  /**
   * Parses a word or phrase for parseSearchExpression().
   *
   * Splits the phrase on single spaces and adds each valid word to
   * $this->words, if it is not already there. A word is valid when it is
   * numeric, or when it is at least as long as the
   * 'index.minimum_word_size' value from the search.settings configuration.
   *
   * @param string $word
   *   The word or phrase to parse.
   *
   * @return array
   *   An indexed array of two integers: the number of words newly added to
   *   $this->words, and the total number of valid words in the phrase
   *   (including words that had already been added).
   */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;

    // The minimum word size is the same for every word in the phrase, so look
    // it up once instead of once per word.
    $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      if (is_numeric($s) || Unicode::strlen($s) >= $minimum_word_size) {
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    // Return the number of newly added words and the number of valid words.
    return array($num_new_scores, $num_valid_words);
  }
  /**
   * Runs the preparation pass and computes the normalization factor.
   *
   * The search expression is parsed, the keyword conditions are added to this
   * query, and a copy of the query is executed to find the highest raw
   * keyword score. That score is stored in $this->normalize, which execute()
   * later uses to scale keyword relevance.
   *
   * Error and warning conditions can apply. Call getStatus() after calling
   * this method to retrieve them.
   *
   * @return bool
   *   TRUE if at least one keyword matched the search index; FALSE if not.
   */
  public function prepareAndNormalize() {
    $this->parseSearchExpression();
    $this->executedPrepare = TRUE;

    // The query could technically proceed without keywords, but joining other
    // tables and normalizing would be pointless, so flag it and stop here.
    if (!$this->words) {
      $this->status |= self::NO_POSITIVE_KEYWORDS;
      return FALSE;
    }

    // Basic search condition: the index word must be one of the keywords.
    $word_conditions = db_or();
    foreach ($this->words as $keyword) {
      $word_conditions->condition('i.word', $keyword);
    }
    $this->condition($word_conditions);

    // Join {search_total} for the score calculation, restrict the query to
    // this search type, and group the rows per indexed item.
    $this->join('search_total', 't', 'i.word = t.word');
    $this->condition('i.type', $this->type);
    $this->groupBy('i.type');
    $this->groupBy('i.sid');

    // Only a simple query has a reliable count of the keyword rows that must
    // match. For a non-simple query this criterion could wrongly decide that
    // the full query is not worth running.
    if ($this->simple) {
      $this->having('COUNT(*) >= :matches', array(':matches' => $this->matches));
    }

    // Normalization runs on a copy, so the additions below do not end up in
    // the main query.
    $normalize_query = clone $this->query;

    // Complex queries also need the LIKE conditions on {search_dataset};
    // simple queries can be normalized without them.
    if (!$this->simple) {
      $normalize_query->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
      if (count($this->conditions)) {
        $normalize_query->condition($this->conditions);
      }
    }

    // The normalization value is the highest summed keyword score. The query
    // may carry other fields added by the code using this extender.
    $normalize_query->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $normalize_query->range(0, 1);
    $normalize_query->orderBy('calculated_score', 'DESC');
    $top_row = $normalize_query->execute()->fetchObject();

    if (isset($top_row->calculated_score)) {
      $this->normalize = (float) $top_row->calculated_score;
    }

    // A zero normalization value means none of the positive keywords matched.
    if (!$this->normalize) {
      $this->status |= self::NO_KEYWORD_MATCHES;
      return FALSE;
    }

    return TRUE;
  }
  /**
   * {@inheritdoc}
   *
   * Runs the preparation pass first if it has not been run yet, and returns
   * FALSE without calling the parent when the normalization value is zero.
   */
  public function preExecute(SelectInterface $query = NULL) {
    // Make sure the normalization value has been calculated.
    if (!$this->executedPrepare) {
      $this->prepareAndNormalize();
    }

    // Without a normalization value there is nothing to execute.
    return $this->normalize ? parent::preExecute($query) : FALSE;
  }
  /**
   * Registers a custom score expression for ordering the search results.
   *
   * When addScore() is never called, execute() falls back to a default
   * keyword relevance score. As soon as it has been called at least once,
   * keyword relevance is no longer added automatically.
   *
   * Ordering of searches must be added through this method rather than by
   * calling orderBy() directly, because the SearchQuery extender normalizes
   * scores with a two-pass system.
   *
   * @param string $score
   *   The score expression, which should evaluate to a number between 0 and 1.
   *   The string 'i.relevance' in a score expression will be replaced by a
   *   measure of keyword relevance between 0 and 1.
   * @param array $arguments
   *   Query arguments needed to provide values to the score expression.
   * @param float $multiply
   *   If set, the score is multiplied with this value. However, all scores
   *   with multipliers are then divided by the total of all multipliers, so
   *   that overall, the normalization is maintained.
   *
   * @return $this
   */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      $index = count($this->multiply);
      // Wrap the expression so it is scaled by its multiplier and divided by
      // the total of all multipliers. The ROUND calls are necessary for
      // PostgreSQL and SQLite so that the :multiply_* and :total_* arguments
      // are treated as numeric, because the PostgreSQL PDO driver sometimes
      // passes values as strings in complex expressions like this.
      $score = '(ROUND(:multiply_' . $index . ', 4)) * COALESCE((' . $score . '), 0) / (ROUND(:total_' . $index . ', 4))';
      // Only the multiplier argument is known here. The matching :total_*
      // argument is supplied by execute(), once the total is known.
      $arguments[':multiply_' . $index] = $multiply;
      $this->multiply[] = $multiply;
    }

    // For historical reasons, keyword relevance is requested by writing
    // 'i.relevance' in the expression. Replace each occurrence, from left to
    // right, with a calculated expression that has its own numbered
    // placeholder; execute() adds the arguments for those placeholders.
    $segments = explode('i.relevance', $score);
    $score = array_shift($segments);
    foreach ($segments as $segment) {
      $score .= '((ROUND(:normalization_' . $this->relevance_count . ', 4)) * i.score * t.count)' . $segment;
      $this->relevance_count++;
    }

    $this->scores[] = $score;
    $this->scoresArguments += $arguments;

    return $this;
  }
  /**
   * Runs the full search query.
   *
   * Adds the exact-match conditions, the score expressions and, unless an
   * order was already set, the default ordering, then executes the query.
   *
   * Error and warning conditions can apply. Call getStatus() after calling
   * this method to retrieve them.
   *
   * @return \Drupal\Core\Database\StatementInterface|null
   *   A query result set containing the results of the query, or NULL when
   *   preExecute() reports that the query should not run.
   */
  public function execute() {
    if (!$this->preExecute($this)) {
      return NULL;
    }

    // Join the dataset table and apply the exact-match conditions.
    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $this->condition($this->conditions);
    }

    // Fall back to plain keyword relevance when no score was defined.
    if (empty($this->scores)) {
      $this->addScore('i.relevance');
    }

    // Scores with multipliers are re-normalized by dividing by the total of
    // all multipliers. addScore() already altered the expressions, so only
    // the :total_* arguments are supplied here.
    $multiplier_total = array_sum($this->multiply);
    foreach (array_keys($this->multiply) as $index) {
      $this->scoresArguments[':total_' . $index] = $multiplier_total;
    }

    // Supply the keyword relevance normalization number for every
    // 'i.relevance' occurrence counted by addScore().
    $normalization = 1.0 / $this->normalize;
    for ($index = 0; $index < $this->relevance_count; $index++) {
      $this->scoresArguments[':normalization_' . $index] = $normalization;
    }

    // The sum of all score expressions becomes a query field.
    $score_sum = 'SUM(' . implode(' + ', $this->scores) . ')';
    $this->addExpression($score_sum, 'calculated_score', $this->scoresArguments);

    // Sort by the calculated score unless an order has been set already.
    if (count($this->getOrderBy()) == 0) {
      $this->orderBy('calculated_score', 'DESC');
    }

    // Add query metadata and the fields identifying each result.
    $this->addMetaData('normalize', $this->normalize);
    $this->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }
  /**
   * Builds the default count query for SearchQuery.
   *
   * Since SearchQuery always uses GROUP BY, we can default to a subquery. The
   * same {search_dataset} join and conditions as in execute() are added here,
   * because countQuery() is called first.
   *
   * @return \Drupal\Core\Database\Query\SelectInterface
   *   A query with a COUNT(*) expression over a subquery of matching sids.
   */
  public function countQuery() {
    if (!$this->executedPrepare) {
      $this->prepareAndNormalize();
    }

    // Clone the inner query.
    $inner = clone $this->query;

    // Add conditions to query. The join must match the one in execute(),
    // including the language code; otherwise index rows are paired with
    // dataset rows of other languages and the count can differ from the
    // results actually returned.
    $inner->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type AND i.langcode = d.langcode');
    if (count($this->conditions)) {
      $inner->condition($this->conditions);
    }

    // Remove existing fields and expressions, they are not needed for a count
    // query. Both getters return references, so assigning empties them.
    $fields =& $inner->getFields();
    $fields = array();
    $expressions =& $inner->getExpressions();
    $expressions = array();

    // Add sid as the only field and count them as a subquery.
    $count = db_select($inner->fields('i', array('sid')), NULL, array('target' => 'replica'));

    // Add the COUNT() expression.
    $count->addExpression('COUNT(*)');

    return $count;
  }
  /**
   * Gets the status bitmap collected while preparing and executing the query.
   *
   * @return int
   *   Zero when there were no problems. Otherwise a bitwise combination of one
   *   or more of the following flags:
   *   - SearchQuery::NO_POSITIVE_KEYWORDS
   *   - SearchQuery::EXPRESSIONS_IGNORED
   *   - SearchQuery::LOWER_CASE_OR
   *   - SearchQuery::NO_KEYWORD_MATCHES
   */
  public function getStatus() {
    return $this->status;
  }
  581. }