PageRenderTime 43ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/core/modules/search/lib/Drupal/search/SearchQuery.php

https://bitbucket.org/aswinvk28/smartpan-stock-drupal
PHP | 515 lines | 226 code | 50 blank | 239 comment | 45 complexity | 024f87cc1c3cfffe74bca503907c03c3 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * @file
  4. * Definition of Drupal\search\SearchQuery.
  5. *
  6. * Search query extender and helper functions.
  7. */
  8. namespace Drupal\search;
  9. use Drupal\Core\Database\Query\SelectExtender;
  10. use Drupal\Core\Database\StatementEmpty;
  11. /**
  12. * Performs a query on the full-text search index for a word or words.
  13. *
  14. * This function is normally only called by each plugin that supports the
  15. * indexed search.
  16. *
  17. * Results are retrieved in two logical passes. However, the two passes are
  18. * joined together into a single query, and in the case of most simple queries
  19. * the second pass is not even used.
  20. *
  21. * The first pass selects a set of all possible matches, which has the benefit
  22. * of also providing the exact result set for simple "AND" or "OR" searches.
  23. *
  24. * The second portion of the query further refines this set by verifying
  25. * advanced text conditions (such as negative or phrase matches).
  26. *
  27. * The used query object has the tag 'search_$type' and can be further
  28. * extended with hook_query_alter().
  29. */
  30. class SearchQuery extends SelectExtender {
  31. /**
  32. * The search query that is used for searching.
  33. *
  34. * @var string
  35. */
  36. protected $searchExpression;
  37. /**
  38. * The type of search (search type).
  39. *
  40. * This maps to the value of the type column in search_index, and is equal
  41. * to the machine-readable name of the entity type being indexed, or other
  42. * identifier provided by a search plugin.
  43. *
  44. * @var string
  45. */
  46. protected $type;
  47. /**
  48. * Positive and negative search keys.
  49. *
  50. * @var array
  51. */
  52. protected $keys = array('positive' => array(), 'negative' => array());
  53. /**
  54. * Indicates whether the first pass query can skip complex conditions (LIKE).
  55. *
  56. * @var bool
  57. */
  58. protected $simple = TRUE;
  59. /**
  60. * Conditions that are used for exact searches.
  61. *
  62. * This is always used for the second pass query but not for the first pass,
  63. * unless $this->simple is FALSE.
  64. *
  65. * @var DatabaseCondition
  66. */
  67. protected $conditions;
  68. /**
  69. * Indicates how many matches for a search query are necessary.
  70. *
  71. * @var int
  72. */
  73. protected $matches = 0;
  74. /**
  75. * Array of search words.
  76. *
  77. * These words have to match against {search_index}.word.
  78. *
  79. * @var array
  80. */
  81. protected $words = array();
  82. /**
  83. * Multiplier for the normalized search score.
  84. *
  85. * This value is calculated by the first pass query and multiplied with the
  86. * actual score of a specific word to make sure that the resulting calculated
  87. * score is between 0 and 1.
  88. *
  89. * @var float
  90. */
  91. protected $normalize;
  92. /**
  93. * Indicates whether the first pass query has been executed.
  94. *
  95. * @var bool
  96. */
  97. protected $executedFirstPass = FALSE;
  98. /**
  99. * Stores score expressions.
  100. *
  101. * @var array
  102. *
  103. * @see addScore()
  104. */
  105. protected $scores = array();
  106. /**
  107. * Stores arguments for score expressions.
  108. *
  109. * @var array
  110. */
  111. protected $scoresArguments = array();
  112. /**
  113. * Stores multipliers for score expressions.
  114. *
  115. * @var array
  116. */
  117. protected $multiply = array();
  118. /**
  119. * Whether or not search expressions were ignored.
  120. *
  121. * The maximum number of AND/OR combinations can be configured to avoid
  122. * Denial-of-Service attacks. Expressions beyond the limit are ignored.
  123. *
  124. * @var bool
  125. */
  126. protected $expressionsIgnored = FALSE;
  127. /**
  128. * Sets up the search query expression.
  129. *
  130. * @param $expression
  131. * A search query string, which can contain options.
  132. * @param $type
  133. * The search type. This maps to {search_index}.type in the database.
  134. *
  135. * @return
  136. * The SearchQuery object.
  137. */
  public function searchExpression($expression, $type) {
    // Only store the inputs here; the expression is parsed later, when the
    // first pass query is executed.
    $this->type = $type;
    $this->searchExpression = $expression;

    // Hand back the extender itself so that calls can be chained.
    return $this;
  }
  143. /**
  144. * Parses the search query into SQL conditions.
  145. *
  146. * We build two queries that match the dataset bodies.
  147. */
  protected function parseSearchExpression() {
    // Matches words optionally prefixed by a dash. A word in this case is
    // something between two spaces, optionally quoted.
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression, $keywords, PREG_SET_ORDER);

    if (count($keywords) == 0) {
      return;
    }

    // Classify tokens.
    $or = FALSE;
    $warning = '';
    $limit_combinations = \Drupal::config('search.settings')->get('and_or_limit');
    // The first search expression does not count as AND.
    $and_count = -1;
    $or_count = 0;
    foreach ($keywords as $match) {
      if ($or_count && $and_count + $or_count >= $limit_combinations) {
        // Ignore all further search expressions to prevent Denial-of-Service
        // attacks using a high number of AND/OR combinations.
        $this->expressionsIgnored = TRUE;
        break;
      }

      $phrase = FALSE;
      // Strip off phrase quotes. The pattern above guarantees that $match[2]
      // is never empty, so reading its first character is safe. Square
      // brackets are used because curly-brace string offsets are no longer
      // valid PHP.
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
        $this->simple = FALSE;
      }

      // Simplify keyword according to indexing rules and external
      // preprocessors. Use same process as during search indexing, so it
      // will match search index.
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when
      // matching a phrase.
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);

      // Negative matches.
      if ($match[1] == '-') {
        $this->keys['negative'] = array_merge($this->keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($this->keys['positive'])) {
        $last = array_pop($this->keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $this->keys['positive'][] = $last;
        $or = TRUE;
        $or_count++;
        continue;
      }
      // AND operator: implied, so just ignore it.
      elseif ($match[2] == 'AND' || $match[2] == 'and') {
        $warning = $match[2];
        continue;
      }
      // Plain keyword.
      else {
        if ($match[2] == 'or') {
          // Remember the lowercase 'or' so the user can be told to use the
          // uppercase operator; the word itself is still treated as a keyword.
          $warning = $match[2];
        }
        if ($or) {
          // Add to last element (which is an array).
          $last_index = count($this->keys['positive']) - 1;
          $this->keys['positive'][$last_index] = array_merge($this->keys['positive'][$last_index], $words);
        }
        else {
          $this->keys['positive'] = array_merge($this->keys['positive'], $words);
          $and_count++;
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $this->conditions = db_and();
    $simple_and = FALSE;
    $simple_or = FALSE;
    // Positive matches.
    foreach ($this->keys['positive'] as $key) {
      // Group of ORed terms.
      if (is_array($key) && count($key)) {
        $simple_or = TRUE;
        $any = FALSE;
        $queryor = db_or();
        foreach ($key as $or_word) {
          list($num_new_scores) = $this->parseWord($or_word);
          $any |= $num_new_scores;
          $queryor->condition('d.data', "% $or_word %", 'LIKE');
        }
        if (count($queryor)) {
          $this->conditions->condition($queryor);
          // A group of OR keywords only needs to match once.
          $this->matches += ($any > 0);
        }
      }
      // Single ANDed term.
      else {
        $simple_and = TRUE;
        list($num_new_scores, $num_valid_words) = $this->parseWord($key);
        $this->conditions->condition('d.data', "% $key %", 'LIKE');
        if (!$num_valid_words) {
          $this->simple = FALSE;
        }
        // Each AND keyword needs to match at least once.
        $this->matches += $num_new_scores;
      }
    }
    // Mixing AND terms with OR groups cannot be resolved by the first pass
    // alone.
    if ($simple_and && $simple_or) {
      $this->simple = FALSE;
    }

    // Negative matches.
    foreach ($this->keys['negative'] as $key) {
      $this->conditions->condition('d.data', "% $key %", 'NOT LIKE');
      $this->simple = FALSE;
    }

    if ($warning == 'or') {
      drupal_set_message(t('Search for either of the two terms with uppercase <strong>OR</strong>. For example, <strong>cats OR dogs</strong>.'));
    }
  }
  267. /**
  268. * Helper function for parseSearchExpression().
  269. */
  protected function parseWord($word) {
    $num_new_scores = 0;
    $num_valid_words = 0;
    // The minimum word size is the same for every part of the phrase, so read
    // the configuration once instead of once per word.
    $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

    // Determine the scorewords of this word/phrase.
    foreach (explode(' ', $word) as $s) {
      // Numeric tokens bypass the minimum length check.
      if (is_numeric($s) || drupal_strlen($s) >= $minimum_word_size) {
        // Only words not seen before contribute a new score.
        if (!isset($this->words[$s])) {
          $this->words[$s] = $s;
          $num_new_scores++;
        }
        $num_valid_words++;
      }
    }

    // Return the number of newly registered score words and the number of
    // valid words found in this word/phrase.
    return array($num_new_scores, $num_valid_words);
  }
  288. /**
  289. * Executes the first pass query.
  290. *
  291. * This can either be done explicitly, so that additional scores and
  292. * conditions can be applied to the second pass query, or implicitly by
  293. * addScore() or execute().
  294. *
  295. * @return
  296. * TRUE if search items exist, FALSE if not.
  297. */
  public function executeFirstPass() {
    $this->parseSearchExpression();

    if (count($this->words) == 0) {
      // No form state is available inside this method, so the problem is
      // reported as a warning message rather than as a form error.
      drupal_set_message(format_plural(\Drupal::config('search.settings')->get('index.minimum_word_size'), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.'), 'warning');
      return FALSE;
    }
    if ($this->expressionsIgnored) {
      drupal_set_message(t('Your search used too many AND/OR expressions. Only the first @count terms were included in this search.', array('@count' => \Drupal::config('search.settings')->get('and_or_limit'))), 'warning');
    }
    $this->executedFirstPass = TRUE;

    // Match any of the search words. The word list is known to be non-empty
    // here because of the early return above.
    $or = db_or();
    foreach ($this->words as $word) {
      $or->condition('i.word', $word);
    }
    $this->condition($or);

    // Build query for keyword normalization.
    $this->join('search_total', 't', 'i.word = t.word');
    $this
      ->condition('i.type', $this->type)
      ->groupBy('i.type')
      ->groupBy('i.sid')
      ->having('COUNT(*) >= :matches', array(':matches' => $this->matches));

    // Clone the query object to do the firstPass query;
    $first = clone $this->query;

    // For complex search queries, add the LIKE conditions to the first pass
    // query.
    if (!$this->simple) {
      $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
      $first->condition($this->conditions);
    }

    // Calculate maximum keyword relevance, to normalize it.
    $first->addExpression('SUM(i.score * t.count)', 'calculated_score');
    $this->normalize = $first
      ->range(0, 1)
      ->orderBy('calculated_score', 'DESC')
      ->execute()
      ->fetchField();

    // A truthy normalization value means at least one item matched.
    return (bool) $this->normalize;
  }
  341. /**
  342. * Adds a custom score expression to the search query.
  343. *
  344. * Score expressions are used to order search results. If no calls to
  345. * addScore() have taken place, a default keyword relevance score will be
  346. * used. However, if at least one call to addScore() has taken place, the
  347. * keyword relevance score is not automatically added.
  348. *
  349. * Also note that if you call orderBy() directly on the query, search scores
  350. * will not automatically be used to order search results. Your orderBy()
  351. * expression can reference 'calculated_score', which will be the total
  352. * calculated score value.
  353. *
  354. * @param $score
  355. * The score expression, which should evaluate to a number between 0 and 1.
  356. * The string 'i.relevance' in a score expression will be replaced by a
  357. * measure of keyword relevance between 0 and 1.
  358. * @param $arguments
  359. * Query arguments needed to provide values to the score expression.
  360. * @param $multiply
  361. * If set, the score is multiplied with this value. However, all scores
  362. * with multipliers are then divided by the total of all multipliers, so
  363. * that overall, the normalization is maintained.
  364. *
  365. * @return object
  366. * The updated query object.
  367. */
  public function addScore($score, $arguments = array(), $multiply = FALSE) {
    if ($multiply) {
      // Each multiplied score gets its own pair of placeholders, numbered by
      // the position of the multiplier in the list.
      $index = count($this->multiply);
      $multiplier_placeholder = ':multiply_' . $index;
      $total_placeholder = ':total_' . $index;

      // Wrap the expression so it is scaled by the multiplier and divided by
      // the total of all multipliers to renormalize.
      $score = 'CAST(' . $multiplier_placeholder . ' AS DECIMAL) * COALESCE(( ' . $score . '), 0) / CAST(' . $total_placeholder . ' AS DECIMAL)';

      // Only the multiplier value is known at this point; the value for the
      // total placeholder is supplied by execute(), once every multiplier has
      // been collected.
      $arguments[$multiplier_placeholder] = $multiply;
      $this->multiply[] = $multiply;
    }

    // Existing arguments win over new ones with the same placeholder name.
    $this->scoresArguments += $arguments;
    $this->scores[] = $score;

    return $this;
  }
  384. /**
  385. * Executes the search.
  386. *
  387. * If not already done, this executes the first pass query. Then the complex
  388. * conditions are applied to the query including score expressions and
  389. * ordering.
  390. *
  391. * @return
  392. * An empty statement (StatementEmpty) if the first pass query returned no
  393. * results, and a database result set if there were results.
  394. */
  public function execute() {
    // Run the first pass lazily when the caller has not done so explicitly.
    if (!$this->executedFirstPass) {
      $this->executeFirstPass();
    }

    // Without a normalization value there is nothing to query for.
    if (!$this->normalize) {
      return new StatementEmpty();
    }

    // Attach the dataset table and the parsed conditions to the query.
    $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $this->condition($this->conditions);

    // Fall back to plain keyword relevance when no score was added.
    if (empty($this->scores)) {
      $this->addScore('i.relevance');
    }

    // Scores with multipliers are re-normalized by dividing by the total of
    // all multipliers. addScore() already rewrote the expressions, so only the
    // arguments for the totals are still missing.
    $multiplier_count = count($this->multiply);
    if ($multiplier_count) {
      $multiplier_total = array_sum($this->multiply);
      for ($index = 0; $index < $multiplier_count; $index++) {
        $this->scoresArguments[':total_' . $index] = $multiplier_total;
      }
    }

    // Substitute the pseudo-expression 'i.relevance' in every score
    // expression. The factor is formatted explicitly with '.' as decimal
    // separator, because some locales print floats with ',' while SQL always
    // expects '.'.
    $relevance = number_format((1.0 / $this->normalize), 10, '.', '');
    $relevance_expression = '(' . $relevance . ' * i.score * t.count)';
    $this->scores = str_replace('i.relevance', $relevance_expression, $this->scores);

    // Sum up all scores into a single query field.
    $score_sum = 'SUM(' . implode(' + ', $this->scores) . ')';
    $this->addExpression($score_sum, 'calculated_score', $this->scoresArguments);

    // Sort by the calculated score unless an order was set already.
    if (count($this->getOrderBy()) == 0) {
      $this->orderBy('calculated_score', 'DESC');
    }

    // Add tag and useful metadata.
    $this
      ->addTag('search_' . $this->type)
      ->addMetaData('normalize', $this->normalize)
      ->fields('i', array('type', 'sid'));

    return $this->query->execute();
  }
  440. /**
  441. * Builds the default count query for SearchQuery.
  442. *
  443. * Since SearchQuery always uses GROUP BY, we can default to a subquery. We
  444. * also add the same conditions as execute() because countQuery() is called
  445. * first.
  446. */
  public function countQuery() {
    // Work on a copy of the inner query.
    $subquery = clone $this->query;

    // Apply the same dataset join and conditions that execute() adds.
    $subquery->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type');
    $subquery->condition($this->conditions);

    // Fields and expressions are not needed for counting. Both lists are
    // bound by reference, so assigning an empty array clears them on the
    // cloned query.
    $selected_fields =& $subquery->getFields();
    $selected_fields = array();
    $selected_expressions =& $subquery->getExpressions();
    $selected_expressions = array();

    // Select the sid as the only field and wrap the result in an outer query.
    $sid_only = $subquery->fields('i', array('sid'));
    $count_query = db_select($sid_only, NULL, array('target' => 'slave'));

    // Add the COUNT() expression.
    $count_query->addExpression('COUNT(*)');

    return $count_query;
  }
  465. }