PageRenderTime 162ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/services/Search/Services_Search_QueryParser.php

https://github.com/elstenaar86/spotweb
PHP | 986 lines | 505 code | 106 blank | 375 comment | 121 complexity | 965240588f0d0e8f3291645c23172ee6 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, MPL-2.0-no-copyleft-exception, 0BSD, Apache-2.0
  1. <?php
  2. class Services_Search_QueryParser {
  3. /*
  4. * We need a databse class to be able to properly escape
  5. * data in queries
  6. */
  7. private $_dbEng = null;
  8. /*
  9. * constructor
  10. */
  11. function __construct(dbeng_abs $db) {
  12. $this->_dbEng = $db;
  13. } # ctor
  14. /*
  15. * When passed an array with categories, this array is expanded \
  16. * to contain the fully qualified categories and subcategories.
  17. */
  18. public function prepareCategorySelection($dynaList) {
  19. SpotTiming::start(__FUNCTION__);
  20. $strongNotList = array();
  21. $categoryList = array();
  22. /*
  23. * The Dynatree jquery widget which we use, retrieves its data from ?page=catsjson,
  24. * for each node in the dynatree we provide a key. The tree could be seen as follows,
  25. * with the unique key within parenthesis.
  26. *
  27. * - Image (cat0)
  28. * +-- Film (cat0_z0)
  29. * +--- Format (cat0_z0_a)
  30. * +----- DivX (cat0_z0_a0)
  31. * +----- WMV (cat0_z0_a1)
  32. * +-- Series (cat0_z1)
  33. * +--- Format (cat0_z1_a)
  34. *+----- DivX (cat0_z1_a0)
  35. * +----- WMV (cat0_z1_a1)
  36. * +--- Source (cat0_z1_b)
  37. * - Applications (cat3)
  38. * +-- Format (cat1_zz_a / cat1_a)
  39. *
  40. * Basially, you have a headcategory number, then you have a categorytype, then a subcategorytype (a,b,c,d, ...)
  41. * then the subcategorynumber follows.
  42. *
  43. * When you want to select, in above example, a Film in DivX, the keyvalue is simply cat0_z0_a0.
  44. * However, when you want to select the whole of 'Image', keyvalue 'cat0' would suffice.
  45. *
  46. * If users would select categories manually (for example a manually constructed search), it would
  47. * be more convienent for them to be able to provide shorthands, allowing one to select common category
  48. * groups more easily. Spotweb wil expand those category selection items to contain the full selection.
  49. *
  50. * The following shorthands are allowed:
  51. *
  52. * cat0 - Will be expanded to all subcategoies of category 0
  53. * cat0_z0_a - Will be expanded to subcategory A of category 0, but the type must be z0
  54. * !cat0_z0_a1 - Will remove cat0_z0_a1 from the list (order in the list is important)
  55. * ~cat0_z0_a1 - 'Forbids' cat0_z0_a1 to be in the list (a NOT will be applied to it)
  56. * cat0_a - Select everything from subcategory A in category 0 (all z-types)
  57. *
  58. */
  59. $newTreeQuery = '';
  60. /*
  61. * Process each item in the list, and expand it where necessary
  62. */
  63. $dynaListCount = count($dynaList);
  64. for($i = 0; $i < $dynaListCount; $i++) {
  65. /*
  66. * The given category can be one of the following four types:
  67. * cat1_z0_a ==> Everything of cat1, type z0, and then everything of subcategory a
  68. * cat1_z0 ==> Everything of cat1, type z0
  69. * cat1_a ==> Everything of cat1 which is of 'subcategory a'
  70. * cat1 ==> Select the whole of cat1
  71. *
  72. */
  73. if ((strlen($dynaList[$i]) > 0) && ($dynaList[$i][0] == 'c')) {
  74. $hCat = (int) substr($dynaList[$i], 3, 1);
  75. # Was a type + global subcategory selected? (cat1_z0_a)
  76. if (strlen($dynaList[$i]) == 9) {
  77. $typeSelected = substr($dynaList[$i], 5, 2);
  78. $subCatSelected = substr($dynaList[$i], 8);
  79. # Was only as category selected (cat1)
  80. } elseif (strlen($dynaList[$i]) == 4) {
  81. $typeSelected = '*';
  82. $subCatSelected = '*';
  83. # Was a category and type selected (cat1_z0)
  84. } elseif ((strlen($dynaList[$i]) == 7) && ($dynaList[$i][5] === 'z')) {
  85. $typeSelected = substr($dynaList[$i], 5, 2);
  86. $subCatSelected = '*';
  87. # Was a category and subcateory specified, old stype? (cat1_a3)
  88. } elseif (((strlen($dynaList[$i]) == 7) || (strlen($dynaList[$i]) == 8)) && ($dynaList[$i][5] !== 'z')) {
  89. # Convert the old style to explicit categories (cat1_z0_a3, cat1_z1_a3, cat1_z2_a3, ... )
  90. foreach(SpotCategories::$_categories[$hCat]['z'] as $typeKey => $typeValue) {
  91. $newTreeQuery .= "," . substr($dynaList[$i], 0, 4) . '_z' . $typeKey . '_' . substr($dynaList[$i], 5);
  92. } # foreach
  93. $typeSelected = '';
  94. $subCatSelected = '';
  95. # was a subcategory specified? (cat1_a)
  96. } elseif (strlen($dynaList[$i]) == 6) {
  97. $typeSelected = '*';
  98. $subCatSelected = substr($dynaList[$i], 5, 1);
  99. } else {
  100. $newTreeQuery .= "," . $dynaList[$i];
  101. $typeSelected = '';
  102. $subCatSelected = '';
  103. } # else
  104. /*
  105. * Createa a string containing all subcategories.
  106. *
  107. * We always loop through all subcategories so we can reuse this bit of code
  108. * both for complete category selection as subcategory selection.
  109. */
  110. $tmpStr = '';
  111. foreach(SpotCategories::$_categories[$hCat] as $subCat => $subcatValues) {
  112. /*
  113. * There are four possible cases:
  114. *
  115. * $subcatSelected contains an empty string, it matches to nothing.
  116. * $subcatSelected contains an asterisk, it matches all subcategories.
  117. * $typeSelected contains an empty string, it matches nothing.
  118. * $typeSelected contains an asterisk, it matches all types.
  119. */
  120. if ($subCatSelected == '*') {
  121. foreach(SpotCategories::$_categories[$hCat]['z'] as $typeKey => $typeValue) {
  122. $typeKey = 'z' . $typeKey;
  123. if (($typeKey == $typeSelected) || ($typeSelected == '*')) {
  124. $tmpStr .= ',sub' . $hCat . '_' . $typeKey;
  125. } # if
  126. } # foreach
  127. } elseif (($subCat == $subCatSelected) && ($subCat !== 'z')) {
  128. foreach(SpotCategories::$_categories[$hCat]['z'] as $typeKey => $typeValue) {
  129. $typeKey = 'z' . $typeKey;
  130. if (($typeKey == $typeSelected) || ($typeSelected == '*')) {
  131. foreach(SpotCategories::$_categories[$hCat][$subCat] as $x => $y) {
  132. if (in_array($typeKey, $y[2])) {
  133. $tmpStr .= ",cat" . $hCat . "_" . $typeKey . '_' . $subCat . $x;
  134. } # if
  135. } # foreach
  136. } # if
  137. } # foreach
  138. } # if
  139. } # foreach
  140. $newTreeQuery .= $tmpStr;
  141. } elseif (substr($dynaList[$i], 0, 1) == '!') {
  142. # For a not, we just remove / exclude it from the list.
  143. $newTreeQuery = str_replace(',' . substr($dynaList[$i], 1), "", $newTreeQuery);
  144. } elseif (substr($dynaList[$i], 0, 1) == '~') {
  145. /*
  146. * For a STRONG NOT, we cannot remove it from the list because want to explicitly
  147. * remove those results from the query and we have to pass it in other URL's and the
  148. * likes
  149. */
  150. $newTreeQuery .= "," . $dynaList[$i];
  151. # and add it to the strongNotList array for usage later on
  152. $strongNotTmp = explode("_", $dynaList[$i], 2);
  153. /* To deny a whole category, we have to take an other shortcut */
  154. if (count($strongNotTmp) == 1) {
  155. $strongNotList[(int) substr($strongNotTmp[0], 4)][] = '';
  156. } else {
  157. $strongNotList[(int) substr($strongNotTmp[0], 4)][] = $strongNotTmp[1];
  158. } # else
  159. } else {
  160. $newTreeQuery .= "," . $dynaList[$i];
  161. } # else
  162. } # for
  163. if ((!empty($newTreeQuery)) && ($newTreeQuery[0] == ",")) {
  164. $newTreeQuery = substr($newTreeQuery, 1);
  165. } # if
  166. /*
  167. *
  168. * Starting from here, we have a prepared list - meaning, a list with all
  169. * categories fully expanded.
  170. *
  171. * We now translate this list to an nested list of elements which is easier
  172. * to convert to SQL. The format of the array is fairly typical:
  173. *
  174. * list['cat']
  175. * [cat] -> Head category, eg: 0 for Images
  176. * [type] -> Type, eg: 0 for z0
  177. * [subcattype] -> Subcategory type, eg: a
  178. * = value -> eg 1 for in total cat0_z0_a1
  179. */
  180. $dynaList = explode(',', $newTreeQuery);
  181. foreach($dynaList as $val) {
  182. if (substr($val, 0, 3) == 'cat') {
  183. # 0 element is headcategory
  184. # 1st element is type
  185. # 2ndelement is category
  186. $val = explode('_', (substr($val, 3) . '_'));
  187. $catVal = $val[0];
  188. $typeVal = $val[1];
  189. $subCatIdx = substr($val[2], 0, 1);
  190. $subCatVal = substr($val[2], 1);
  191. if (count($val) >= 4) {
  192. $categoryList['cat'][$catVal][$typeVal][$subCatIdx][] = $subCatVal;
  193. } # if
  194. } elseif (substr($val, 0, 3) == 'sub') {
  195. # 0 element is headcategory
  196. # 1st element is type
  197. $val = explode('_', (substr($val, 3) . '_'));
  198. $catVal = $val[0];
  199. $typeVal = $val[1];
  200. # Create the z-category in the categorylist
  201. if (count($val) == 3) {
  202. if (!isset($categoryList['cat'][$catVal][$typeVal])) {
  203. $categoryList['cat'][$catVal][$typeVal] = array();
  204. } # if
  205. } # if
  206. } # elseif
  207. } # foreach
  208. SpotTiming::stop(__FUNCTION__, array($categoryList, $strongNotList));
  209. return array($categoryList, $strongNotList);
  210. } # prepareCategorySelection
  211. /*
  212. * Converts a list of categories to an SQL filter
  213. */
  214. private function categoryListToSql($categoryList) {
  215. SpotTiming::start(__FUNCTION__);
  216. $categorySql = array();
  217. # Make sure we were passed a valid filter
  218. if ((!isset($categoryList['cat'])) || (!is_array($categoryList['cat']))) {
  219. return $categorySql;
  220. } # if
  221. /*
  222. * We have to translate the list of sub- and headcategories to an SQL WHERE statement in
  223. * multiple steps, where the 'category' is the basis for our filter.
  224. *
  225. * A testste for filters could be the following:
  226. * cat0_z0_a9,cat0_z1_a9,cat0_z3_a9, ==> HD beeld
  227. * cat0_z0_a9,cat0_z0_b3,cat0_z0_c1,cat0_z0_c2,cat0_z0_c6,cat0_z0_c11,~cat0_z1,~cat0_z2,~cat0_z3 ==> Nederlands ondertitelde films
  228. * cat0_a9 ==> Alles in x264HD
  229. * cat1_z0,cat1_z1,cat1_z2,cat1_z3 ==> Alle muziek, maar soms heeft muziek geen genre ingevuld!
  230. *
  231. * The category list structure is:
  232. *
  233. * array(1) {
  234. * ["cat"]=>
  235. * array(1) {
  236. * [1]=> <== Headcategory number (cat1)
  237. * array(4) {
  238. * ["z0"]=> <== Type (subcatz) number (cat1_z0)
  239. * array(4) {
  240. * ["a"]=> <== Subcategorylist (cat1_z0_a)
  241. * array(9) {
  242. * [0]=>
  243. * string(1) "0" <== Selected subcategory (so: cat1_z0_a0)
  244. * }
  245. * ["b"]=>
  246. * array(7) {
  247. * [0]=>
  248. * string(1) "0"
  249. *
  250. */
  251. foreach($categoryList['cat'] as $catid => $cat) {
  252. /*
  253. * Each category we have, we try to procss all subcategories
  254. * and convert it to a filter
  255. */
  256. if ((is_array($cat)) && (!empty($cat))) {
  257. foreach($cat as $type => $typeValues) {
  258. $catid = (int) $catid;
  259. $tmpStr = "((s.category = " . (int) $catid . ")";
  260. # dont filter the zz types (games/apps)
  261. if ($type[1] !== 'z') {
  262. $tmpStr .= " AND (s.subcatz = '" . $type . "|')";
  263. } # if
  264. $subcatItems = array();
  265. foreach($typeValues as $subcat => $subcatItem) {
  266. $subcatValues = array();
  267. foreach($subcatItem as $subcatValue) {
  268. /*
  269. * A spot can only contain one 'A' and 'Z' subcategory value, so we
  270. * can perform an equality filter instead of a LIKE
  271. */
  272. if ($subcat == 'a') {
  273. $subcatValues[] = "(s.subcata = '" . $subcat . $subcatValue . "|') ";
  274. } elseif (in_array($subcat, array('b', 'c', 'd'))) {
  275. $subcatValues[] = "(s.subcat" . $subcat . " LIKE '%" . $subcat . $subcatValue . "|%') ";
  276. } # if
  277. } # foreach
  278. /*
  279. *
  280. * We add all subactegories within the same subcategory together (for example all
  281. * formats of a movie) with an OR. This means you can pick between DivX and WMV as
  282. * a format
  283. *
  284. */
  285. if (count($subcatValues) > 0) {
  286. $subcatItems[] = " (" . join(" OR ", $subcatValues) . ") ";
  287. } # if
  288. } # foreach subcat
  289. /*
  290. * After this, same headcategory and type (Image + Movie, Sound) filters for
  291. * subcategories are merged together with an AND.
  292. *
  293. * This results in a filter like:
  294. *
  295. * (((category = 0) AND ( ((subcata = 'a0|') ) AND ((subcatd LIKE '%d0|%')
  296. *
  297. * This makes sure you are able to pick multiple genres within the same category/subcategory,
  298. * but you will not get unpredictable results by getting an 'Action' game for Linux when you
  299. * accidentally asked for either 'Action' or 'Romance'.
  300. */
  301. if (count($subcatItems) > 0) {
  302. $tmpStr .= " AND (" . join(" AND ", $subcatItems) . ") ";
  303. } # if
  304. # Finish of the query
  305. $tmpStr .= ")";
  306. $categorySql[] = $tmpStr;
  307. } # foreach type
  308. } # if
  309. } # foreach
  310. SpotTiming::stop(__FUNCTION__, array($categorySql));
  311. return $categorySql;
  312. } # categoryListToSql
  313. /*
  314. * Converts a list of "strong nots" to the corresponding
  315. * SQL statements
  316. */
  317. private function strongNotListToSql($strongNotList) {
  318. SpotTiming::start(__FUNCTION__);
  319. $strongNotSql = array();
  320. if (empty($strongNotList)) {
  321. return array();
  322. } # if
  323. /*
  324. * Each STRONG NOT is to be converted individually to a NOT
  325. * SQL WHERE filter
  326. */
  327. foreach(array_keys($strongNotList) as $strongNotCat) {
  328. foreach($strongNotList[$strongNotCat] as $strongNotSubcat) {
  329. /*
  330. * When the strongnot is for a whole category (eg: cat0), we can
  331. * make the NOT even simpler
  332. */
  333. if (empty($strongNotSubcat)) {
  334. $strongNotSql[] = "(NOT (s.Category = " . (int) $strongNotCat . "))";
  335. } else {
  336. $subcats = explode('_', $strongNotSubcat);
  337. /*
  338. * A spot can only contain one 'A' and 'Z' subcategory value, so we
  339. * can perform an equality filter instead of a LIKE
  340. */
  341. if (count($subcats) == 1) {
  342. if (in_array($subcats[0][0], array('a', 'z'))) {
  343. $strongNotSql[] = "(NOT ((s.Category = " . (int) $strongNotCat . ") AND (s.subcat" . $subcats[0][0] . " = '" . $this->_dbEng->safe($subcats[0]) . "|')))";
  344. } elseif (in_array($subcats[0][0], array('b', 'c', 'd'))) {
  345. $strongNotSql[] = "(NOT ((s.Category = " . (int) $strongNotCat . ") AND (s.subcat" . $subcats[0][0] . " LIKE '%" . $this->_dbEng->safe($subcats[0]) . "|%')))";
  346. } # if
  347. } elseif (count($subcats) == 2) {
  348. if (in_array($subcats[1][0], array('a', 'z'))) {
  349. $strongNotSql[] = "(NOT ((s.Category = " . (int) $strongNotCat . ") AND (s.subcatz = '" . $subcats[0] . "|') AND (subcat" . $subcats[1][0] . " = '" . $this->_dbEng->safe($subcats[1]) . "|')))";
  350. } elseif (in_array($subcats[1][0], array('b', 'c', 'd'))) {
  351. $strongNotSql[] = "(NOT ((s.Category = " . (int) $strongNotCat . ") AND (s.subcatz = '" . $subcats[0] . "|') AND (subcat" . $subcats[1][0] . " LIKE '%" . $this->_dbEng->safe($subcats[1]) . "|%')))";
  352. } # if
  353. } # else
  354. } # else not whole subcat
  355. } # foreach
  356. } # forEach
  357. SpotTiming::stop(__FUNCTION__, array($strongNotSql));
  358. return $strongNotSql;
  359. } # strongNotListToSql
  360. /*
  361. * Prepareert de filter values naar een altijd juist formaat
  362. */
  363. private function prepareFilterValues($search) {
  364. SpotTiming::start(__FUNCTION__);
  365. $filterValueList = array();
  366. /*
  367. * We have drie kinds of filters:
  368. * - Old type where you have a search[type] with the values stamp,title,tag and an search[text]
  369. * containing the value to search for. This limits you to a maximum of one filter which is not
  370. * sufficient.
  371. *
  372. * We automatically convert these kind of searches to the new type.
  373. *
  374. *
  375. * - New type where there is a search[value] array, which contain values in the following shape:
  376. * type:operator:value.
  377. * For example, tag:=:spotweb. A shorthand is also available when the operator is left out (eg: tag:spotweb),
  378. * we assume the EQ operator was intended.
  379. *
  380. * - Special kind of lists, there are a few values with a special meaning:
  381. * New:0 (new spots)
  382. * Downloaded:0 (spots which are downloaded by this account)
  383. * Watch:0 (spots on the watchlist of this account)
  384. * Seen:0 (spots which have already been opened by this account)
  385. * MyPostedSpots:0 (spots posted by this account)
  386. * WhitelistedSpotters:0 (spots posted by a whitelisted spotter)
  387. *
  388. */
  389. if (isset($search['type'])) {
  390. if (!isset($search['text'])) {
  391. $search['text'] = '';
  392. } # if
  393. /*
  394. * We can be provided a set of old and new filters, we don't want to
  395. * overwrite the regular filters, so we take care to append to them
  396. */
  397. if ((!isset($search['value'])) || (!is_array($search['value']))) {
  398. $search['value'] = array();
  399. } # if
  400. $search['value'][] = $search['type'] . ':=:' . $search['text'];
  401. unset($search['type']);
  402. } # if
  403. # Make sure that we always have something to iterate through
  404. if ((!isset($search['value'])) || (!is_array($search['value']))) {
  405. $search['value'] = array();
  406. } # if
  407. # Now we transform the new query (field:operator:value pair) to an exploded array for easier iteration
  408. foreach($search['value'] as $value) {
  409. if (!empty($value)) {
  410. $tmpFilter = explode(':', $value);
  411. # Default to an '=' operator when none is given
  412. if (count($tmpFilter) < 3) {
  413. $tmpFilter = array($tmpFilter[0],
  414. '=',
  415. $tmpFilter[1]);
  416. } # if
  417. /*
  418. * Create the actual filter, we add the array_slice part to
  419. * allow for an ':' in the actual search value.
  420. */
  421. $filterValueTemp = Array('fieldname' => $tmpFilter[0],
  422. 'operator' => $tmpFilter[1],
  423. 'value' => join(":", array_slice($tmpFilter, 2)));
  424. /*
  425. * and create the actual filter list. Before appending it,
  426. * we want to make sure no identical filter is already
  427. * in the list, because this might make MySQL very slow.
  428. */
  429. if (!in_array($filterValueTemp, $filterValueList)) {
  430. $filterValueList[] = $filterValueTemp;
  431. } # if
  432. } # if
  433. } # for
  434. SpotTiming::stop(__FUNCTION__, array($filterValueList));
  435. return $filterValueList;
  436. } # prepareFilterValues
  437. /*
  438. * Converts one or multiple userprovided txt filters to SQL statements
  439. */
  440. private function filterValuesToSql($filterValueList, $currentSession) {
  441. SpotTiming::start(__FUNCTION__);
  442. # Add a list of possible text searches
  443. $filterValueSql = array('OR' => array(), 'AND' => array());
  444. $additionalFields = array();
  445. $additionalTables = array();
  446. $additionalJoins = array();
  447. $sortFields = array();
  448. $textSearchFields = array();
  449. # Lookp table from 'friendly' name to fully qualified one
  450. $filterFieldMapping = array('filesize' => 's.filesize',
  451. 'date' => 's.stamp',
  452. 'stamp' => 's.stamp',
  453. 'userid' => 's.spotterid',
  454. 'spotterid' => 's.spotterid',
  455. 'moderated' => 's.moderated',
  456. 'poster' => 's.poster',
  457. 'titel' => 's.title',
  458. 'title' => 's.title',
  459. 'tag' => 's.tag',
  460. 'new' => 'new',
  461. 'reportcount' => 's.reportcount',
  462. 'commentcount' => 's.commentcount',
  463. 'downloaded' => 'downloaded',
  464. 'mypostedspots' => 'mypostedspots',
  465. 'whitelistedspotters' => 'whitelistedspotters',
  466. 'watch' => 'watch',
  467. 'seen' => 'seen');
  468. foreach($filterValueList as $filterRecord) {
  469. $tmpFilterFieldname = strtolower($filterRecord['fieldname']);
  470. $tmpFilterOperator = $filterRecord['operator'];
  471. $tmpFilterValue = $filterRecord['value'];
  472. # When no match for friendly name -> column name is found, ignore the search
  473. if (!isset($filterFieldMapping[$tmpFilterFieldname])) {
  474. break;
  475. } # if
  476. # make sure the operators are valid
  477. if (!in_array($tmpFilterOperator, array('>', '<', '>=', '<=', '=', '!='))) {
  478. break;
  479. } # if
  480. /*
  481. * Ignore empty searches. We cannot use the empty() operator,
  482. * because empty(0) evaluates to true but is an valid
  483. * value to search for
  484. */
  485. if (strlen($tmpFilterValue) == 0) {
  486. continue;
  487. } # if
  488. /*
  489. * When the search is pure textsearch, it might be able to be optimized
  490. * by utilizing the fulltext search (engine). If so, we take this path
  491. * to gain the most performance.
  492. */
  493. if (in_array($tmpFilterFieldname, array('tag', 'poster', 'titel'))) {
  494. /*
  495. * Some databases (sqlite for example), want to have all their fulltext
  496. * searches available in one SQL function call.
  497. *
  498. * To be able to do this, we append all fulltext searches for now, so we
  499. * can create the actual fulltext search later on.
  500. */
  501. if (!isset($textSearchFields[$filterFieldMapping[$tmpFilterFieldname]])) {
  502. $textSearchFields[$filterFieldMapping[$tmpFilterFieldname]] = array();
  503. } # if
  504. $textSearchFields[$filterFieldMapping[$tmpFilterFieldname]][] = array('fieldname' => $filterFieldMapping[$tmpFilterFieldname], 'value' => $tmpFilterValue);
  505. } elseif (in_array($tmpFilterFieldname, array('new', 'downloaded', 'watch', 'seen', 'mypostedspots', 'whitelistedspotters'))) {
  506. /*
  507. * Some fieldnames are mere dummy fields which map to actual
  508. * functionality. Those dummiefields are processed below
  509. */
  510. switch($tmpFilterFieldname) {
  511. case 'new' : {
  512. $tmpFilterValue = ' ((s.stamp > ' . (int) $this->_dbEng->safe($currentSession['user']['lastread']) . ')';
  513. $tmpFilterValue .= ' AND (l.seen IS NULL))';
  514. break;
  515. } # case 'new'
  516. case 'whitelistedspotters' : {
  517. $tmpFilterValue = ' (wl.spotterid IS NOT NULL)';
  518. break;
  519. } # case 'whitelistedspotters'
  520. case 'mypostedspots' : {
  521. $additionalFields[] = '1 AS mypostedspot';
  522. $additionalJoins[] = array('tablename' => 'spotsposted',
  523. 'tablealias' => 'spost',
  524. 'jointype' => 'LEFT',
  525. 'joincondition' => 'spost.messageid = s.messageid');
  526. $tmpFilterValue = ' (spost.ouruserid = ' . (int) $this->_dbEng->safe($currentSession['user']['userid']) . ') ';
  527. $sortFields[] = array('field' => 'spost.stamp',
  528. 'direction' => 'DESC',
  529. 'autoadded' => true,
  530. 'friendlyname' => null);
  531. break;
  532. } # case 'mypostedspots'
  533. case 'downloaded' : {
  534. $tmpFilterValue = ' (l.download IS NOT NULL)';
  535. $sortFields[] = array('field' => 'downloadstamp',
  536. 'direction' => 'DESC',
  537. 'autoadded' => true,
  538. 'friendlyname' => null);
  539. break;
  540. } # case 'downloaded'
  541. case 'watch' : {
  542. $additionalFields[] = '1 AS mywatchedspot';
  543. $tmpFilterValue = ' (l.watch IS NOT NULL)';
  544. $sortFields[] = array('field' => 'watchstamp',
  545. 'direction' => 'DESC',
  546. 'autoadded' => true,
  547. 'friendlyname' => null);
  548. break;
  549. } # case 'watch'
  550. case 'seen' : {
  551. $additionalFields[] = '1 AS myseenspot';
  552. $tmpFilterValue = ' (l.seen IS NOT NULL)';
  553. $sortFields[] = array('field' => 'seenstamp',
  554. 'direction' => 'DESC',
  555. 'autoadded' => true,
  556. 'friendlyname' => null);
  557. break;
  558. } # case 'seen'
  559. } # switch
  560. # append the created query string to be an AND filter
  561. $filterValueSql['AND'][] = $tmpFilterValue;
  562. } else {
  563. /*
  564. * No FTS, no dummyfield, it must be some sort of comparison then.
  565. *
  566. * First we want to extract the field we are filtering on.
  567. */
  568. if ($tmpFilterFieldname == 'date') {
  569. $tmpFilterValue = date("U", strtotime($tmpFilterValue));
  570. } elseif ($tmpFilterFieldname == 'stamp') {
  571. $tmpFilterValue = (int) $tmpFilterValue;
  572. } elseif (($tmpFilterFieldname == 'filesize') && (is_numeric($tmpFilterValue) === false)) {
  573. # Explicitly cast to float to workaroun a rounding bug in PHP on x86
  574. $val = (float) trim(substr($tmpFilterValue, 0, -1));
  575. $last = strtolower($tmpFilterValue[strlen($tmpFilterValue) - 1]);
  576. switch($last) {
  577. case 'g': $val *= (float) 1024;
  578. case 'm': $val *= (float) 1024;
  579. case 'k': $val *= (float) 1024;
  580. } # switch
  581. $tmpFilterValue = round($val, 0);
  582. } # if
  583. /*
  584. * add quotes around it when not numeric. We cannot blankly always add quotes
  585. * as postgresql doesn't like that of course
  586. */
  587. if (!is_numeric($tmpFilterValue)) {
  588. $tmpFilterValue = "'" . $this->_dbEng->safe($tmpFilterValue) . "'";
  589. } else {
  590. $tmpFilterValue = $this->_dbEng->safe($tmpFilterValue);
  591. } # if
  592. # depending on the type of search, we either add the filter as an AND or an OR
  593. if (in_array($tmpFilterFieldname, array('spotterid', 'userid'))) {
  594. $filterValueSql['OR'][] = ' (' . $filterFieldMapping[$tmpFilterFieldname] . ' ' . $tmpFilterOperator . ' ' . $tmpFilterValue . ') ';
  595. } else {
  596. $filterValueSql['AND'][] = ' (' . $filterFieldMapping[$tmpFilterFieldname] . ' ' . $tmpFilterOperator . ' ' . $tmpFilterValue . ') ';
  597. } # else
  598. } # if
  599. } # foreach
  600. /*
  601. * When all filters are processed, we want to check wether we actually
  602. * have to process any of the $textSearchFields for which we could run
  603. * the db specific FTS engine.
  604. *
  605. * If so, ask the FTS engin to process the query.
  606. */
  607. if (!empty($textSearchFields)) {
  608. /*
  609. * We group searches per search type, but this means
  610. * we need to pass several the total amount of added
  611. * fields to the FTS so it can create unique relevancy
  612. * field names.
  613. *
  614. * For example: search for poster AA and Title BB
  615. */
  616. foreach($textSearchFields as $searchField => $searches) {
  617. $ftsEng = dbfts_abs::Factory($this->_dbEng);
  618. $parsedTextQueryResult = $ftsEng->createTextQuery($searches, $additionalFields);
  619. if (in_array($searches[0]['fieldname'], array('s.poster', 's.tag', 's.title'))) {
  620. $filterValueSql['AND'][] = ' (' . implode(' OR ', $parsedTextQueryResult['filterValueSql']) . ') ';
  621. } else {
  622. $filterValueSql['AND'][] = ' (' . implode(' AND ', $parsedTextQueryResult['filterValueSql']) . ') ';
  623. } # if
  624. $additionalTables = array_merge($additionalTables, $parsedTextQueryResult['additionalTables']);
  625. $additionalFields = array_merge($additionalFields, $parsedTextQueryResult['additionalFields']);
  626. $sortFields = array_merge($sortFields, $parsedTextQueryResult['sortFields']);
  627. } # foreach
  628. } # if
  629. SpotTiming::stop(__FUNCTION__, array($filterValueSql, $additionalFields, $additionalTables, $additionalJoins, $sortFields));
  630. return array($filterValueSql, $additionalFields, $additionalTables, $additionalJoins, $sortFields);
  631. } # filterValuesToSql
  632. /*
  633. * Converts the sorting as asked to an intermediate format ready for processing
  634. */
  635. private function prepareSortFields($sort, $sortFields) {
  636. SpotTiming::start(__FUNCTION__);
  637. $VALID_SORT_FIELDS = array('category' => 1,
  638. 'poster' => 1,
  639. 'title' => 1,
  640. 'filesize' => 1,
  641. 'stamp' => 1,
  642. 'subcata' => 1,
  643. 'spotrating' => 1,
  644. 'commentcount' => 1);
  645. if ((!isset($sort['field'])) || (!isset($VALID_SORT_FIELDS[$sort['field']]))) {
  646. /*
  647. * Add an extra sort on stamp. It might be that a FTS engine or something else,
  648. * has added a requested sorting as well, so make sure we add it to the end of
  649. * sortfields.
  650. */
  651. $sortFields[] = array('field' => 's.stamp', 'direction' => 'DESC', 'autoadded' => true, 'friendlyname' => null);
  652. } else {
  653. if (strtoupper($sort['direction']) != 'ASC') {
  654. $sort['direction'] = 'DESC';
  655. } # if
  656. /*
  657. * Explicit requested sorts, are prepended to the beginning of the array, so
  658. * the user requested sorting always is preferred above any other sorting
  659. */
  660. array_unshift($sortFields, array('field' => 's.' . $sort['field'],
  661. 'direction' => $sort['direction'],
  662. 'autoadded' => false,
  663. 'friendlyname' => $sort['field']));
  664. } # else
  665. SpotTiming::stop(__FUNCTION__, array($sortFields));
  666. return $sortFields;
  667. } # prepareSortFields
  668. /*
  669. * "Compresses" an expanded category list. It tries to search for the smallest
  670. * (in string length) match which contains the same information.
  671. *
  672. * This function, for example, will translate cat0_z0_a1,cat0_z0_a2,... to a
  673. * simple cat0_z0_a string and other nifty tricks.
  674. *
  675. * This is wanted to get cleaner urls, to be more efficient when parsing and
  676. * to be able to lessen the change we will hit the GET HTTP url limit.
  677. *
  678. */
  679. public function compressCategorySelection($categoryList, $strongNotList) {
  680. SpotTiming::start(__FUNCTION__);
  681. $compressedList = '';
  682. /*
  683. * We process each category, and the matching subcategories, to make sure all
  684. * required elments are set. If so, we remove the individual elements and
  685. * add the shorthand for it.
  686. */
  687. foreach(SpotCategories::$_head_categories as $headCatNumber => $headCatValue) {
  688. $subcatsMissing = array();
  689. # match each subcategory
  690. if (isset($categoryList['cat'][$headCatNumber])) {
  691. $subcatsMissing[$headCatNumber] = array();
  692. foreach($categoryList['cat'][$headCatNumber] as $subCatType => $subCatValues) {
  693. $subcatsMissing[$headCatNumber][$subCatType] = array();
  694. foreach(SpotCategories::$_categories[$headCatNumber] as $subCat => $subcatValues) {
  695. if ($subCat !== 'z') {
  696. if (isset($categoryList['cat'][$headCatNumber][$subCatType][$subCat])) {
  697. # process all subcategory values to see if any are missing
  698. foreach(SpotCategories::$_categories[$headCatNumber][$subCat] as $subcatValue => $subcatDescription) {
  699. # Make sure the subcategory is actually avaialble for this type
  700. if (in_array($subCatType, $subcatDescription[2])) {
  701. # and if the subcat element is missing, add it to the missing list
  702. if (array_search($subcatValue, $categoryList['cat'][$headCatNumber][$subCatType][$subCat]) === false) {
  703. $subcatsMissing[$headCatNumber][$subCatType][$subCat][$subcatValue] = 1;
  704. } # if
  705. } # if
  706. } # foreach
  707. } else {
  708. // $subcatsMissing[$headCatNumber][$subCatType][$subCat] = array();
  709. } # if
  710. } # if
  711. } # foreach
  712. } # foreach
  713. //var_dump($categoryList);
  714. //var_dump(expression)($subcatsMissing);
  715. //die();
  716. /*
  717. * If not the complete headcategory has been selected, we have to
  718. * do a tiny bit more work to get the exact match
  719. */
  720. if (!empty($subcatsMissing[$headCatNumber])) {
  721. /*
  722. * There are three possible situations:
  723. *
  724. * - the subcategory does not exist at all, we select the complete subcategory
  725. * - the subcategory exists, but is empty. It means we do not want anything out of it
  726. * - the subcategory exists, and is not empty. The items in it, are the items we do not want
  727. */
  728. foreach($categoryList['cat'][$headCatNumber] as $subType => $subTypeValue) {
  729. /*
  730. * Check wether the complete headcat+subtype (cat0_z0, cat0_z1) is selected
  731. */
  732. if (!empty($subcatsMissing[$headCatNumber][$subType])) {
  733. foreach(SpotCategories::$_subcat_descriptions[$headCatNumber] as $subCatKey => $subCatValue) {
  734. if ($subCatKey !== 'z') {
  735. if (!isset($subcatsMissing[$headCatNumber][$subType][$subCatKey])) {
  736. // $compressedList .= 'cat' . $headCatNumber . '_' . $subType . '_' . $subCatKey . ',';
  737. } elseif (empty($subcatsMissing[$headCatNumber][$subType][$subCatKey])) {
  738. /*
  739. * If the subcategory is completely empty, the user doesn't
  740. * want anything from it
  741. */
  742. } else {
  743. /*
  744. * The subcategory does exist, but contains only items
  745. * the user doesn't want or need. We deselected them here.
  746. *
  747. * We can either add the whole category, and add a few
  748. * "NOT"'s (!cat0_z0_a1) or just selected the individual
  749. * items. We determine this whether the majority is
  750. * selected or excluded.
  751. */
  752. $moreFalseThanTrue = (count(@$subcatsMissing[$headCatNumber][$subType][$subCatKey]) > (count(@SpotCategories::$_categories[$headCatNumber][$subCatKey][$subCatValue]) / 2));
  753. foreach(SpotCategories::$_categories[$headCatNumber][$subCatKey] as $subCatValue => $subCatDesc) {
  754. if (in_array($subType, $subCatDesc[2])) {
  755. if ($moreFalseThanTrue) {
  756. if (!isset($subcatsMissing[$headCatNumber][$subType][$subCatKey][$subCatValue])) {
  757. $compressedList .= 'cat' . $headCatNumber . '_' . $subType . '_' . $subCatKey . $subCatValue . ',';
  758. } # if
  759. } else {
  760. if (isset($subcatsMissing[$headCatNumber][$subType][$subCatKey][$subCatValue])) {
  761. /*
  762. * We have to make sure the whole category is selected, so we perform an
  763. * extra check for it
  764. */
  765. if (strpos(',' . $compressedList . ',', ',cat' . $headCatNumber . '_' . $subType . '_' . $subCatKey . ',') === false) {
  766. $compressedList .= 'cat' . $headCatNumber . '_' . $subType . '_' . $subCatKey . ',';
  767. } # if
  768. # and start deselecting the subcategories
  769. $compressedList .= '!cat' . $headCatNumber . '_' . $subType . '_' . $subCatKey . $subCatValue . ',';
  770. } # if
  771. } # if
  772. } # if
  773. } # foreach
  774. } # else
  775. } # if
  776. } # foreach
  777. } else {
  778. $compressedList .= 'cat' . $headCatNumber . '_' . $subType . ',';
  779. } # if
  780. } # foreach
  781. } else {
  782. $compressedList .= 'cat' . $headCatNumber . ',';
  783. } # else
  784. } # if
  785. } # foreach
  786. # and of course, add the strong not list
  787. if (!empty($strongNotList)) {
  788. foreach($strongNotList as $headCat => $subcatList) {
  789. foreach($subcatList as $subcatValue) {
  790. $compressedList .= '~cat' . $headCat . '_' . $subcatValue . ',';
  791. } # foreach
  792. } # foreach
  793. } # if
  794. SpotTiming::stop(__FUNCTION__, array($compressedList));
  795. return $compressedList;
  796. } # compressCategorySelection
  797. /*
  798. * Converts an array with search terms (tree, type, valus) to an SQL statement
  799. * to be glued to an SQL WHERE query
  800. */
  801. public function filterToQuery($search, $sort, $currentSession, $indexFilter) {
  802. SpotTiming::start(__FUNCTION__);
  803. $isUnfiltered = false;
  804. $categoryList = array();
  805. $categorySql = array();
  806. $strongNotList = array();
  807. $strongNotSql = array();
  808. $filterValueList = array();
  809. $filterValueSql = array();
  810. $additionalFields = array();
  811. $additionalTables = array();
  812. $additionalJoins = array();
  813. $sortFields = array();
  814. # Take the easy way out of no filters have been given
  815. if (empty($search)) {
  816. return array('filter' => '',
  817. 'search' => array(),
  818. 'additionalFields' => array(),
  819. 'additionalTables' => array(),
  820. 'additionalJoins' => array(),
  821. 'categoryList' => array(),
  822. 'strongNotList' => array(),
  823. 'filterValueList' => array(),
  824. 'unfiltered' => false,
  825. 'sortFields' => array(array('field' => 'stamp', 'direction' => 'DESC', 'autoadded' => true, 'friendlyname' => null)));
  826. } # if
  827. /*
  828. * Process the parameters in $search, legacy parameters are converted
  829. * to a common format by prepareFilterValues, this list is then
  830. * converted to SQL
  831. */
  832. $filterValueList = $this->prepareFilterValues($search);
  833. list($filterValueSql, $additionalFields, $additionalTables, $additionalJoins, $sortFields) = $this->filterValuesToSql($filterValueList, $currentSession);
  834. /*
  835. * When asked to forget all category filters (and only search for a word/typefilter)
  836. * we simply reset the filter by overwriting $search with $indexfilter
  837. */
  838. if ((isset($search['unfiltered'])) && (($search['unfiltered'] === 'true'))) {
  839. $search = array_merge($search, $indexFilter);
  840. $isUnfiltered = true;
  841. } # if
  842. /*
  843. * If a tree was given, convert it to subcategories etc.
  844. * prepareCategorySelection() makes sure all categories eventually
  845. * are in a common format
  846. */
  847. if (!empty($search['tree'])) {
  848. # explode the dynaList
  849. $dynaList = explode(',', $search['tree']);
  850. list($categoryList, $strongNotList) = $this->prepareCategorySelection($dynaList);
  851. # and convert to SQL
  852. $categorySql = $this->categoryListToSql($categoryList);
  853. $strongNotSql = $this->strongNotListToSql($strongNotList);
  854. } # if
  855. # Check for an explicit sorting convention
  856. $sortFields = $this->prepareSortFields($sort, $sortFields);
  857. $endFilter = array();
  858. if (!empty($categorySql)) {
  859. $endFilter[] = '(' . join(' OR ', $categorySql) . ') ';
  860. } # if
  861. if (!empty($filterValueSql['AND'])) {
  862. $endFilter[] = '(' . join(' AND ', $filterValueSql['AND']) . ') ';
  863. } # if
  864. if (!empty($filterValueSql['OR'])) {
  865. $endFilter[] = '(' . join(' OR ', $filterValueSql['OR']) . ') ';
  866. } # if
  867. $endFilter[] = join(' AND ', $strongNotSql);
  868. $endFilter = array_filter($endFilter);
  869. SpotTiming::stop(__FUNCTION__, array(join(" AND ", $endFilter)));
  870. return array('filter' => join(" AND ", $endFilter),
  871. 'categoryList' => $categoryList,
  872. 'unfiltered' => $isUnfiltered,
  873. 'strongNotList' => $strongNotList,
  874. 'filterValueList' => $filterValueList,
  875. 'additionalFields' => $additionalFields,
  876. 'additionalTables' => $additionalTables,
  877. 'additionalJoins' => $additionalJoins,
  878. 'sortFields' => $sortFields);
  879. } # filterToQuery
  880. } # Services_Search_QueryParser