PageRenderTime 53ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/search/SearchSqlite.php

https://bitbucket.org/brunodefraine/mediawiki
PHP | 344 lines | 270 code | 15 blank | 59 comment | 14 complexity | 1b09de88073273cb383cbecb3e15f3d2 MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0
  1. <?php
  /**
   * SQLite search backend, based upon SearchMysql
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License along
   * with this program; if not, write to the Free Software Foundation, Inc.,
   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   * http://www.gnu.org/copyleft/gpl.html
   *
   * @file
   * @ingroup Search
   */
  /**
   * Search engine hook for SQLite.
   *
   * Builds MATCH queries against the `searchindex` table (joined to `page`)
   * and keeps that table up to date when pages are created or changed.
   *
   * @ingroup Search
   */
  class SearchSqlite extends SearchEngine {

      /**
       * @var DatabaseSqlite
       */
      protected $db;

      /**
       * Creates an instance of this class
       *
       * @param $db DatabaseSqlite: database object
       */
      public function __construct( $db ) {
          parent::__construct( $db );
      }

      /**
       * Whether fulltext search is supported by current schema
       *
       * @return Boolean
       */
      public function fulltextSearchSupported() {
          return $this->db->checkForEnabledSearch();
      }

      /**
       * Parse the user's query and transform it into an SQL fragment which will
       * become part of a WHERE clause.
       *
       * Side effect: $this->searchTerms is reset and then filled with one
       * highlighting regex (see regexTerm()) per term found in the query.
       *
       * @param $filteredText String: query text; searchInternal() passes it
       *   lower-cased and run through filter()
       * @param $fulltext Boolean: true to match against page text, false to
       *   match against titles
       * @return string SQL fragment of the form " <field> MATCH '<expr>' "
       */
      public function parseQuery( $filteredText, $fulltext ) {
          global $wgContLang;

          $lc = SearchEngine::legalSearchChars(); // Minus format chars
          $searchon = '';
          $this->searchTerms = array();

          $m = array();
          $found = preg_match_all(
              '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
              $filteredText,
              $m,
              PREG_SET_ORDER
          );

          if ( $found ) {
              foreach ( $m as $bits ) {
                  // For a quoted phrase the trailing groups (unquoted term and
                  // wildcard) take no part in the match and may be missing from
                  // $bits. Pad to the full group count rather than hiding the
                  // resulting notices with the @ operator.
                  list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) =
                      array_pad( $bits, 5, '' );

                  $isPlainTerm = ( $nonQuoted !== '' );
                  if ( $isPlainTerm ) {
                      $term = $nonQuoted;
                      $quote = '';
                  } else {
                      $term = str_replace( '"', '', $term );
                      $quote = '"';
                  }

                  if ( $searchon !== '' ) {
                      $searchon .= ' ';
                  }

                  // Some languages such as Serbian store the input form in the search index,
                  // so we may need to search for matches in multiple writing system variants.
                  $convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
                  if ( is_array( $convertedVariants ) ) {
                      $variants = array_unique( array_values( $convertedVariants ) );
                  } else {
                      $variants = array( $term );
                  }

                  // The low-level search index does some processing on input to work
                  // around problems with minimum lengths and encoding in MySQL's
                  // fulltext engine.
                  // For Chinese this also inserts spaces between adjacent Han characters.
                  //
                  // Some languages such as Chinese force all variants to a canonical
                  // form when stripping to the low-level search index, so to be sure
                  // the list is de-duplicated again after stripping.
                  $strippedVariants = array_unique( array_map(
                      array( $wgContLang, 'normalizeForSearch' ),
                      $variants
                  ) );
                  $hasAlternatives = count( $strippedVariants ) > 1;

                  // NOTE(review): the modifier characters and the parenthesised
                  // grouping mirror SearchMysql; confirm the SQLite FTS build in
                  // use interprets them as intended.
                  $searchon .= $modifier;
                  if ( $hasAlternatives ) {
                      $searchon .= '(';
                  }
                  foreach ( $strippedVariants as $stripped ) {
                      if ( $isPlainTerm && strpos( $stripped, ' ' ) !== false ) {
                          // Hack for Chinese: we need to toss in quotes for
                          // multiple-character phrases since normalizeForSearch()
                          // added spaces between them to make word breaks.
                          $stripped = '"' . trim( $stripped ) . '"';
                      }
                      $searchon .= "$quote$stripped$quote$wildcard ";
                  }
                  if ( $hasAlternatives ) {
                      $searchon .= ')';
                  }

                  // Match individual terms or quoted phrase in result highlighting...
                  // Note that variants will be introduced in a later stage for highlighting!
                  $this->searchTerms[] = $this->regexTerm( $term, $wildcard );
              }
          } else {
              wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
          }

          // The expression is embedded in a single-quoted SQL literal below,
          // so it has to be escaped by the database layer first.
          $searchon = $this->db->strencode( $searchon );
          $field = $this->getIndexField( $fulltext );
          return " $field MATCH '$searchon' ";
      }

      /**
       * Build the regex fragment used to highlight a single search term.
       *
       * @param $string String: literal term text (regex-escaped here)
       * @param $wildcard String: non-empty when the term carried a trailing "*"
       * @return String regex fragment without delimiters
       */
      public function regexTerm( $string, $wildcard ) {
          global $wgContLang;

          $regex = preg_quote( $string, '/' );

          if ( !$wgContLang->hasWordBreaks() ) {
              // For Chinese, words may legitimately abut other words in the text literal.
              // Don't add \b boundary checks... note this could cause false positives
              // for latin chars.
              return $regex;
          }

          if ( $wildcard ) {
              // Don't cut off the final bit!
              return '\b' . $regex;
          }

          return '\b' . $regex . '\b';
      }

      /**
       * Characters accepted in a search term: the parent's set plus the
       * double quote and the asterisk.
       *
       * @return String
       */
      public static function legalSearchChars() {
          return "\"*" . parent::legalSearchChars();
      }

      /**
       * Perform a full text search query and return a result set.
       *
       * @param $term String: raw search term
       * @return SqliteSearchResultSet
       */
      public function searchText( $term ) {
          return $this->searchInternal( $term, true );
      }

      /**
       * Perform a title-only search query and return a result set.
       *
       * @param $term String: raw search term
       * @return SqliteSearchResultSet
       */
      public function searchTitle( $term ) {
          return $this->searchInternal( $term, false );
      }

      /**
       * Shared implementation of searchText() and searchTitle().
       *
       * Runs the limited result query and, when $wgCountTotalSearchHits is
       * set, a second COUNT(*) query for the total number of hits.
       *
       * @param $term String: raw search term
       * @param $fulltext Boolean: true for page text, false for titles
       * @return SqliteSearchResultSet|null null when fulltext search is not
       *   supported by the current schema
       */
      protected function searchInternal( $term, $fulltext ) {
          global $wgCountTotalSearchHits, $wgContLang;

          if ( !$this->fulltextSearchSupported() ) {
              return null;
          }

          $filteredTerm = $this->filter( $wgContLang->lc( $term ) );
          $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) );

          $total = null;
          if ( $wgCountTotalSearchHits ) {
              $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) );
              $row = $totalResult->fetchObject();
              if ( $row ) {
                  $total = intval( $row->c );
              }
              $totalResult->free();
          }

          return new SqliteSearchResultSet( $resultSet, $this->searchTerms, $total );
      }

      /**
       * Return a partial WHERE clause to exclude redirects, if so set
       *
       * @return String
       */
      public function queryRedirect() {
          return $this->showRedirects ? '' : 'AND page_is_redirect=0';
      }

      /**
       * Return a partial WHERE clause to limit the search to the given namespaces
       *
       * @return String
       */
      public function queryNamespaces() {
          if ( $this->namespaces === null ) {
              return ''; # search all
          }

          if ( !count( $this->namespaces ) ) {
              $namespaces = '0';
          } else {
              $namespaces = $this->db->makeList( $this->namespaces );
          }

          return 'AND page_namespace IN (' . $namespaces . ')';
      }

      /**
       * Returns a query with limit for number of results set.
       *
       * @param $sql String: query to apply $this->limit / $this->offset to
       * @return String
       */
      public function limitResult( $sql ) {
          return $this->db->limitResult( $sql, $this->limit, $this->offset );
      }

      /**
       * Construct the full SQL query to do the search.
       * The guts should be constructed in queryMain()
       *
       * @param $filteredTerm String
       * @param $fulltext Boolean
       * @return String
       */
      public function getQuery( $filteredTerm, $fulltext ) {
          return $this->limitResult(
              $this->queryMain( $filteredTerm, $fulltext ) . ' ' .
              $this->queryRedirect() . ' ' .
              $this->queryNamespaces()
          );
      }

      /**
       * Picks which field to index on, depending on what type of query.
       *
       * @param $fulltext Boolean
       * @return String
       */
      public function getIndexField( $fulltext ) {
          return $fulltext ? 'si_text' : 'si_title';
      }

      /**
       * Get the base part of the search query.
       *
       * @param $filteredTerm String
       * @param $fulltext Boolean
       * @return String
       */
      public function queryMain( $filteredTerm, $fulltext ) {
          $match = $this->parseQuery( $filteredTerm, $fulltext );
          $page = $this->db->tableName( 'page' );
          $searchindex = $this->db->tableName( 'searchindex' );

          return "SELECT $searchindex.rowid, page_namespace, page_title " .
              "FROM $page,$searchindex " .
              "WHERE page_id=$searchindex.rowid AND $match";
      }

      /**
       * Construct the SQL query that counts every matching row; unlike
       * getQuery() no limit/offset is applied.
       *
       * @param $filteredTerm String
       * @param $fulltext Boolean
       * @return String
       */
      public function getCountQuery( $filteredTerm, $fulltext ) {
          $match = $this->parseQuery( $filteredTerm, $fulltext );
          $page = $this->db->tableName( 'page' );
          $searchindex = $this->db->tableName( 'searchindex' );

          // Explicit separators between the clauses, as in getQuery(), so the
          // SQL does not depend on parseQuery() ending its fragment in a space.
          return "SELECT COUNT(*) AS c " .
              "FROM $page,$searchindex " .
              "WHERE page_id=$searchindex.rowid AND $match" . ' ' .
              $this->queryRedirect() . ' ' .
              $this->queryNamespaces();
      }

      /**
       * Create or update the search index record for the given page.
       * Title and text should be pre-processed.
       *
       * @param $id Integer
       * @param $title String
       * @param $text String
       */
      public function update( $id, $title, $text ) {
          if ( !$this->fulltextSearchSupported() ) {
              return;
          }

          // @todo: find a method to do it in a single request,
          // couldn't do it so far due to typelessness of FTS3 tables.
          $dbw = wfGetDB( DB_MASTER );
          $dbw->delete( 'searchindex', array( 'rowid' => $id ), __METHOD__ );
          $dbw->insert(
              'searchindex',
              array(
                  'rowid' => $id,
                  'si_title' => $title,
                  'si_text' => $text
              ),
              __METHOD__
          );
      }

      /**
       * Update a search index record's title only.
       * Title should be pre-processed.
       *
       * @param $id Integer
       * @param $title String
       */
      public function updateTitle( $id, $title ) {
          if ( !$this->fulltextSearchSupported() ) {
              return;
          }

          $dbw = wfGetDB( DB_MASTER );
          $dbw->update(
              'searchindex',
              array( 'si_title' => $title ),
              array( 'rowid' => $id ),
              __METHOD__
          );
      }
  }
  /**
   * Result set for SQLite searches that additionally carries the total hit
   * count supplied by the caller.
   *
   * @ingroup Search
   */
  class SqliteSearchResultSet extends SqlSearchResultSet {

      /**
       * @param $resultSet mixed: database result, handed on to the parent
       * @param $terms Array: highlighting terms, handed on to the parent
       * @param $totalHits Integer|null: total number of matches, or null when
       *   it was not counted
       */
      public function __construct( $resultSet, $terms, $totalHits = null ) {
          parent::__construct( $resultSet, $terms );

          $this->mTotalHits = $totalHits;
      }

      /**
       * @return Integer|null the value given to the constructor
       */
      public function getTotalHits() {
          return $this->mTotalHits;
      }
  }
  304. }