/src/pyechonest/doc/build/html/_static/searchtools.js
Relevant Search: With Applications for Solr and Elasticsearch
For more in-depth reading about search, ranking, and generally everything you could ever want to know about how Lucene, Elasticsearch, or Solr work under the hood, I highly suggest this book. It is easily one of the most interesting technical books I have read in a long time. If you are tasked with solving search relevance problems, even if not in Solr or Elasticsearch, it should be your first reference. (Amazon Affiliate Link) JavaScript | 518 lines | 397 code | 52 blank | 69 comment | 89 complexity | 2d2d54448ebb4ce62716b0a8fbb11dbe MD5 | raw file
/*
 * searchtools.js
 * ~~~~~~~~~~~~~~
 *
 * Sphinx JavaScript utilities for the full-text search.
 *
 * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 * This script is not self-contained: it references jQuery / $, $u, _,
 * $.getQueryParameters, $.urlencode, the highlightText jQuery plugin and
 * DOCUMENTATION_OPTIONS, none of which are defined in this file.
 */

/**
 * helper function to return a node containing the
 * search summary for a given text. keywords is a list
 * of stemmed words, hlwords is the list of normal, unstemmed
 * words. the first one is used to find the occurrence, the
 * latter for highlighting it.
 *
 * The excerpt is a window of at most 240 characters that begins up to
 * 120 characters before the position of the last keyword (in list order)
 * found in the text; it begins at offset 0 when no keyword is found.
 *
 * @param {string} text      full text to cut the excerpt from
 * @param {Array}  keywords  stemmed words used to position the excerpt
 * @param {Array}  hlwords   unstemmed words passed to highlightText
 * @returns a jQuery-wrapped <div class="context"> holding the excerpt
 */
jQuery.makeSearchSummary = function(text, keywords, hlwords) {
  var textLower = text.toLowerCase();
  var start = 0;
  $.each(keywords, function(idx, keyword) {
    var pos = textLower.indexOf(keyword.toLowerCase());
    if (pos > -1)
      start = pos;
  });
  start = Math.max(start - 120, 0);
  // Only add the trailing ellipsis when text really continues past the
  // excerpt. (The former test "start + 240 - text.length" was truthy for
  // every value but zero, so short texts got a bogus "...".)
  var excerpt = ((start > 0) ? '...' : '') +
    $.trim(text.substr(start, 240)) +
    ((start + 240 < text.length) ? '...' : '');
  var rv = $('<div class="context"></div>').text(excerpt);
  $.each(hlwords, function(idx, hlword) {
    rv = rv.highlightText(hlword, 'highlighted');
  });
  return rv;
};

/**
 * Porter Stemmer
 *
 * Constructor; every instance exposes stemWord(w), which applies the
 * suffix-stripping steps 1a-5 below to a single word.
 */
var PorterStemmer = function() {

  // suffix -> replacement tables for steps 2 and 3
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // The patterns never change, so they are compiled once per stemmer
  // instead of on every stemWord call. None of them carries the g flag,
  // so test()/exec() keep no state between calls.
  var reMgr0 = new RegExp("^(" + C + ")?" + V + C);                   // [C]VC... is m>0
  var reMeq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$"); // [C]VC[V] is m=1
  var reMgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);           // [C]VCVC... is m>1
  var reVowelInStem = new RegExp("^(" + C + ")?" + v);                // vowel in stem
  var reDoubleCons = new RegExp("([^aeiouylsz])\\1$");
  var reCvc = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var reStep1aSses = /^(.+?)(ss|i)es$/;
  var reStep1aS = /^(.+?)([^s])s$/;
  var reStep1bEed = /^(.+?)eed$/;
  var reStep1bEdIng = /^(.+?)(ed|ing)$/;
  var reStep1bFix = /(at|bl|iz)$/;
  var reStep1c = /^(.+?)y$/;
  var reStep2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var reStep3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var reStep4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var reStep4Ion = /^(.+?)(s|t)(ion)$/;
  var reStep5E = /^(.+?)e$/;
  var reStep5Ll = /ll$/;
  var reLastChar = /.$/;

  /**
   * Stem a single word.
   *
   * @param {string} w  the word to stem
   * @returns {string} the stem; words shorter than three characters are
   *                   returned unchanged
   */
  this.stemWord = function (w) {
    var stem;
    var fp;

    if (w.length < 3)
      return w;

    // a leading "y" is upper-cased for the duration of the run so that it
    // does not match the lower-case vowel class; it is restored at the end
    var firstch = w.substr(0,1);
    if (firstch == "y")
      w = firstch.toUpperCase() + w.substr(1);

    // Step 1a
    if (reStep1aSses.test(w))
      w = w.replace(reStep1aSses,"$1$2");
    else if (reStep1aS.test(w))
      w = w.replace(reStep1aS,"$1$2");

    // Step 1b
    if ((fp = reStep1bEed.exec(w))) {
      if (reMgr0.test(fp[1]))
        w = w.replace(reLastChar,"");
    }
    else if ((fp = reStep1bEdIng.exec(w))) {
      stem = fp[1];
      if (reVowelInStem.test(stem)) {
        w = stem;
        if (reStep1bFix.test(w))
          w = w + "e";
        else if (reDoubleCons.test(w))
          w = w.replace(reLastChar,"");
        else if (reCvc.test(w))
          w = w + "e";
      }
    }

    // Step 1c
    if ((fp = reStep1c.exec(w))) {
      stem = fp[1];
      if (reVowelInStem.test(stem))
        w = stem + "i";
    }

    // Step 2
    if ((fp = reStep2.exec(w))) {
      stem = fp[1];
      if (reMgr0.test(stem))
        w = stem + step2list[fp[2]];
    }

    // Step 3
    if ((fp = reStep3.exec(w))) {
      stem = fp[1];
      if (reMgr0.test(stem))
        w = stem + step3list[fp[2]];
    }

    // Step 4
    if ((fp = reStep4.exec(w))) {
      stem = fp[1];
      if (reMgr1.test(stem))
        w = stem;
    }
    else if ((fp = reStep4Ion.exec(w))) {
      stem = fp[1] + fp[2];
      if (reMgr1.test(stem))
        w = stem;
    }

    // Step 5
    if ((fp = reStep5E.exec(w))) {
      stem = fp[1];
      if (reMgr1.test(stem) || (reMeq1.test(stem) && !reCvc.test(stem)))
        w = stem;
    }
    if (reStep5Ll.test(w) && reMgr1.test(w))
      w = w.replace(reLastChar,"");

    // and turn initial Y back to y
    if (firstch == "y")
      w = firstch.toLowerCase() + w.substr(1);
    return w;
  };
};


/**
 * Search Module
 */
var Search = {

  _index : null,          // search index handed over through setIndex()
  _queued_query : null,   // query received before the index was set
  _pulse_status : -1,     // -1: not pulsing, 0..3: number of dots shown
  _pulse_timer : null,    // handle of the pending pulse timeout, if any

  /**
   * Read the "q" query parameter; when present, copy it into the first
   * input named "q" and run the search.
   */
  init : function() {
    var params = $.getQueryParameters();
    if (params.q) {
      var query = params.q[0];
      // eq(0).val() does nothing when the page has no such input, where
      // indexing [0] and assigning .value would throw
      $('input[name="q"]').eq(0).val(query);
      this.performSearch(query);
    }
  },

  /**
   * Request the index at url as a script (dataType "script"). No success
   * handler is installed here.
   * NOTE(review): presumably the loaded script calls Search.setIndex();
   * confirm against the generated index file.
   */
  loadIndex : function(url) {
    $.ajax({type: "GET", url: url, data: null, success: null,
            dataType: "script", cache: true});
  },

  /**
   * Store the index and run the query that was deferred while it was
   * missing, if there is one.
   */
  setIndex : function(index) {
    var q;
    this._index = index;
    if ((q = this._queued_query) !== null) {
      this._queued_query = null;
      Search.query(q);
    }
  },

  hasIndex : function() {
    return this._index !== null;
  },

  deferQuery : function(query) {
    this._queued_query = query;
  },

  /**
   * Stop the progress animation. The pending timeout is cancelled;
   * setting a status value alone cannot stop the loop, because pulse()
   * recomputes the status as (status + 1) % 4, which is never negative.
   */
  stopPulse : function() {
    this._pulse_status = -1;
    if (this._pulse_timer !== null) {
      window.clearTimeout(this._pulse_timer);
      this._pulse_timer = null;
    }
  },

  /**
   * Start the progress animation: every 500 ms Search.dots is set to the
   * next of "", ".", "..", "...". Does nothing when already pulsing.
   */
  startPulse : function() {
    if (this._pulse_status >= 0)
      return;
    function pulse() {
      Search._pulse_status = (Search._pulse_status + 1) % 4;
      var dotString = '';
      for (var i = 0; i < Search._pulse_status; i++)
        dotString += '.';
      Search.dots.text(dotString);
      Search._pulse_timer = window.setTimeout(pulse, 500);
    }
    pulse();
  },

  /**
   * perform a search for something
   *
   * Builds the result widgets inside #search-results, starts the progress
   * animation and runs the query, or defers it until setIndex() is called
   * when no index is available yet.
   */
  performSearch : function(query) {
    // create the required interface elements
    this.out = $('#search-results');
    this.title = $('<h2>' + _('Searching') + '</h2>').appendTo(this.out);
    this.dots = $('<span></span>').appendTo(this.title);
    this.status = $('<p style="display: none"></p>').appendTo(this.out);
    this.output = $('<ul class="search"/>').appendTo(this.out);

    $('#search-progress').text(_('Preparing search...'));
    this.startPulse();

    // index already loaded, the browser was quick!
    if (this.hasIndex())
      this.query(query);
    else
      this.deferQuery(query);
  },

  /**
   * Run query against the loaded index and render the results one by one.
   *
   * Whitespace-separated terms are stemmed; stopwords and purely numeric
   * terms are ignored; a term starting with "-" excludes files containing
   * it. A query consisting of a single term is additionally matched as a
   * substring against the object names in the index.
   *
   * @param {string} query  the raw query string
   */
  query : function(query) {
    var stopwords = ['and', 'then', 'into', 'it', 'as', 'are', 'in',
                     'if', 'for', 'no', 'there', 'their', 'was', 'is',
                     'be', 'to', 'that', 'but', 'they', 'not', 'such',
                     'with', 'by', 'a', 'on', 'these', 'of', 'will',
                     'this', 'near', 'the', 'or', 'at'];
    var hasOwn = Object.prototype.hasOwnProperty;
    var i, j;

    // Files listed for a term as an array. The own-property check keeps
    // inherited members (e.g. the term "constructor") from being taken for
    // index entries; an entry without a length is a single file.
    function filesForTerm(termIndex, term) {
      if (!hasOwn.call(termIndex, term))
        return [];
      var entry = termIndex[term];
      if (entry == null)
        return [];
      return (entry.length == null) ? [entry] : entry;
    }

    // comparator: descending by display name (element 1 of a result)
    function byNameDescending(a, b) {
      return (a[1] > b[1]) ? -1 : ((a[1] < b[1]) ? 1 : 0);
    }

    // stem the searchterms and add them to the correct list
    var stemmer = new PorterStemmer();
    var searchterms = [];
    var excluded = [];
    var hlterms = [];
    // trimmed so that surrounding whitespace neither yields empty terms
    // nor turns a one-word query into a multi-word one
    var tmp = $.trim(query).split(/\s+/);
    var object = (tmp.length == 1 && tmp[0] !== '') ?
        tmp[0].toLowerCase() : null;
    for (i = 0; i < tmp.length; i++) {
      if (tmp[i] === '' || /^\d+$/.test(tmp[i]) ||
          $u.indexOf(stopwords, tmp[i].toLowerCase()) != -1) {
        // skip this "word"
        continue;
      }
      // stem the word
      var word = stemmer.stemWord(tmp[i]).toLowerCase();
      // select the correct list
      var toAppend;
      if (word.charAt(0) == '-') {
        toAppend = excluded;
        word = word.substr(1);
      }
      else {
        toAppend = searchterms;
        hlterms.push(tmp[i].toLowerCase());
      }
      // a lone "-" leaves nothing to search for
      if (word === '')
        continue;
      // only add if not already in the list
      if ($u.indexOf(toAppend, word) == -1)
        toAppend.push(word);
    }
    var highlightstring = '?highlight=' + $.urlencode(hlterms.join(" "));

    // prepare search
    var filenames = this._index.filenames;
    var titles = this._index.titles;
    var terms = this._index.terms;
    var objects = this._index.objects;
    var objnames = this._index.objnames;
    var fileMap = {};
    var files;
    var file;
    // different result priorities
    var importantResults = [];
    var objectResults = [];
    var regularResults = [];
    var unimportantResults = [];
    $('#search-progress').empty();

    // lookup as object
    if (object != null) {
      for (var prefix in objects) {
        for (var name in objects[prefix]) {
          var fullname = (prefix ? prefix + '.' : '') + name;
          if (fullname.toLowerCase().indexOf(object) > -1) {
            // match[0] indexes filenames/titles, match[1] indexes
            // objnames, match[2] selects the result list below
            var match = objects[prefix][name];
            var descr = objnames[match[1]] + _(', in ') + titles[match[0]];
            // XXX the generated anchors are not generally correct
            // XXX there may be custom prefixes
            var result = [filenames[match[0]], fullname, '#'+fullname, descr];
            switch (match[2]) {
            case 1: objectResults.push(result); break;
            case 0: importantResults.push(result); break;
            case 2: unimportantResults.push(result); break;
            }
          }
        }
      }
    }

    // sort results descending
    objectResults.sort(byNameDescending);
    importantResults.sort(byNameDescending);
    unimportantResults.sort(byNameDescending);

    // perform the search on the required terms
    for (i = 0; i < searchterms.length; i++) {
      files = filesForTerm(terms, searchterms[i]);
      // no match but word was a required one
      if (files.length == 0) {
        fileMap = {};
        break;
      }
      // create the mapping
      for (j = 0; j < files.length; j++) {
        file = files[j];
        if (hasOwn.call(fileMap, file))
          fileMap[file].push(searchterms[i]);
        else
          fileMap[file] = [searchterms[i]];
      }
    }

    // collect every file that contains an excluded term; object keys are
    // strings, which matches the string keys enumerated from fileMap below
    var excludedFiles = {};
    for (i = 0; i < excluded.length; i++) {
      files = filesForTerm(terms, excluded[i]);
      for (j = 0; j < files.length; j++)
        excludedFiles[files[j]] = true;
    }

    // now check if the files don't contain excluded terms
    for (file in fileMap) {
      if (!hasOwn.call(fileMap, file))
        continue;

      // check if all requirements are matched
      if (fileMap[file].length != searchterms.length)
        continue;

      // ensure that none of the excluded terms is in the
      // search result.
      if (hasOwn.call(excludedFiles, file))
        continue;

      // still a valid result, add it to the result list
      regularResults.push([filenames[file], titles[file], '', null]);
    }

    // now sort the regular results descending by title
    regularResults.sort(function(a, b) {
      var left = a[1].toLowerCase();
      var right = b[1].toLowerCase();
      return (left > right) ? -1 : ((left < right) ? 1 : 0);
    });

    // combine all results; items are popped from the end, so the lists
    // are shown in the order important, object, regular, unimportant
    var results = unimportantResults.concat(regularResults)
      .concat(objectResults).concat(importantResults);

    // print the results
    var resultCount = results.length;

    // append an item to the list and continue with the next result once
    // it has been revealed
    function showItem(listItem) {
      Search.output.append(listItem);
      listItem.slideDown(5, function() {
        displayNextItem();
      });
    }

    function displayNextItem() {
      // results left, load the summary and display it
      if (results.length) {
        var item = results.pop();
        var listItem = $('<li style="display:none"></li>');
        var href;
        if (DOCUMENTATION_OPTIONS.FILE_SUFFIX == '') {
          // dirhtml builder
          var dirname = item[0] + '/';
          if (dirname.match(/\/index\/$/)) {
            dirname = dirname.substring(0, dirname.length-6);
          } else if (dirname == 'index/') {
            dirname = '';
          }
          href = DOCUMENTATION_OPTIONS.URL_ROOT + dirname +
            highlightstring + item[2];
        } else {
          // normal html builders
          href = item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
            highlightstring + item[2];
        }
        listItem.append($('<a/>').attr('href', href).html(item[1]));
        if (item[3]) {
          listItem.append($('<span> (' + item[3] + ')</span>'));
          showItem(listItem);
        } else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
          $.ajax({
            type: 'GET',
            url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' +
              item[0] + '.txt',
            dataType: 'text',
            success: function(data) {
              if (typeof data == 'string' && data !== '')
                listItem.append(
                  $.makeSearchSummary(data, searchterms, hlterms));
              showItem(listItem);
            },
            // a source file that cannot be fetched must not stall the
            // remaining results; show the title without a summary
            error: function() {
              showItem(listItem);
            }
          });
        } else {
          // no source available, just display title
          showItem(listItem);
        }
      }
      // search finished, update title and status message
      else {
        Search.stopPulse();
        Search.title.text(_('Search Results'));
        if (!resultCount)
          Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
        else
          Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
        Search.status.fadeIn(500);
      }
    }
    displayNextItem();
  }
};

$(document).ready(function() {
  Search.init();
});