PageRenderTime 45ms CodeModel.GetById 12ms app.highlight 28ms RepoModel.GetById 1ms app.codeStats 0ms

/src/pyechonest/doc/build/html/_static/searchtools.js

Relevant Search: With Applications for Solr and Elasticsearch

For more in-depth reading about search, ranking, and generally everything you could ever want to know about how Lucene, Elasticsearch, or Solr work under the hood, I highly suggest this book. It is easily one of the most interesting technical books I have read in a long time. If you are tasked with solving search relevance problems, even if not in Solr or Elasticsearch, it should be your first reference. Amazon Affiliate Link
http://echo-nest-remix.googlecode.com/
JavaScript | 518 lines | 397 code | 52 blank | 69 comment | 89 complexity | 2d2d54448ebb4ce62716b0a8fbb11dbe MD5 | raw file
  1/*
  2 * searchtools.js
  3 * ~~~~~~~~~~~~~~
  4 *
  5 * Sphinx JavaScript utilities for the full-text search.
  6 *
  7 * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
  8 * :license: BSD, see LICENSE for details.
  9 *
 10 */
 11
 12/**
 13 * helper function to return a node containing the
 14 * search summary for a given text. keywords is a list
 15 * of stemmed words, hlwords is the list of normal, unstemmed
 16 * words. the first one is used to find the occurance, the
 17 * latter for highlighting it.
 18 */
 19
 20jQuery.makeSearchSummary = function(text, keywords, hlwords) {
 21  var textLower = text.toLowerCase();
 22  var start = 0;
 23  $.each(keywords, function() {
 24    var i = textLower.indexOf(this.toLowerCase());
 25    if (i > -1)
 26      start = i;
 27  });
 28  start = Math.max(start - 120, 0);
 29  var excerpt = ((start > 0) ? '...' : '') +
 30  $.trim(text.substr(start, 240)) +
 31  ((start + 240 - text.length) ? '...' : '');
 32  var rv = $('<div class="context"></div>').text(excerpt);
 33  $.each(hlwords, function() {
 34    rv = rv.highlightText(this, 'highlighted');
 35  });
 36  return rv;
 37}
 38
 39/**
 40 * Porter Stemmer
 41 */
 42var PorterStemmer = function() {
 43
 44  var step2list = {
 45    ational: 'ate',
 46    tional: 'tion',
 47    enci: 'ence',
 48    anci: 'ance',
 49    izer: 'ize',
 50    bli: 'ble',
 51    alli: 'al',
 52    entli: 'ent',
 53    eli: 'e',
 54    ousli: 'ous',
 55    ization: 'ize',
 56    ation: 'ate',
 57    ator: 'ate',
 58    alism: 'al',
 59    iveness: 'ive',
 60    fulness: 'ful',
 61    ousness: 'ous',
 62    aliti: 'al',
 63    iviti: 'ive',
 64    biliti: 'ble',
 65    logi: 'log'
 66  };
 67
 68  var step3list = {
 69    icate: 'ic',
 70    ative: '',
 71    alize: 'al',
 72    iciti: 'ic',
 73    ical: 'ic',
 74    ful: '',
 75    ness: ''
 76  };
 77
 78  var c = "[^aeiou]";          // consonant
 79  var v = "[aeiouy]";          // vowel
 80  var C = c + "[^aeiouy]*";    // consonant sequence
 81  var V = v + "[aeiou]*";      // vowel sequence
 82
 83  var mgr0 = "^(" + C + ")?" + V + C;                      // [C]VC... is m>0
 84  var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$";    // [C]VC[V] is m=1
 85  var mgr1 = "^(" + C + ")?" + V + C + V + C;              // [C]VCVC... is m>1
 86  var s_v   = "^(" + C + ")?" + v;                         // vowel in stem
 87
 88  this.stemWord = function (w) {
 89    var stem;
 90    var suffix;
 91    var firstch;
 92    var origword = w;
 93
 94    if (w.length < 3)
 95      return w;
 96
 97    var re;
 98    var re2;
 99    var re3;
100    var re4;
101
102    firstch = w.substr(0,1);
103    if (firstch == "y")
104      w = firstch.toUpperCase() + w.substr(1);
105
106    // Step 1a
107    re = /^(.+?)(ss|i)es$/;
108    re2 = /^(.+?)([^s])s$/;
109
110    if (re.test(w))
111      w = w.replace(re,"$1$2");
112    else if (re2.test(w))
113      w = w.replace(re2,"$1$2");
114
115    // Step 1b
116    re = /^(.+?)eed$/;
117    re2 = /^(.+?)(ed|ing)$/;
118    if (re.test(w)) {
119      var fp = re.exec(w);
120      re = new RegExp(mgr0);
121      if (re.test(fp[1])) {
122        re = /.$/;
123        w = w.replace(re,"");
124      }
125    }
126    else if (re2.test(w)) {
127      var fp = re2.exec(w);
128      stem = fp[1];
129      re2 = new RegExp(s_v);
130      if (re2.test(stem)) {
131        w = stem;
132        re2 = /(at|bl|iz)$/;
133        re3 = new RegExp("([^aeiouylsz])\\1$");
134        re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
135        if (re2.test(w))
136          w = w + "e";
137        else if (re3.test(w)) {
138          re = /.$/;
139          w = w.replace(re,"");
140        }
141        else if (re4.test(w))
142          w = w + "e";
143      }
144    }
145
146    // Step 1c
147    re = /^(.+?)y$/;
148    if (re.test(w)) {
149      var fp = re.exec(w);
150      stem = fp[1];
151      re = new RegExp(s_v);
152      if (re.test(stem))
153        w = stem + "i";
154    }
155
156    // Step 2
157    re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
158    if (re.test(w)) {
159      var fp = re.exec(w);
160      stem = fp[1];
161      suffix = fp[2];
162      re = new RegExp(mgr0);
163      if (re.test(stem))
164        w = stem + step2list[suffix];
165    }
166
167    // Step 3
168    re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
169    if (re.test(w)) {
170      var fp = re.exec(w);
171      stem = fp[1];
172      suffix = fp[2];
173      re = new RegExp(mgr0);
174      if (re.test(stem))
175        w = stem + step3list[suffix];
176    }
177
178    // Step 4
179    re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
180    re2 = /^(.+?)(s|t)(ion)$/;
181    if (re.test(w)) {
182      var fp = re.exec(w);
183      stem = fp[1];
184      re = new RegExp(mgr1);
185      if (re.test(stem))
186        w = stem;
187    }
188    else if (re2.test(w)) {
189      var fp = re2.exec(w);
190      stem = fp[1] + fp[2];
191      re2 = new RegExp(mgr1);
192      if (re2.test(stem))
193        w = stem;
194    }
195
196    // Step 5
197    re = /^(.+?)e$/;
198    if (re.test(w)) {
199      var fp = re.exec(w);
200      stem = fp[1];
201      re = new RegExp(mgr1);
202      re2 = new RegExp(meq1);
203      re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
204      if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
205        w = stem;
206    }
207    re = /ll$/;
208    re2 = new RegExp(mgr1);
209    if (re.test(w) && re2.test(w)) {
210      re = /.$/;
211      w = w.replace(re,"");
212    }
213
214    // and turn initial Y back to y
215    if (firstch == "y")
216      w = firstch.toLowerCase() + w.substr(1);
217    return w;
218  }
219}
220
221
/**
 * Search Module
 *
 * Client-side full-text search over the index handed to setIndex().
 * performSearch() builds the result widgets, query() evaluates a query
 * against the index and renders the hits one after another.
 */
var Search = {

  _index : null,          // search index object, set by setIndex()
  _queued_query : null,   // query waiting for the index to arrive
  _pulse_status : -1,     // -1: progress dots not running, 0..3: dot count
  _pulse_timer : null,    // id of the pending pulse timeout, if any

  /**
   * Read the "q" query parameter of the page and, if present, copy it
   * into the first search input and start the search.
   */
  init : function() {
    var params = $.getQueryParameters();
    if (params.q) {
      var query = params.q[0];
      // eq(0) touches only the first input and is a no-op when there is none
      $('input[name="q"]').eq(0).val(query);
      this.performSearch(query);
    }
  },

  /**
   * Request the index at url as a script.
   */
  loadIndex : function(url) {
    $.ajax({type: "GET", url: url, data: null, success: null,
            dataType: "script", cache: true});
  },

  /**
   * Store the index and run the query that was deferred while the
   * index was not available, if there is one.
   */
  setIndex : function(index) {
    var q;
    this._index = index;
    if ((q = this._queued_query) !== null) {
      this._queued_query = null;
      Search.query(q);
    }
  },

  hasIndex : function() {
    return this._index !== null;
  },

  /**
   * Remember query so that setIndex() can run it later.
   */
  deferQuery : function(query) {
    this._queued_query = query;
  },

  /**
   * Stop the progress dots: cancel the pending timeout and reset the
   * state so that a later startPulse() can start again.
   */
  stopPulse : function() {
    if (this._pulse_timer !== null) {
      window.clearTimeout(this._pulse_timer);
      this._pulse_timer = null;
    }
    this._pulse_status = -1;
  },

  /**
   * Start animating the progress dots (0 to 3 dots, one step every
   * 500 ms) unless they are already running.
   */
  startPulse : function() {
    if (this._pulse_status >= 0)
      return;
    function pulse() {
      Search._pulse_status = (Search._pulse_status + 1) % 4;
      var dotString = '';
      for (var i = 0; i < Search._pulse_status; i++)
        dotString += '.';
      Search.dots.text(dotString);
      // keep the timer id so that stopPulse() can really stop the loop
      Search._pulse_timer = window.setTimeout(pulse, 500);
    }
    pulse();
  },

  /**
   * perform a search for something
   */
  performSearch : function(query) {
    // create the required interface elements
    this.out = $('#search-results');
    this.title = $('<h2>' + _('Searching') + '</h2>').appendTo(this.out);
    this.dots = $('<span></span>').appendTo(this.title);
    this.status = $('<p style="display: none"></p>').appendTo(this.out);
    this.output = $('<ul class="search"/>').appendTo(this.out);

    $('#search-progress').text(_('Preparing search...'));
    this.startPulse();

    // index already loaded, the browser was quick!
    if (this.hasIndex())
      this.query(query);
    else
      this.deferQuery(query);
  },

  /**
   * Evaluate query against the loaded index and display the results.
   *
   * The query is split on whitespace. Stop words, pure numbers and
   * empty words are ignored; a word starting with '-' excludes files
   * containing it; every other word is required. A single-word query
   * is additionally matched as a substring of the object names.
   */
  query : function(query) {
    var stopwords = ['and', 'then', 'into', 'it', 'as', 'are', 'in',
                     'if', 'for', 'no', 'there', 'their', 'was', 'is',
                     'be', 'to', 'that', 'but', 'they', 'not', 'such',
                     'with', 'by', 'a', 'on', 'these', 'of', 'will',
                     'this', 'near', 'the', 'or', 'at'];

    // stem the searchterms and add them to the correct list
    var stemmer = new PorterStemmer();
    var searchterms = [];
    var excluded = [];
    var hlterms = [];
    var tmp = query.split(/\s+/);
    var object = (tmp.length == 1) ? tmp[0].toLowerCase() : null;
    var i;
    var j;
    for (i = 0; i < tmp.length; i++) {
      // lower-case first: the stop word list and the stemmer rules are
      // lower-case only
      var raw = tmp[i].toLowerCase();
      if (raw == "" || raw.match(/^\d+$/) ||
          $u.indexOf(stopwords, raw) != -1) {
        // skip this "word"
        continue;
      }
      // select the correct list; the '-' marker is removed before
      // stemming so that it does not take part in the stem
      var toAppend;
      var word;
      if (raw.charAt(0) == '-') {
        toAppend = excluded;
        word = stemmer.stemWord(raw.substr(1)).toLowerCase();
      }
      else {
        toAppend = searchterms;
        word = stemmer.stemWord(raw).toLowerCase();
        hlterms.push(raw);
      }
      // a lone '-' leaves nothing to search for
      if (word == "")
        continue;
      // only add if not already in the list
      // NOTE(review): relies on $.contains(array, item) being the
      // array-membership helper defined outside this file -- confirm
      if (!$.contains(toAppend, word))
        toAppend.push(word);
    }
    var highlightstring = '?highlight=' + $.urlencode(hlterms.join(" "));

    // prepare search
    var filenames = this._index.filenames;
    var titles = this._index.titles;
    var terms = this._index.terms;
    var objects = this._index.objects;
    var objnames = this._index.objnames;
    var fileMap = {};
    var files = null;
    // different result priorities
    var importantResults = [];
    var objectResults = [];
    var regularResults = [];
    var unimportantResults = [];
    $('#search-progress').empty();

    // lookup as object
    if (object != null) {
      for (var prefix in objects) {
        for (var name in objects[prefix]) {
          var fullname = (prefix ? prefix + '.' : '') + name;
          if (fullname.toLowerCase().indexOf(object) > -1) {
            // match[0]: index into filenames/titles, match[1]: key into
            // objnames, match[2]: priority class
            var match = objects[prefix][name];
            var descr = objnames[match[1]] + _(', in ') + titles[match[0]];
            // XXX the generated anchors are not generally correct
            // XXX there may be custom prefixes
            var result = [filenames[match[0]], fullname, '#'+fullname, descr];
            switch (match[2]) {
            case 1: objectResults.push(result); break;
            case 0: importantResults.push(result); break;
            case 2: unimportantResults.push(result); break;
            }
          }
        }
      }
    }

    // sort results descending by name; results are popped from the end
    // of the list, so they are displayed in ascending order
    function byNameDescending(a, b) {
      return (a[1] > b[1]) ? -1 : ((a[1] < b[1]) ? 1 : 0);
    }
    objectResults.sort(byNameDescending);
    importantResults.sort(byNameDescending);
    unimportantResults.sort(byNameDescending);

    // perform the search on the required terms
    for (i = 0; i < searchterms.length; i++) {
      var term = searchterms[i];
      // no match but word was a required one
      if ((files = terms[term]) == null)
        break;
      // a term found in a single file is stored as a bare value
      if (files.length === undefined) {
        files = [files];
      }
      // create the mapping
      for (j = 0; j < files.length; j++) {
        var fileIndex = files[j];
        if (fileIndex in fileMap)
          fileMap[fileIndex].push(term);
        else
          fileMap[fileIndex] = [term];
      }
    }

    // now check if the files don't contain excluded terms
    for (var file in fileMap) {
      var valid = true;

      // check if all requirements are matched
      if (fileMap[file].length != searchterms.length)
        continue;

      // ensure that none of the excluded terms is in the
      // search result. "file" is a property name (a string), so the
      // comparison with the stored file index must stay loose.
      for (i = 0; i < excluded.length; i++) {
        if (terms[excluded[i]] == file ||
            $.contains(terms[excluded[i]] || [], file)) {
          valid = false;
          break;
        }
      }

      // if we have still a valid result we can add it
      // to the result list
      if (valid)
        regularResults.push([filenames[file], titles[file], '', null]);
    }

    // now sort the regular results descending by title
    regularResults.sort(function(a, b) {
      var left = a[1].toLowerCase();
      var right = b[1].toLowerCase();
      return (left > right) ? -1 : ((left < right) ? 1 : 0);
    });

    // combine all results
    var results = unimportantResults.concat(regularResults)
      .concat(objectResults).concat(importantResults);

    // print the results
    var resultCount = results.length;
    function displayNextItem() {
      // search finished, update title and status message
      if (!results.length) {
        Search.stopPulse();
        Search.title.text(_('Search Results'));
        if (!resultCount)
          Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
        else
          Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
        Search.status.fadeIn(500);
        return;
      }

      // results left, load the summary and display it
      var item = results.pop();
      var listItem = $('<li style="display:none"></li>');

      // add the entry to the list and continue with the next result
      function showItem() {
        Search.output.append(listItem);
        listItem.slideDown(5, function() {
          displayNextItem();
        });
      }

      var href;
      if (DOCUMENTATION_OPTIONS.FILE_SUFFIX == '') {
        // dirhtml builder
        var dirname = item[0] + '/';
        if (dirname.match(/\/index\/$/)) {
          dirname = dirname.substring(0, dirname.length-6);
        } else if (dirname == 'index/') {
          dirname = '';
        }
        href = DOCUMENTATION_OPTIONS.URL_ROOT + dirname +
          highlightstring + item[2];
      } else {
        // normal html builders
        href = item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
          highlightstring + item[2];
      }
      listItem.append($('<a/>').attr('href', href).html(item[1]));

      if (item[3]) {
        listItem.append($('<span> (' + item[3] + ')</span>'));
        showItem();
      } else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
        $.ajax({
          url: DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/' +
               item[0] + '.txt',
          dataType: 'text',
          success: function(data) {
            if (data != '')
              listItem.append($.makeSearchSummary(data, searchterms, hlterms));
            showItem();
          },
          // a source file that cannot be fetched must not stop the
          // remaining results: show the title without a summary
          error: function() {
            showItem();
          }
        });
      } else {
        // no source available, just display title
        showItem();
      }
    }
    displayNextItem();
  }
};
515
// Start the search (driven by the page's query string) once the DOM is ready.
$(document).ready(function onDocumentReady() {
  Search.init();
});