/branches/branch-3x/src/xml/DefaultSimilarity.js

http://luke.googlecode.com/ · JavaScript · 61 lines · 32 code · 11 blank · 18 comment · 1 complexity · cd15c80920cb6ff9aad62a08a4ea4ca0 MD5 · raw file

  1. // This is an implementation of DefaultSimilarity
  2. // in JavaScript.
  3. //
  4. // NOTE: Since JavaScript is a weakly-typed language, some
  5. // overloaded methods have been renamed to avoid ambiguity.
  6. // You need to keep these changed names as they are, because
  7. // the plugin depends on them. Other than that you are free
  8. // to change anything else.
  9. //--- ABSTRACT METHODS ---
  10. // You HAVE TO implement these
  // Returns the ratio of `overlap` to `maxOverlap`.
  // The denominator is multiplied by 1.0 first, which forces it to a number
  // before the division takes place.
  function coord(overlap, maxOverlap) {
    var denominator = 1.0 * maxOverlap;
    return overlap / denominator;
  }
  // Computes 1 + ln(numDocs / (docFreq + 1)).
  // Adding 1 to docFreq keeps the divisor non-zero when docFreq is 0.
  function idf(docFreq, numDocs) {
    var ratio = numDocs / (docFreq + 1);
    return 1.0 + Math.log(ratio);
  }
  // Returns 1 / sqrt(numTerms).
  // `fieldName` is accepted as part of the signature but is not used here.
  function lengthNorm(fieldName, numTerms) {
    var root = Math.sqrt(numTerms);
    return 1.0 / root;
  }
  // Returns 1 / sqrt(sumOfSquaredWeights).
  function queryNorm(sumOfSquaredWeights) {
    var root = Math.sqrt(sumOfSquaredWeights);
    return 1.0 / root;
  }
  // Returns 1 / (distance + 1), so a distance of 0 yields 1 and larger
  // distances yield progressively smaller values.
  function sloppyFreq(distance) {
    var denominator = distance + 1;
    return 1.0 / denominator;
  }
  // Returns the square root of `freq`.
  function tf(freq) {
    var root = Math.sqrt(freq);
    return root;
  }
  29. //--- PUBLIC METHODS ---
  30. // You may choose to override these. If they are not overridden, the
  31. // plugin will use the DefaultSimilarity implementation, which is equivalent
  32. // to the code reproduced below.
  33. // RENAMED: float idf(Collection terms, Searcher searcher)
  // Sums idf_ts(term, searcher) over every element produced by
  // terms.iterator(); returns 0 when the iterator yields nothing.
  // `terms` must expose iterator() returning an object with
  // hasNext()/next().
  function idf_cs(terms, searcher) {
    var total = 0.0;
    // The renamed per-term method is used because JavaScript cannot
    // distinguish the overloaded idf() variants.
    for (var it = terms.iterator(); it.hasNext(); ) {
      total += idf_ts(it.next(), searcher);
    }
    return total;
  }
  43. // RENAMED: float idf(Term term, Searcher searcher)
  // Looks up searcher.docFreq(term) and searcher.maxDoc(), in that order,
  // and passes them to idf(docFreq, numDocs).
  function idf_ts(term, searcher) {
    var docFreq = searcher.docFreq(term);
    var numDocs = searcher.maxDoc();
    return idf(docFreq, numDocs);
  }
  47. // RENAMED: float tf(int freq)
  // Forwards to tf(freq) and returns its result unchanged.
  function tf_i(freq) {
    var score = tf(freq);
    return score;
  }