/src/3rdparty/clucene/src/CLucene/search/TermScorer.cpp

https://bitbucket.org/ultra_iter/qt-vtl · C++ · 120 lines · 98 code · 13 blank · 9 comment · 17 complexity · 9f412363a03af5b6ab6c5fa9fa808196 MD5 · raw file

  1. /*------------------------------------------------------------------------------
  2. * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
  3. *
  4. * Distributable under the terms of either the Apache License (Version 2.0) or
  5. * the GNU Lesser General Public License, as specified in the COPYING file.
  6. ------------------------------------------------------------------------------*/
  7. #include "CLucene/StdHeader.h"
  8. #include "TermScorer.h"
  9. #include "CLucene/index/Terms.h"
  10. #include "TermQuery.h"
  11. CL_NS_USE(index)
  12. CL_NS_DEF(search)
  13. //TermScorer takes TermDocs and delets it when TermScorer is cleaned up
  14. TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td,
  15. Similarity* similarity,uint8_t* _norms):
  16. Scorer(similarity),
  17. termDocs(td),
  18. norms(_norms),
  19. weight(w),
  20. weightValue(w->getValue()),
  21. _doc(0),
  22. pointer(0),
  23. pointerMax(0)
  24. {
  25. memset(docs,0,32*sizeof(int32_t));
  26. memset(freqs,0,32*sizeof(int32_t));
  27. for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++)
  28. scoreCache[i] = getSimilarity()->tf(i) * weightValue;
  29. }
  30. TermScorer::~TermScorer(){
  31. _CLDELETE(termDocs);
  32. }
  33. bool TermScorer::next(){
  34. pointer++;
  35. if (pointer >= pointerMax) {
  36. pointerMax = termDocs->read(docs, freqs, 32); // refill buffer
  37. if (pointerMax != 0) {
  38. pointer = 0;
  39. } else {
  40. termDocs->close(); // close stream
  41. _doc = LUCENE_INT32_MAX_SHOULDBE; // set to sentinel value
  42. return false;
  43. }
  44. }
  45. _doc = docs[pointer];
  46. return true;
  47. }
  48. bool TermScorer::skipTo(int32_t target) {
  49. // first scan in cache
  50. for (pointer++; pointer < pointerMax; pointer++) {
  51. if (docs[pointer] >= target) {
  52. _doc = docs[pointer];
  53. return true;
  54. }
  55. }
  56. // not found in cache, seek underlying stream
  57. bool result = termDocs->skipTo(target);
  58. if (result) {
  59. pointerMax = 1;
  60. pointer = 0;
  61. docs[pointer] = _doc = termDocs->doc();
  62. freqs[pointer] = termDocs->freq();
  63. } else {
  64. _doc = LUCENE_INT32_MAX_SHOULDBE;
  65. }
  66. return result;
  67. }
  68. void TermScorer::explain(int32_t doc, Explanation* tfExplanation) {
  69. TermQuery* query = (TermQuery*)weight->getQuery();
  70. int32_t tf = 0;
  71. while (pointer < pointerMax) {
  72. if (docs[pointer] == doc)
  73. tf = freqs[pointer];
  74. pointer++;
  75. }
  76. if (tf == 0) {
  77. while (termDocs->next()) {
  78. if (termDocs->doc() == doc) {
  79. tf = termDocs->freq();
  80. }
  81. }
  82. }
  83. termDocs->close();
  84. tfExplanation->setValue(getSimilarity()->tf(tf));
  85. TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1];
  86. TCHAR* termToString = query->getTerm(false)->toString();
  87. _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf);
  88. _CLDELETE_CARRAY(termToString);
  89. tfExplanation->setDescription(buf);
  90. }
  91. TCHAR* TermScorer::toString() {
  92. TCHAR* wb = weight->toString();
  93. int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer(" ")") + 1
  94. TCHAR* ret = _CL_NEWARRAY(TCHAR,rl);
  95. _sntprintf(ret,rl,_T("scorer(%s)"), wb);
  96. _CLDELETE_ARRAY(wb);
  97. return ret;
  98. }
  99. qreal TermScorer::score(){
  100. int32_t f = freqs[pointer];
  101. qreal raw = // compute tf(f)*weight
  102. f < LUCENE_SCORE_CACHE_SIZE // check cache
  103. ? scoreCache[f] // cache hit
  104. : getSimilarity()->tf(f) * weightValue; // cache miss
  105. return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field
  106. }
  107. CL_NS_END