/src/3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp

https://bitbucket.org/ultra_iter/qt-vtl · C++ · 225 lines · 135 code · 43 blank · 47 comment · 40 complexity · 08053c48353c0a98ba4299a5152bd13b MD5 · raw file

  1. /*------------------------------------------------------------------------------
  2. * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
  3. *
  4. * Distributable under the terms of either the Apache License (Version 2.0) or
  5. * the GNU Lesser General Public License, as specified in the COPYING file.
  6. ------------------------------------------------------------------------------*/
  7. #include "CLucene/StdHeader.h"
  8. #include "PhraseScorer.h"
  9. #include "PhraseQueue.h"
  10. #include "PhrasePositions.h"
  11. #include "Scorer.h"
  12. #include "Similarity.h"
  13. CL_NS_USE(index)
  14. CL_NS_USE(util)
  15. CL_NS_DEF(search)
  16. PhraseScorer::PhraseScorer(Weight* weight, TermPositions** tps,
  17. int32_t* positions, Similarity* similarity, uint8_t* norms):
  18. Scorer(similarity)
  19. {
  20. //Func - Constructor
  21. //Pre - tps != NULL and is an array of TermPositions
  22. // tpsLength >= 0
  23. // n != NULL
  24. //Post - The instance has been created
  25. CND_PRECONDITION(tps != NULL,"tps is NULL");
  26. //norms are only used if phraseFreq returns more than 0.0
  27. //phraseFreq should only return more than 0.0 if norms != NULL
  28. //CND_PRECONDITION(n != NULL,"n is NULL");
  29. firstTime = true;
  30. more = true;
  31. this->norms = norms;
  32. this->weight = weight;
  33. this->value = weight->getValue();
  34. //reset internal pointers
  35. first = NULL;
  36. last = NULL;
  37. //use pq to build a sorted list of PhrasePositions
  38. int32_t i = 0;
  39. while(tps[i] != NULL){
  40. PhrasePositions *pp = _CLNEW PhrasePositions(tps[i], positions[i]);
  41. CND_CONDITION(pp != NULL,"Could not allocate memory for pp");
  42. //Store PhrasePos into the PhrasePos pq
  43. if (last != NULL) { // add next to end of list
  44. last->_next = pp;
  45. } else
  46. first = pp;
  47. last = pp;
  48. i++;
  49. }
  50. pq = _CLNEW PhraseQueue(i); //i==tps.length
  51. CND_CONDITION(pq != NULL,"Could not allocate memory for pq");
  52. }
  53. PhraseScorer::~PhraseScorer() {
  54. //Func - Destructor
  55. //Pre - true
  56. //Post - The instance has been destroyed
  57. //The PhraseQueue pq (which is a PriorityQueue) pq is actually empty at present, the elements
  58. //having been transferred by pqToList() to the linked list starting with
  59. //first. The nodes of that linked list are deleted by the destructor of
  60. //first, rather than the destructor of pq.
  61. _CLDELETE(first);
  62. _CLDELETE(pq);
  63. }
  64. bool PhraseScorer::next(){
  65. if (firstTime) {
  66. init();
  67. firstTime = false;
  68. } else if (more) {
  69. more = last->next(); // trigger further scanning
  70. }
  71. return doNext();
  72. }
  73. // next without initial increment
  74. bool PhraseScorer::doNext() {
  75. while (more) {
  76. while (more && first->doc < last->doc) { // find doc w/ all the terms
  77. more = first->skipTo(last->doc); // skip first upto last
  78. firstToLast(); // and move it to the end
  79. }
  80. if (more) {
  81. // found a doc with all of the terms
  82. freq = phraseFreq(); // check for phrase
  83. if (freq == 0.0f) // no match
  84. more = last->next(); // trigger further scanning
  85. else
  86. return true; // found a match
  87. }
  88. }
  89. return false; // no more matches
  90. }
  91. qreal PhraseScorer::score(){
  92. //System.out.println("scoring " + first.doc);
  93. qreal raw = getSimilarity()->tf(freq) * value; // raw score
  94. return raw * Similarity::decodeNorm(norms[first->doc]); // normalize
  95. }
  96. bool PhraseScorer::skipTo(int32_t target) {
  97. for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) {
  98. more = pp->skipTo(target);
  99. }
  100. if (more)
  101. sort(); // re-sort
  102. return doNext();
  103. }
  104. void PhraseScorer::init() {
  105. for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next)
  106. more = pp->next();
  107. if(more)
  108. sort();
  109. }
  110. void PhraseScorer::sort() {
  111. pq->clear();
  112. for (PhrasePositions* pp = first; pp != NULL; pp = pp->_next)
  113. pq->put(pp);
  114. pqToList();
  115. }
  116. void PhraseScorer::pqToList(){
  117. //Func - Transfers the PhrasePositions from the PhraseQueue pq to
  118. // the PhrasePositions list with first as its first element
  119. //Pre - pq != NULL
  120. // first = NULL
  121. // last = NULL
  122. //Post - All PhrasePositions have been transfered to the list
  123. // of PhrasePositions of which the first element is pointed to by first
  124. // and the last element is pointed to by last
  125. CND_PRECONDITION(pq != NULL,"pq is NULL");
  126. last = first = NULL;
  127. PhrasePositions* PhrasePos = NULL;
  128. //As long pq is not empty
  129. while (pq->top() != NULL){
  130. //Pop a PhrasePositions instance
  131. PhrasePos = pq->pop();
  132. // add next to end of list
  133. if (last != NULL) {
  134. last->_next = PhrasePos;
  135. } else {
  136. first = PhrasePos;
  137. }
  138. //Let last point to the new last PhrasePositions instance just added
  139. last = PhrasePos;
  140. //Reset the next of last to NULL
  141. last->_next = NULL;
  142. }
  143. //Check to see that pq is empty now
  144. CND_CONDITION(pq->size()==0, "pq is not empty while it should be");
  145. }
  146. void PhraseScorer::firstToLast(){
  147. //Func - Moves first to the end of the list
  148. //Pre - first is NULL or points to an PhrasePositions Instance
  149. // last is NULL or points to an PhrasePositions Instance
  150. // first and last both are NULL or both are not NULL
  151. //Post - The first element has become the last element in the list
  152. CND_PRECONDITION(((first==NULL && last==NULL) ||(first !=NULL && last != NULL)),
  153. "Either first or last is NULL but not both");
  154. //Check if first and last are valid pointers
  155. if(first && last){
  156. last->_next = first;
  157. last = first;
  158. first = first->_next;
  159. last->_next = NULL;
  160. }
  161. }
  162. void PhraseScorer::explain(int32_t _doc, Explanation* tfExplanation) {
  163. while (next() && doc() < _doc){
  164. }
  165. qreal phraseFreq = (doc() == _doc) ? freq : 0.0f;
  166. tfExplanation->setValue(getSimilarity()->tf(phraseFreq));
  167. StringBuffer buf;
  168. buf.append(_T("tf(phraseFreq="));
  169. buf.appendFloat(phraseFreq,2);
  170. buf.append(_T(")"));
  171. tfExplanation->setDescription(buf.getBuffer());
  172. }
  173. TCHAR* PhraseScorer::toString() {
  174. StringBuffer buf;
  175. buf.append(_T("scorer("));
  176. TCHAR* tmp = weight->toString();
  177. buf.append(tmp);
  178. _CLDELETE_CARRAY(tmp);
  179. buf.append(_T(")"));
  180. return buf.toString();
  181. }
  182. CL_NS_END