/src/3rdparty/clucene/src/CLucene/search/PhraseScorer.cpp
https://bitbucket.org/ultra_iter/qt-vtl · C++ · 225 lines · 135 code · 43 blank · 47 comment · 40 complexity · 08053c48353c0a98ba4299a5152bd13b MD5 · raw file
- /*------------------------------------------------------------------------------
- * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
- *
- * Distributable under the terms of either the Apache License (Version 2.0) or
- * the GNU Lesser General Public License, as specified in the COPYING file.
- ------------------------------------------------------------------------------*/
- #include "CLucene/StdHeader.h"
- #include "PhraseScorer.h"
- #include "PhraseQueue.h"
- #include "PhrasePositions.h"
- #include "Scorer.h"
- #include "Similarity.h"
- CL_NS_USE(index)
- CL_NS_USE(util)
- CL_NS_DEF(search)
- PhraseScorer::PhraseScorer(Weight* weight, TermPositions** tps,
- int32_t* positions, Similarity* similarity, uint8_t* norms):
- Scorer(similarity)
- {
- //Func - Constructor
- //Pre - tps != NULL and is an array of TermPositions
- // tpsLength >= 0
- // n != NULL
- //Post - The instance has been created
- CND_PRECONDITION(tps != NULL,"tps is NULL");
-
- //norms are only used if phraseFreq returns more than 0.0
- //phraseFreq should only return more than 0.0 if norms != NULL
- //CND_PRECONDITION(n != NULL,"n is NULL");
- firstTime = true;
- more = true;
- this->norms = norms;
- this->weight = weight;
- this->value = weight->getValue();
- //reset internal pointers
- first = NULL;
- last = NULL;
- //use pq to build a sorted list of PhrasePositions
- int32_t i = 0;
- while(tps[i] != NULL){
- PhrasePositions *pp = _CLNEW PhrasePositions(tps[i], positions[i]);
- CND_CONDITION(pp != NULL,"Could not allocate memory for pp");
- //Store PhrasePos into the PhrasePos pq
- if (last != NULL) { // add next to end of list
- last->_next = pp;
- } else
- first = pp;
- last = pp;
- i++;
- }
- pq = _CLNEW PhraseQueue(i); //i==tps.length
- CND_CONDITION(pq != NULL,"Could not allocate memory for pq");
- }
- PhraseScorer::~PhraseScorer() {
- //Func - Destructor
- //Pre - true
- //Post - The instance has been destroyed
- //The PhraseQueue pq (which is a PriorityQueue) pq is actually empty at present, the elements
- //having been transferred by pqToList() to the linked list starting with
- //first. The nodes of that linked list are deleted by the destructor of
- //first, rather than the destructor of pq.
- _CLDELETE(first);
- _CLDELETE(pq);
- }
- bool PhraseScorer::next(){
- if (firstTime) {
- init();
- firstTime = false;
- } else if (more) {
- more = last->next(); // trigger further scanning
- }
- return doNext();
- }
- // next without initial increment
- bool PhraseScorer::doNext() {
- while (more) {
- while (more && first->doc < last->doc) { // find doc w/ all the terms
- more = first->skipTo(last->doc); // skip first upto last
- firstToLast(); // and move it to the end
- }
- if (more) {
- // found a doc with all of the terms
- freq = phraseFreq(); // check for phrase
- if (freq == 0.0f) // no match
- more = last->next(); // trigger further scanning
- else
- return true; // found a match
- }
- }
- return false; // no more matches
- }
- qreal PhraseScorer::score(){
- //System.out.println("scoring " + first.doc);
- qreal raw = getSimilarity()->tf(freq) * value; // raw score
- return raw * Similarity::decodeNorm(norms[first->doc]); // normalize
- }
- bool PhraseScorer::skipTo(int32_t target) {
- for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next) {
- more = pp->skipTo(target);
- }
- if (more)
- sort(); // re-sort
- return doNext();
- }
- void PhraseScorer::init() {
- for (PhrasePositions* pp = first; more && pp != NULL; pp = pp->_next)
- more = pp->next();
- if(more)
- sort();
- }
-
- void PhraseScorer::sort() {
- pq->clear();
- for (PhrasePositions* pp = first; pp != NULL; pp = pp->_next)
- pq->put(pp);
- pqToList();
- }
- void PhraseScorer::pqToList(){
- //Func - Transfers the PhrasePositions from the PhraseQueue pq to
- // the PhrasePositions list with first as its first element
- //Pre - pq != NULL
- // first = NULL
- // last = NULL
- //Post - All PhrasePositions have been transfered to the list
- // of PhrasePositions of which the first element is pointed to by first
- // and the last element is pointed to by last
- CND_PRECONDITION(pq != NULL,"pq is NULL");
-
- last = first = NULL;
- PhrasePositions* PhrasePos = NULL;
- //As long pq is not empty
- while (pq->top() != NULL){
- //Pop a PhrasePositions instance
- PhrasePos = pq->pop();
- // add next to end of list
- if (last != NULL) {
- last->_next = PhrasePos;
- } else {
- first = PhrasePos;
- }
- //Let last point to the new last PhrasePositions instance just added
- last = PhrasePos;
- //Reset the next of last to NULL
- last->_next = NULL;
- }
- //Check to see that pq is empty now
- CND_CONDITION(pq->size()==0, "pq is not empty while it should be");
- }
- void PhraseScorer::firstToLast(){
- //Func - Moves first to the end of the list
- //Pre - first is NULL or points to an PhrasePositions Instance
- // last is NULL or points to an PhrasePositions Instance
- // first and last both are NULL or both are not NULL
- //Post - The first element has become the last element in the list
- CND_PRECONDITION(((first==NULL && last==NULL) ||(first !=NULL && last != NULL)),
- "Either first or last is NULL but not both");
- //Check if first and last are valid pointers
- if(first && last){
- last->_next = first;
- last = first;
- first = first->_next;
- last->_next = NULL;
- }
- }
- void PhraseScorer::explain(int32_t _doc, Explanation* tfExplanation) {
- while (next() && doc() < _doc){
- }
- qreal phraseFreq = (doc() == _doc) ? freq : 0.0f;
- tfExplanation->setValue(getSimilarity()->tf(phraseFreq));
- StringBuffer buf;
- buf.append(_T("tf(phraseFreq="));
- buf.appendFloat(phraseFreq,2);
- buf.append(_T(")"));
- tfExplanation->setDescription(buf.getBuffer());
- }
- TCHAR* PhraseScorer::toString() {
- StringBuffer buf;
- buf.append(_T("scorer("));
- TCHAR* tmp = weight->toString();
- buf.append(tmp);
- _CLDELETE_CARRAY(tmp);
- buf.append(_T(")"));
- return buf.toString();
- }
- CL_NS_END