/src/3rdparty/clucene/src/CLucene/search/TermScorer.cpp
https://bitbucket.org/ultra_iter/qt-vtl · C++ · 120 lines · 98 code · 13 blank · 9 comment · 17 complexity · 9f412363a03af5b6ab6c5fa9fa808196 MD5 · raw file
- /*------------------------------------------------------------------------------
- * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
- *
- * Distributable under the terms of either the Apache License (Version 2.0) or
- * the GNU Lesser General Public License, as specified in the COPYING file.
- ------------------------------------------------------------------------------*/
- #include "CLucene/StdHeader.h"
- #include "TermScorer.h"
- #include "CLucene/index/Terms.h"
- #include "TermQuery.h"
- CL_NS_USE(index)
- CL_NS_DEF(search)
- //TermScorer takes TermDocs and delets it when TermScorer is cleaned up
- TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td,
- Similarity* similarity,uint8_t* _norms):
- Scorer(similarity),
- termDocs(td),
- norms(_norms),
- weight(w),
- weightValue(w->getValue()),
- _doc(0),
- pointer(0),
- pointerMax(0)
- {
- memset(docs,0,32*sizeof(int32_t));
- memset(freqs,0,32*sizeof(int32_t));
- for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++)
- scoreCache[i] = getSimilarity()->tf(i) * weightValue;
- }
- TermScorer::~TermScorer(){
- _CLDELETE(termDocs);
- }
- bool TermScorer::next(){
- pointer++;
- if (pointer >= pointerMax) {
- pointerMax = termDocs->read(docs, freqs, 32); // refill buffer
- if (pointerMax != 0) {
- pointer = 0;
- } else {
- termDocs->close(); // close stream
- _doc = LUCENE_INT32_MAX_SHOULDBE; // set to sentinel value
- return false;
- }
- }
- _doc = docs[pointer];
- return true;
- }
- bool TermScorer::skipTo(int32_t target) {
- // first scan in cache
- for (pointer++; pointer < pointerMax; pointer++) {
- if (docs[pointer] >= target) {
- _doc = docs[pointer];
- return true;
- }
- }
- // not found in cache, seek underlying stream
- bool result = termDocs->skipTo(target);
- if (result) {
- pointerMax = 1;
- pointer = 0;
- docs[pointer] = _doc = termDocs->doc();
- freqs[pointer] = termDocs->freq();
- } else {
- _doc = LUCENE_INT32_MAX_SHOULDBE;
- }
- return result;
- }
- void TermScorer::explain(int32_t doc, Explanation* tfExplanation) {
- TermQuery* query = (TermQuery*)weight->getQuery();
- int32_t tf = 0;
- while (pointer < pointerMax) {
- if (docs[pointer] == doc)
- tf = freqs[pointer];
- pointer++;
- }
- if (tf == 0) {
- while (termDocs->next()) {
- if (termDocs->doc() == doc) {
- tf = termDocs->freq();
- }
- }
- }
- termDocs->close();
- tfExplanation->setValue(getSimilarity()->tf(tf));
- TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1];
- TCHAR* termToString = query->getTerm(false)->toString();
- _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf);
- _CLDELETE_CARRAY(termToString);
- tfExplanation->setDescription(buf);
- }
- TCHAR* TermScorer::toString() {
- TCHAR* wb = weight->toString();
- int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer(" ")") + 1
- TCHAR* ret = _CL_NEWARRAY(TCHAR,rl);
- _sntprintf(ret,rl,_T("scorer(%s)"), wb);
- _CLDELETE_ARRAY(wb);
- return ret;
- }
- qreal TermScorer::score(){
- int32_t f = freqs[pointer];
- qreal raw = // compute tf(f)*weight
- f < LUCENE_SCORE_CACHE_SIZE // check cache
- ? scoreCache[f] // cache hit
- : getSimilarity()->tf(f) * weightValue; // cache miss
- return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field
- }
-
- CL_NS_END