PageRenderTime 603ms CodeModel.GetById 90ms app.highlight 440ms RepoModel.GetById 55ms app.codeStats 1ms

/src/libtomahawk/database/fuzzyindex.cpp

http://github.com/tomahawk-player/tomahawk
C++ | 355 lines | 265 code | 70 blank | 20 comment | 17 complexity | 08917fc3370de98328eced8a8acb8f4c MD5 | raw file
  1/* === This file is part of Tomahawk Player - <http://tomahawk-player.org> ===
  2 *
  3 *   Copyright 2010-2011, Christian Muehlhaeuser <muesli@tomahawk-player.org>
  4 *
  5 *   Tomahawk is free software: you can redistribute it and/or modify
  6 *   it under the terms of the GNU General Public License as published by
  7 *   the Free Software Foundation, either version 3 of the License, or
  8 *   (at your option) any later version.
  9 *
 10 *   Tomahawk is distributed in the hope that it will be useful,
 11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 *   GNU General Public License for more details.
 14 *
 15 *   You should have received a copy of the GNU General Public License
 16 *   along with Tomahawk. If not, see <http://www.gnu.org/licenses/>.
 17 */
 18
 19#include "FuzzyIndex.h"
 20
 21#include <QDir>
 22#include <QTime>
 23
 24#include <CLucene.h>
 25#include <CLucene/queryParser/MultiFieldQueryParser.h>
 26
 27#include "DatabaseCommand_UpdateSearchIndex.h"
 28#include "DatabaseImpl.h"
 29#include "Database.h"
 30#include "utils/TomahawkUtils.h"
 31#include "utils/Logger.h"
 32#include "Source.h"
 33
 34using namespace lucene::analysis;
 35using namespace lucene::analysis::standard;
 36using namespace lucene::document;
 37using namespace lucene::store;
 38using namespace lucene::index;
 39using namespace lucene::queryParser;
 40using namespace lucene::search;
 41
 42
 43FuzzyIndex::FuzzyIndex( QObject* parent, bool wipe )
 44    : QObject( parent )
 45    , m_luceneReader( 0 )
 46    , m_luceneSearcher( 0 )
 47{
 48    QString m_lucenePath = TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" );
 49    QByteArray path = m_lucenePath.toUtf8();
 50    const char* cPath = path.constData();
 51
 52    bool failed = false;
 53    tDebug() << "Opening Lucene directory:" << path;
 54    try
 55    {
 56        m_analyzer = _CLNEW SimpleAnalyzer();
 57        m_luceneDir = FSDirectory::getDirectory( cPath );
 58    }
 59    catch ( CLuceneError& error )
 60    {
 61        tDebug() << "Caught CLucene error:" << error.what();
 62        failed = true;
 63    }
 64
 65    if ( failed )
 66    {
 67        tDebug() << "Initializing RAM directory instead.";
 68
 69        m_luceneDir = _CLNEW RAMDirectory();
 70        wipe = true;
 71    }
 72
 73    if ( wipe )
 74        wipeIndex();
 75}
 76
 77
 78FuzzyIndex::~FuzzyIndex()
 79{
 80    delete m_luceneSearcher;
 81    delete m_luceneReader;
 82    delete m_analyzer;
 83    delete m_luceneDir;
 84}
 85
 86
 87bool
 88FuzzyIndex::wipeIndex()
 89{
 90    tLog( LOGVERBOSE ) << "Wiping fuzzy index...";
 91    beginIndexing();
 92    endIndexing();
 93
 94    QTimer::singleShot( 0, this, SLOT( updateIndex() ) );
 95
 96    return true; // FIXME
 97}
 98
 99
100void
101FuzzyIndex::updateIndex()
102{
103    DatabaseCommand* cmd = new DatabaseCommand_UpdateSearchIndex();
104    Database::instance()->enqueue( QSharedPointer<DatabaseCommand>( cmd ) );
105}
106
107
108void
109FuzzyIndex::beginIndexing()
110{
111    m_mutex.lock();
112
113    try
114    {
115        qDebug() << Q_FUNC_INFO << "Starting indexing.";
116        if ( m_luceneReader != 0 )
117        {
118            qDebug() << "Deleting old lucene stuff.";
119            m_luceneSearcher->close();
120            m_luceneReader->close();
121            delete m_luceneSearcher;
122            delete m_luceneReader;
123            m_luceneSearcher = 0;
124            m_luceneReader = 0;
125        }
126
127        qDebug() << "Creating new index writer.";
128        IndexWriter luceneWriter( m_luceneDir, m_analyzer, true );
129    }
130    catch( CLuceneError& error )
131    {
132        tDebug() << "Caught CLucene error:" << error.what();
133        Q_ASSERT( false );
134    }
135}
136
137
138void
139FuzzyIndex::endIndexing()
140{
141    m_mutex.unlock();
142    emit indexReady();
143}
144
145
146void
147FuzzyIndex::appendFields( const QMap< unsigned int, QMap< QString, QString > >& trackData )
148{
149    try
150    {
151        tDebug() << "Appending to index:" << trackData.count();
152        bool create = !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() );
153        IndexWriter luceneWriter( m_luceneDir, m_analyzer, create );
154        Document doc;
155
156        QMapIterator< unsigned int, QMap< QString, QString > > it( trackData );
157        while ( it.hasNext() )
158        {
159            it.next();
160            unsigned int id = it.key();
161            QMap< QString, QString > values = it.value();
162
163            if ( values.contains( "track" ) )
164            {
165                doc.add( *( _CLNEW Field( _T( "fulltext" ), DatabaseImpl::sortname( QString( "%1 %2" ).arg( values.value( "artist" ) ).arg( values.value( "track" ) ) ).toStdWString().c_str(),
166                                          Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
167
168                doc.add( *( _CLNEW Field( _T( "track" ), DatabaseImpl::sortname( values.value( "track" ) ).toStdWString().c_str(),
169                                          Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
170
171                doc.add( *( _CLNEW Field( _T( "artist" ), DatabaseImpl::sortname( values.value( "artist" ) ).toStdWString().c_str(),
172                                          Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
173
174                doc.add( *( _CLNEW Field( _T( "artistid" ), values.value( "artistid" ).toStdWString().c_str(),
175                                          Field::STORE_YES | Field::INDEX_NO ) ) );
176
177                doc.add( *( _CLNEW Field( _T( "trackid" ), QString::number( id ).toStdWString().c_str(),
178                                          Field::STORE_YES | Field::INDEX_NO ) ) );
179            }
180            else if ( values.contains( "album" ) )
181            {
182                doc.add( *( _CLNEW Field( _T( "album" ), DatabaseImpl::sortname( values.value( "album" ) ).toStdWString().c_str(),
183                                          Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
184
185                doc.add( *( _CLNEW Field( _T( "albumid" ), QString::number( id ).toStdWString().c_str(),
186                                          Field::STORE_YES | Field::INDEX_NO ) ) );
187            }
188            else
189                Q_ASSERT( false );
190
191            luceneWriter.addDocument( &doc );
192            doc.clear();
193        }
194
195        luceneWriter.optimize();
196        luceneWriter.close();
197    }
198    catch( CLuceneError& error )
199    {
200        tDebug() << "Caught CLucene error:" << error.what();
201
202        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
203    }
204}
205
206
207void
208FuzzyIndex::loadLuceneIndex()
209{
210    emit indexReady();
211}
212
213
214QMap< int, float >
215FuzzyIndex::search( const Tomahawk::query_ptr& query )
216{
217    QMutexLocker lock( &m_mutex );
218
219    QMap< int, float > resultsmap;
220    try
221    {
222        if ( !m_luceneReader )
223        {
224            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
225            {
226                qDebug() << Q_FUNC_INFO << "index didn't exist.";
227                return resultsmap;
228            }
229
230            m_luceneReader = IndexReader::open( m_luceneDir );
231            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
232        }
233
234        float minScore;
235        const TCHAR** fields = 0;
236        MultiFieldQueryParser parser( fields, m_analyzer );
237        BooleanQuery* qry = _CLNEW BooleanQuery();
238
239        if ( query->isFullTextQuery() )
240        {
241            QString escapedQuery = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
242
243            Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
244            Query* fqry = _CLNEW FuzzyQuery( term );
245            qry->add( fqry, true, BooleanClause::SHOULD );
246
247            term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
248            fqry = _CLNEW FuzzyQuery( term );
249            qry->add( fqry, true, BooleanClause::SHOULD );
250
251            term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
252            fqry = _CLNEW FuzzyQuery( term );
253            qry->add( fqry, true, BooleanClause::SHOULD );
254
255            minScore = 0.00;
256        }
257        else
258        {
259            QString track = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() ) );
260            QString artist = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() ) );
261//            QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );
262
263            Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
264            Query* fqry = _CLNEW FuzzyQuery( term );
265            qry->add( fqry, true, BooleanClause::MUST );
266
267            term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
268            fqry = _CLNEW FuzzyQuery( term );
269            qry->add( fqry, true, BooleanClause::MUST );
270
271            minScore = 0.00;
272        }
273
274        Hits* hits = m_luceneSearcher->search( qry );
275        for ( uint i = 0; i < hits->length(); i++ )
276        {
277            Document* d = &hits->doc( i );
278
279            float score = hits->score( i );
280            int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();
281
282            if ( score > minScore )
283            {
284                resultsmap.insert( id, score );
285//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
286            }
287        }
288
289        delete hits;
290        delete qry;
291    }
292    catch( CLuceneError& error )
293    {
294        tDebug() << "Caught CLucene error:" << error.what() << query->toString();
295
296        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
297    }
298
299    return resultsmap;
300}
301
302
303QMap< int, float >
304FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
305{
306    Q_ASSERT( query->isFullTextQuery() );
307
308    QMutexLocker lock( &m_mutex );
309
310    QMap< int, float > resultsmap;
311    try
312    {
313        if ( !m_luceneReader )
314        {
315            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
316            {
317                qDebug() << Q_FUNC_INFO << "index didn't exist.";
318                return resultsmap;
319            }
320
321            m_luceneReader = IndexReader::open( m_luceneDir );
322            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
323        }
324
325        QueryParser parser( _T( "album" ), m_analyzer );
326        QString escapedName = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
327
328        Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
329        Hits* hits = m_luceneSearcher->search( qry );
330        for ( uint i = 0; i < hits->length(); i++ )
331        {
332            Document* d = &hits->doc( i );
333
334            float score = hits->score( i );
335            int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();
336
337            if ( score > 0.30 )
338            {
339                resultsmap.insert( id, score );
340//                tDebug() << "Index hit:" << id << score;
341            }
342        }
343
344        delete hits;
345        delete qry;
346    }
347    catch( CLuceneError& error )
348    {
349        tDebug() << "Caught CLucene error:" << error.what();
350
351        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
352    }
353
354    return resultsmap;
355}