/src/libtomahawk/database/fuzzyindex.cpp

http://github.com/tomahawk-player/tomahawk · C++ · 355 lines · 265 code · 70 blank · 20 comment · 17 complexity · 08917fc3370de98328eced8a8acb8f4c MD5 · raw file

  1. /* === This file is part of Tomahawk Player - <http://tomahawk-player.org> ===
  2. *
  3. * Copyright 2010-2011, Christian Muehlhaeuser <muesli@tomahawk-player.org>
  4. *
  5. * Tomahawk is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * Tomahawk is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with Tomahawk. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "FuzzyIndex.h"
  19. #include <QDir>
  20. #include <QTime>
  21. #include <CLucene.h>
  22. #include <CLucene/queryParser/MultiFieldQueryParser.h>
  23. #include "DatabaseCommand_UpdateSearchIndex.h"
  24. #include "DatabaseImpl.h"
  25. #include "Database.h"
  26. #include "utils/TomahawkUtils.h"
  27. #include "utils/Logger.h"
  28. #include "Source.h"
  29. using namespace lucene::analysis;
  30. using namespace lucene::analysis::standard;
  31. using namespace lucene::document;
  32. using namespace lucene::store;
  33. using namespace lucene::index;
  34. using namespace lucene::queryParser;
  35. using namespace lucene::search;
  36. FuzzyIndex::FuzzyIndex( QObject* parent, bool wipe )
  37. : QObject( parent )
  38. , m_luceneReader( 0 )
  39. , m_luceneSearcher( 0 )
  40. {
  41. QString m_lucenePath = TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" );
  42. QByteArray path = m_lucenePath.toUtf8();
  43. const char* cPath = path.constData();
  44. bool failed = false;
  45. tDebug() << "Opening Lucene directory:" << path;
  46. try
  47. {
  48. m_analyzer = _CLNEW SimpleAnalyzer();
  49. m_luceneDir = FSDirectory::getDirectory( cPath );
  50. }
  51. catch ( CLuceneError& error )
  52. {
  53. tDebug() << "Caught CLucene error:" << error.what();
  54. failed = true;
  55. }
  56. if ( failed )
  57. {
  58. tDebug() << "Initializing RAM directory instead.";
  59. m_luceneDir = _CLNEW RAMDirectory();
  60. wipe = true;
  61. }
  62. if ( wipe )
  63. wipeIndex();
  64. }
  65. FuzzyIndex::~FuzzyIndex()
  66. {
  67. delete m_luceneSearcher;
  68. delete m_luceneReader;
  69. delete m_analyzer;
  70. delete m_luceneDir;
  71. }
  72. bool
  73. FuzzyIndex::wipeIndex()
  74. {
  75. tLog( LOGVERBOSE ) << "Wiping fuzzy index...";
  76. beginIndexing();
  77. endIndexing();
  78. QTimer::singleShot( 0, this, SLOT( updateIndex() ) );
  79. return true; // FIXME
  80. }
  81. void
  82. FuzzyIndex::updateIndex()
  83. {
  84. DatabaseCommand* cmd = new DatabaseCommand_UpdateSearchIndex();
  85. Database::instance()->enqueue( QSharedPointer<DatabaseCommand>( cmd ) );
  86. }
  87. void
  88. FuzzyIndex::beginIndexing()
  89. {
  90. m_mutex.lock();
  91. try
  92. {
  93. qDebug() << Q_FUNC_INFO << "Starting indexing.";
  94. if ( m_luceneReader != 0 )
  95. {
  96. qDebug() << "Deleting old lucene stuff.";
  97. m_luceneSearcher->close();
  98. m_luceneReader->close();
  99. delete m_luceneSearcher;
  100. delete m_luceneReader;
  101. m_luceneSearcher = 0;
  102. m_luceneReader = 0;
  103. }
  104. qDebug() << "Creating new index writer.";
  105. IndexWriter luceneWriter( m_luceneDir, m_analyzer, true );
  106. }
  107. catch( CLuceneError& error )
  108. {
  109. tDebug() << "Caught CLucene error:" << error.what();
  110. Q_ASSERT( false );
  111. }
  112. }
  113. void
  114. FuzzyIndex::endIndexing()
  115. {
  116. m_mutex.unlock();
  117. emit indexReady();
  118. }
  119. void
  120. FuzzyIndex::appendFields( const QMap< unsigned int, QMap< QString, QString > >& trackData )
  121. {
  122. try
  123. {
  124. tDebug() << "Appending to index:" << trackData.count();
  125. bool create = !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() );
  126. IndexWriter luceneWriter( m_luceneDir, m_analyzer, create );
  127. Document doc;
  128. QMapIterator< unsigned int, QMap< QString, QString > > it( trackData );
  129. while ( it.hasNext() )
  130. {
  131. it.next();
  132. unsigned int id = it.key();
  133. QMap< QString, QString > values = it.value();
  134. if ( values.contains( "track" ) )
  135. {
  136. doc.add( *( _CLNEW Field( _T( "fulltext" ), DatabaseImpl::sortname( QString( "%1 %2" ).arg( values.value( "artist" ) ).arg( values.value( "track" ) ) ).toStdWString().c_str(),
  137. Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
  138. doc.add( *( _CLNEW Field( _T( "track" ), DatabaseImpl::sortname( values.value( "track" ) ).toStdWString().c_str(),
  139. Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
  140. doc.add( *( _CLNEW Field( _T( "artist" ), DatabaseImpl::sortname( values.value( "artist" ) ).toStdWString().c_str(),
  141. Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
  142. doc.add( *( _CLNEW Field( _T( "artistid" ), values.value( "artistid" ).toStdWString().c_str(),
  143. Field::STORE_YES | Field::INDEX_NO ) ) );
  144. doc.add( *( _CLNEW Field( _T( "trackid" ), QString::number( id ).toStdWString().c_str(),
  145. Field::STORE_YES | Field::INDEX_NO ) ) );
  146. }
  147. else if ( values.contains( "album" ) )
  148. {
  149. doc.add( *( _CLNEW Field( _T( "album" ), DatabaseImpl::sortname( values.value( "album" ) ).toStdWString().c_str(),
  150. Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
  151. doc.add( *( _CLNEW Field( _T( "albumid" ), QString::number( id ).toStdWString().c_str(),
  152. Field::STORE_YES | Field::INDEX_NO ) ) );
  153. }
  154. else
  155. Q_ASSERT( false );
  156. luceneWriter.addDocument( &doc );
  157. doc.clear();
  158. }
  159. luceneWriter.optimize();
  160. luceneWriter.close();
  161. }
  162. catch( CLuceneError& error )
  163. {
  164. tDebug() << "Caught CLucene error:" << error.what();
  165. QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
  166. }
  167. }
  168. void
  169. FuzzyIndex::loadLuceneIndex()
  170. {
  171. emit indexReady();
  172. }
  173. QMap< int, float >
  174. FuzzyIndex::search( const Tomahawk::query_ptr& query )
  175. {
  176. QMutexLocker lock( &m_mutex );
  177. QMap< int, float > resultsmap;
  178. try
  179. {
  180. if ( !m_luceneReader )
  181. {
  182. if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
  183. {
  184. qDebug() << Q_FUNC_INFO << "index didn't exist.";
  185. return resultsmap;
  186. }
  187. m_luceneReader = IndexReader::open( m_luceneDir );
  188. m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
  189. }
  190. float minScore;
  191. const TCHAR** fields = 0;
  192. MultiFieldQueryParser parser( fields, m_analyzer );
  193. BooleanQuery* qry = _CLNEW BooleanQuery();
  194. if ( query->isFullTextQuery() )
  195. {
  196. QString escapedQuery = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
  197. Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
  198. Query* fqry = _CLNEW FuzzyQuery( term );
  199. qry->add( fqry, true, BooleanClause::SHOULD );
  200. term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
  201. fqry = _CLNEW FuzzyQuery( term );
  202. qry->add( fqry, true, BooleanClause::SHOULD );
  203. term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
  204. fqry = _CLNEW FuzzyQuery( term );
  205. qry->add( fqry, true, BooleanClause::SHOULD );
  206. minScore = 0.00;
  207. }
  208. else
  209. {
  210. QString track = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() ) );
  211. QString artist = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() ) );
  212. // QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );
  213. Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
  214. Query* fqry = _CLNEW FuzzyQuery( term );
  215. qry->add( fqry, true, BooleanClause::MUST );
  216. term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
  217. fqry = _CLNEW FuzzyQuery( term );
  218. qry->add( fqry, true, BooleanClause::MUST );
  219. minScore = 0.00;
  220. }
  221. Hits* hits = m_luceneSearcher->search( qry );
  222. for ( uint i = 0; i < hits->length(); i++ )
  223. {
  224. Document* d = &hits->doc( i );
  225. float score = hits->score( i );
  226. int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();
  227. if ( score > minScore )
  228. {
  229. resultsmap.insert( id, score );
  230. // tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
  231. }
  232. }
  233. delete hits;
  234. delete qry;
  235. }
  236. catch( CLuceneError& error )
  237. {
  238. tDebug() << "Caught CLucene error:" << error.what() << query->toString();
  239. QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
  240. }
  241. return resultsmap;
  242. }
  243. QMap< int, float >
  244. FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
  245. {
  246. Q_ASSERT( query->isFullTextQuery() );
  247. QMutexLocker lock( &m_mutex );
  248. QMap< int, float > resultsmap;
  249. try
  250. {
  251. if ( !m_luceneReader )
  252. {
  253. if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
  254. {
  255. qDebug() << Q_FUNC_INFO << "index didn't exist.";
  256. return resultsmap;
  257. }
  258. m_luceneReader = IndexReader::open( m_luceneDir );
  259. m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
  260. }
  261. QueryParser parser( _T( "album" ), m_analyzer );
  262. QString escapedName = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
  263. Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
  264. Hits* hits = m_luceneSearcher->search( qry );
  265. for ( uint i = 0; i < hits->length(); i++ )
  266. {
  267. Document* d = &hits->doc( i );
  268. float score = hits->score( i );
  269. int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();
  270. if ( score > 0.30 )
  271. {
  272. resultsmap.insert( id, score );
  273. // tDebug() << "Index hit:" << id << score;
  274. }
  275. }
  276. delete hits;
  277. delete qry;
  278. }
  279. catch( CLuceneError& error )
  280. {
  281. tDebug() << "Caught CLucene error:" << error.what();
  282. QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
  283. }
  284. return resultsmap;
  285. }