PageRenderTime 62ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/tools/search/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 42 lines | 38 code | 2 blank | 2 comment | 0 complexity | c18583ced03cfc992b6771042145c883 MD5 | raw file
  1. from galaxy.eggs import require
  2. from galaxy.web.framework.helpers import to_unicode
  3. require( "Whoosh" )
  4. from whoosh.filedb.filestore import RamStorage
  5. from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
  6. from whoosh.index import Index
  7. from whoosh.scoring import BM25F
  8. from whoosh.qparser import MultifieldParser
  9. schema = Schema( id = STORED, title = TEXT, description = TEXT, help = TEXT )
  10. class ToolBoxSearch( object ):
  11. """
  12. Support searching tools in a toolbox. This implementation uses
  13. the "whoosh" search library.
  14. """
  15. def __init__( self, toolbox ):
  16. """
  17. Create a searcher for `toolbox`.
  18. """
  19. self.toolbox = toolbox
  20. self.build_index()
  21. def build_index( self ):
  22. self.storage = RamStorage()
  23. self.index = self.storage.create_index( schema )
  24. writer = self.index.writer()
  25. ## TODO: would also be nice to search section headers.
  26. for id, tool in self.toolbox.tools_by_id.iteritems():
  27. writer.add_document( id=id, title=to_unicode(tool.name), description=to_unicode(tool.description), help=to_unicode(tool.help) )
  28. writer.commit()
  29. def search( self, query, return_attribute='id' ):
  30. # Change field boosts for searcher to place more weight on title, description than help.
  31. searcher = self.index.searcher( \
  32. weighting=BM25F( field_B={ 'title_B' : 3, 'description_B' : 2, 'help_B' : 1 } \
  33. ) )
  34. # Set query to search title, description, and help.
  35. parser = MultifieldParser( [ 'title', 'description', 'help' ], schema = schema )
  36. results = searcher.search( parser.parse( query ) )
  37. return [ result[ return_attribute ] for result in results ]