PageRenderTime 12ms CodeModel.GetById 2ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/tools/search/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 42 lines | 38 code | 2 blank | 2 comment | 0 complexity | c18583ced03cfc992b6771042145c883 MD5 | raw file
 1from galaxy.eggs import require
 2from galaxy.web.framework.helpers import to_unicode
 3require( "Whoosh" )
 4
 5from whoosh.filedb.filestore import RamStorage
 6from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
 7from whoosh.index import Index
 8from whoosh.scoring import BM25F
 9from whoosh.qparser import MultifieldParser
# Index layout used both when building the tool index and when parsing
# queries: the tool id is only stored (so it can be handed back with each
# hit), while title, description and help are the searchable text fields.
schema = Schema(
    id = STORED,
    title = TEXT,
    description = TEXT,
    help = TEXT
)
11
class ToolBoxSearch( object ):
    """
    Support searching tools in a toolbox. This implementation uses
    the "whoosh" search library.

    The index lives in memory (`RamStorage`) and is built from
    `toolbox.tools_by_id` when the instance is created.
    """

    def __init__( self, toolbox ):
        """
        Create a searcher for `toolbox`.

        `toolbox` must provide a `tools_by_id` mapping of tool id -> tool,
        where each tool has `name`, `description` and `help` attributes.
        """
        self.toolbox = toolbox
        self.build_index()

    def build_index( self ):
        """
        Build a fresh in-memory index holding one document per tool.

        Replaces `self.storage` and `self.index`; call it again to pick up
        changes made to the toolbox after construction.
        """
        self.storage = RamStorage()
        self.index = self.storage.create_index( schema )
        writer = self.index.writer()
        ## TODO: would also be nice to search section headers.
        # `tool_id` rather than `id` so the builtin is not shadowed.
        for tool_id, tool in self.toolbox.tools_by_id.iteritems():
            writer.add_document(
                id=tool_id,
                title=to_unicode( tool.name ),
                description=to_unicode( tool.description ),
                help=to_unicode( tool.help )
            )
        writer.commit()

    def search( self, query, return_attribute='id' ):
        """
        Search tool titles, descriptions and help text for `query`.

        Returns a list containing the `return_attribute` stored field of
        each hit, in the order whoosh returns the hits. The schema only
        stores `id`, so other attribute names are presumably unavailable on
        hits -- verify before passing anything else.
        """
        # Intended to place more weight on title and description than help.
        # NOTE(review): in BM25F, "B" is a length-normalisation parameter
        # rather than a boost, and how this `field_B` dict is interpreted
        # depends on the bundled Whoosh version -- confirm it has the
        # intended effect. Values are kept exactly as they were.
        weighting = BM25F( field_B={ 'title_B' : 3, 'description_B' : 2, 'help_B' : 1 } )
        searcher = self.index.searcher( weighting=weighting )
        try:
            # Set query to search title, description, and help.
            parser = MultifieldParser( [ 'title', 'description', 'help' ], schema = schema )
            results = searcher.search( parser.parse( query ) )
            # Materialise the hits before the searcher is closed; stored
            # fields are read through the open searcher.
            return [ result[ return_attribute ] for result in results ]
        finally:
            # Release the searcher (and its reader) even if parsing or
            # searching raised; previously it was never closed.
            searcher.close()