/src/Mobyle/SearchIndex.py

https://github.com/mobyle2-legacy/mobyle2.core · Python · 133 lines · 103 code · 8 blank · 22 comment · 12 complexity · 2af1a71b96a8cc282082259462485a4e MD5 · raw file

  1. #############################################################
  2. # #
  3. # Author: Herve Menager #
  4. # Organization:'Biological Software and Databases' Group, #
  5. # Institut Pasteur, Paris. #
  6. # Distributed under GPLv2 Licence. Please refer to the #
  7. # COPYING.LIB document. #
  8. # #
  9. #############################################################
"""
Mobyle.SearchIndex
This module manages the list of available programs to:
- search them (using search fields)
- classify them (building the categories tree)
- cache this information (on disk as a JSON file)
"""
  17. from Mobyle.Registry import *
  18. import re
  19. from logging import getLogger
  20. r_log = getLogger(__name__)
  21. from Mobyle import IndexBase
  22. queries = {
  23. 'head': '/*/head',
  24. 'package': 'package',
  25. 'name': 'name//text()',
  26. 'title': 'doc/title/text()',
  27. 'description': 'doc/description/text/text()',
  28. 'categories': 'category/text()',
  29. 'comment': 'doc/comment/text/text()',
  30. 'authors': 'doc/authors/text()',
  31. 'references': 'doc/reference/text()',
  32. 'parameter': './/parameter[name]',
  33. 'parameter_name': 'name/text()',
  34. 'parameter_prompt': 'prompt/text()',
  35. 'parameter_comment': 'comment/text/text()',
  36. 'parameter_type': 'type',
  37. 'parameter_biotype': 'biotype/text()',
  38. 'parameter_datatype': 'datatype',
  39. 'parameter_class': 'class/text()',
  40. 'paragraph': '//paragraph',
  41. 'paragraph_name': 'name/text()',
  42. 'paragraph_prompt': 'prompt/text()',
  43. 'paragraph_comment': 'comment/text/text()'
  44. }
  45. class SearchIndex(IndexBase.Index):
  46. indexFileName = 'search.dat'
  47. def filterRegistry(self, keywordList):
  48. keywordList = [re.escape(k) for k in keywordList] # escape special re characters...
  49. keywordsRe = re.compile('(%s)' % '|'.join(keywordList), re.I)
  50. servicesList = getattr( registry, self.type + 's')[:]
  51. for s in servicesList:
  52. s.searchMatches = []
  53. if self.index.has_key(s.url):
  54. for field, value in self.index[s.url].items():
  55. if isinstance(value, basestring):
  56. self._searchFieldString(field, value, keywordsRe, s)
  57. if isinstance(value, list):
  58. for valueItem in value:
  59. self._searchFieldString(field, valueItem, keywordsRe, s)
  60. if len(s.searchMatches) == 0:
  61. registry.pruneService(s)
  62. def _searchFieldString(self, fieldName, fieldValue, rx, program):
  63. if len(rx.findall(fieldValue))>0:
  64. program.searchMatches.append((fieldName,rx.sub('<b>\\1</b>',fieldValue)))
  65. @classmethod
  66. def getIndexEntry(cls, doc, index):
  67. """
  68. Return an search index entry value
  69. @return: the index entry: value
  70. @rtype: object
  71. """
  72. head = IndexBase._XPathQuery(doc, queries['head'], 'rawResult')[0]
  73. fields = {}
  74. fields['name'] = IndexBase._XPathQuery(head, queries['name'])
  75. fields['title'] = IndexBase._XPathQuery(head, queries['title'])
  76. fields['description'] = IndexBase._XPathQuery(head, queries['description'])
  77. fields['categories'] = IndexBase._XPathQuery(head, \
  78. queries['categories'], \
  79. 'valueList')
  80. fields['comment'] = IndexBase._XPathQuery(head, queries['comment'])
  81. fields['authors'] = IndexBase._XPathQuery(head, queries['authors'])
  82. fields['references'] = IndexBase._XPathQuery(head, queries['references'])
  83. package = IndexBase._XPathQuery(head, queries['package'], 'rawResult')
  84. if package:
  85. package = package[0]
  86. fields['package name'] = IndexBase._XPathQuery(package, queries['name'])
  87. fields['package title'] = IndexBase._XPathQuery(package, queries['title'])
  88. fields['package description'] = IndexBase._XPathQuery(package, queries['description'])
  89. fields['package categories'] = IndexBase._XPathQuery(package, \
  90. queries['categories'], \
  91. 'valueList')
  92. fields['package comment'] = IndexBase._XPathQuery(package, queries['comment'])
  93. fields['package authors'] = IndexBase._XPathQuery(package, queries['authors'])
  94. fields['package references'] = IndexBase._XPathQuery(package, queries['references'])
  95. fields['parameter name'] = []
  96. fields['parameter prompt'] = []
  97. fields['parameter comment'] = []
  98. fields['parameter bioTypes'] = []
  99. fields['parameter class'] = []
  100. pars = IndexBase._XPathQuery(doc, queries['parameter'], 'rawResult')
  101. for p in pars:
  102. fields['parameter name'].append(IndexBase._XPathQuery(p, queries['parameter_name']))
  103. fields['parameter prompt'].append(IndexBase._XPathQuery(p, queries['parameter_prompt']))
  104. fields['parameter comment'].append(IndexBase._XPathQuery(p, queries['parameter_comment']))
  105. parType = IndexBase._XPathQuery(p, \
  106. queries['parameter_type'],
  107. 'rawResult')[0]
  108. fields['parameter bioTypes'].append(IndexBase._XPathQuery(parType, \
  109. queries['parameter_biotype']))
  110. parDataType = IndexBase._XPathQuery(parType, \
  111. queries['parameter_datatype'],\
  112. 'rawResult')[0]
  113. fields['parameter class'].append(IndexBase._XPathQuery(parDataType, \
  114. queries['parameter_class']))
  115. fields['paragraphs'] = []
  116. pars = IndexBase._XPathQuery(doc, queries['paragraph'], 'rawResult')
  117. fields['paragraph name'] = []
  118. fields['paragraph prompt'] = []
  119. fields['paragraph comment'] = []
  120. for p in pars:
  121. fields['paragraph name'].append(IndexBase._XPathQuery(p, queries['paragraph_name']))
  122. fields['paragraph prompt'].append(IndexBase._XPathQuery(p, queries['paragraph_prompt']))
  123. fields['paragraph comment'].append(IndexBase._XPathQuery(p, queries['paragraph_comment']))
  124. return fields