PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/Source Code/Indexer/Searcher.vb

#
Visual Basic | 105 lines | 62 code | 25 blank | 18 comment | 0 complexity | 92eeb304f696c3549898a4d2f409a6d8 MD5 | raw file
  ''' <summary>
  ''' Runs free-text queries against an <see cref="Indexer"/> and returns the names of the
  ''' documents that contain at least one query term, highest score first.
  ''' </summary>
  Public Class Searcher

      ''' <summary>
      ''' Characters that separate the terms of a query. CR and LF are listed individually
      ''' because a line break is a String and cannot be passed as a single Char separator.
      ''' </summary>
      Private Shared ReadOnly QuerySeparators() As Char = {" "c, "!"c, "."c, ":"c, """"c, "-"c, Microsoft.VisualBasic.ControlChars.Cr, Microsoft.VisualBasic.ControlChars.Lf}

      ''' <summary>The index that every search is run against.</summary>
      Private ReadOnly FIndex As Indexer

      ''' <summary>
      ''' Creates a searcher bound to the given index.
      ''' </summary>
      ''' <param name="index">The index to search.</param>
      Sub New(ByVal index As Indexer)
          Me.FIndex = index
      End Sub

      ''' <summary>
      ''' Pairs a document with its score and its position in the result dictionary so that
      ''' the documents can be ordered with a single call to Sort.
      ''' </summary>
      Private Structure RankedDocument
          Implements IComparable(Of RankedDocument)

          Public DocumentIndex As Integer
          Public RuleScore As Double
          Public Position As Integer

          ''' <summary>
          ''' Orders by descending score. Documents with equal scores are ordered by descending
          ''' position, i.e. the document enumerated later comes first.
          ''' </summary>
          Public Function CompareTo(ByVal other As RankedDocument) As Integer Implements IComparable(Of RankedDocument).CompareTo
              Dim ByScore As Integer = other.RuleScore.CompareTo(RuleScore)
              If ByScore <> 0 Then Return ByScore
              Return other.Position.CompareTo(Position)
          End Function
      End Structure

      ''' <summary>
      ''' Scores every document in the index against the given query terms.
      ''' </summary>
      ''' <param name="terms">The raw query terms; empty entries are ignored and duplicates are counted.</param>
      ''' <returns>
      ''' One entry per document number reported by the index. Documents that contain at least
      ''' one term have Match set and carry the accumulated score; all others keep a fresh Score.
      ''' </returns>
      Private Function SearchInternal(ByVal ParamArray terms() As String) As Dictionary(Of Integer, Score)
          ' Build a list of all the documents, each initially with a fresh score
          Dim Result As New Dictionary(Of Integer, Score)
          For Each DocumentNumber As Integer In FIndex.AllDocumentsNumbers
              Result.Add(DocumentNumber, New Score)
          Next

          ' Normalise each term once and count how often it occurs in the query.
          ' UniqueTerms keeps the first-seen order so scores are accumulated in query order.
          Dim UniqueTerms As New List(Of String)
          Dim QueryFrequencies As New Dictionary(Of String, Integer)
          For Each RawTerm As String In terms
              If String.IsNullOrEmpty(RawTerm) Then Continue For

              ' NOTE(review): ToUpper is kept as-is; presumably the index stores words with the
              ' same casing rule - confirm against Indexer before switching to ToUpperInvariant.
              Dim Term As String = RawTerm.ToUpper
              Dim Count As Integer = 0
              If QueryFrequencies.TryGetValue(Term, Count) Then
                  QueryFrequencies(Term) = Count + 1
              Else
                  QueryFrequencies.Add(Term, 1)
                  UniqueTerms.Add(Term)
              End If
          Next

          ' Calculate a score for each unique term and add it to the score of every document containing it
          For Each Term As String In UniqueTerms
              Dim Documents As List(Of Integer) = FIndex.DocumentsContainingWord(Term)
              If Documents Is Nothing OrElse Documents.Count = 0 Then Continue For

              ' The log factor depends only on the term, so it is computed once per term.
              ' "/" is floating-point division in Visual Basic, so no integer truncation occurs.
              Dim LogNumber As Double = Math.Log(FIndex.DocumentCount / Documents.Count)
              Dim QueryWordFrequency As Integer = QueryFrequencies(Term)

              For Each DocumentIndex As Integer In Documents
                  Dim TermFrequency As Double = FIndex.TermFrequency(DocumentIndex, Term)
                  Dim DocumentScore As Score = Result(DocumentIndex)
                  DocumentScore.LogRuleScore += TermFrequency * QueryWordFrequency * LogNumber
                  DocumentScore.Match = True
              Next
          Next

          Return Result
      End Function

      ''' <summary>
      ''' Sorts the search results.
      ''' </summary>
      ''' <param name="results">The results (from SearchInternal) to sort.</param>
      ''' <returns>
      ''' The list of document indices, ordered by descending score. Documents with the same
      ''' score appear in the reverse of the order in which <paramref name="results"/> enumerates them.
      ''' </returns>
      Private Function GetSortedDocumentOrder(ByVal results As Dictionary(Of Integer, Score)) As List(Of Integer)
          ' Capture each document together with its enumeration position; List.Sort is not
          ' stable, so the position is what makes the order of equal scores deterministic.
          Dim Ranked As New List(Of RankedDocument)(results.Count)
          Dim Position As Integer = 0
          For Each SearchResult As KeyValuePair(Of Integer, Score) In results
              Dim Entry As New RankedDocument
              Entry.DocumentIndex = SearchResult.Key
              Entry.RuleScore = SearchResult.Value.LogRuleScore
              Entry.Position = Position
              Ranked.Add(Entry)
              Position += 1
          Next
          Ranked.Sort()

          ' Project the ordered entries back to plain document indices
          Dim SortedDocuments As New List(Of Integer)(Ranked.Count)
          For Each Entry As RankedDocument In Ranked
              SortedDocuments.Add(Entry.DocumentIndex)
          Next
          Return SortedDocuments
      End Function

      ''' <summary>
      ''' Searches the index for the documents that contain at least one term of the query.
      ''' </summary>
      ''' <param name="query">
      ''' The query text. It is split into terms on space, "!", ".", ":", double quote, "-",
      ''' carriage return and line feed; empty terms are discarded.
      ''' </param>
      ''' <returns>
      ''' The names of the matching documents, highest score first. The list is empty when
      ''' <paramref name="query"/> is Nothing or empty.
      ''' </returns>
      Public Function Search(ByVal query As String) As List(Of String)
          Dim Result As New List(Of String)
          If String.IsNullOrEmpty(query) Then Return Result

          ' Perform the search
          Dim Terms() As String = query.Split(QuerySeparators, StringSplitOptions.RemoveEmptyEntries)
          Dim SearchResults As Dictionary(Of Integer, Score) = SearchInternal(Terms)

          ' Return the results in sorted order, skipping documents that matched no term
          For Each DocumentIndex As Integer In GetSortedDocumentOrder(SearchResults)
              If SearchResults(DocumentIndex).Match Then Result.Add(FIndex.DocumentName(DocumentIndex))
          Next
          Return Result
      End Function

  End Class