PageRenderTime 37ms CodeModel.GetById 11ms app.highlight 20ms RepoModel.GetById 1ms app.codeStats 1ms

/Source Code/Indexer/Searcher.vb

#
Visual Basic | 105 lines | 62 code | 25 blank | 18 comment | 0 complexity | 92eeb304f696c3549898a4d2f409a6d8 MD5 | raw file
  ''' <summary>
  ''' Runs free-text queries against an <see cref="Indexer"/> and returns the
  ''' names of the matching documents, best match first.
  ''' </summary>
  Public Class Searcher

      ' Characters that delimit terms in a query string. CR and LF are listed
      ' individually: String.Split takes single characters, so passing
      ' Environment.NewLine (a String) only ever contributed its first character.
      Private Shared ReadOnly TermSeparators As Char() = { _
          " "c, "!"c, "."c, ":"c, """"c, "-"c, _
          Microsoft.VisualBasic.ControlChars.Cr, _
          Microsoft.VisualBasic.ControlChars.Lf}

      ' The index that all searches are run against.
      Private ReadOnly FIndex As Indexer

      ''' <summary>
      ''' Creates a searcher bound to the given index.
      ''' </summary>
      ''' <param name="index">The index to query.</param>
      Sub New(ByVal index As Indexer)
          Me.FIndex = index
      End Sub

      ''' <summary>
      ''' Scores every document in the index against the query terms.
      ''' </summary>
      ''' <param name="terms">The query terms; duplicates are allowed and raise that term's weight.</param>
      ''' <returns>
      ''' One entry per document number reported by the index. Documents containing
      ''' at least one term have Match set and a summed LogRuleScore.
      ''' </returns>
      Private Function SearchInternal(ByVal ParamArray terms() As String) As Dictionary(Of Integer, Score)

          ' Start with every known document holding a freshly constructed score
          Dim Result As New Dictionary(Of Integer, Score)
          For Each DocumentNumber As Integer In FIndex.AllDocumentsNumbers
              Result.Add(DocumentNumber, New Score)
          Next

          ' Process each distinct term once; repeats are accounted for by the
          ' query term frequency rather than by scoring the term again.
          Dim CompletedTerms As New List(Of String)
          For Each RawTerm As String In terms
              ' NOTE(review): upper-casing presumably mirrors how the index stores words - confirm against Indexer
              Dim Term As String = RawTerm.ToUpper

              If CompletedTerms.Contains(Term) Then Continue For
              CompletedTerms.Add(Term)

              Dim Documents As List(Of Integer) = FIndex.DocumentsContainingWord(Term)
              If Documents.Count = 0 Then Continue For

              ' Both factors are the same for every document containing the term,
              ' so compute them once per term instead of once per document.
              Dim QueryWordFrequency As Integer = GetQueryTermFrequency(Term, terms)
              Dim LogNumber As Double = Math.Log(FIndex.DocumentCount / Documents.Count)

              For Each DocumentIndex As Integer In Documents
                  Dim TermFrequency As Double = FIndex.TermFrequency(DocumentIndex, Term)

                  ' NOTE(review): updating through this local relies on Score being a
                  ' reference type (Class, not Structure) - confirm in Score's definition
                  Dim DocumentScore As Score = Result(DocumentIndex)
                  DocumentScore.LogRuleScore += TermFrequency * QueryWordFrequency * LogNumber
                  DocumentScore.Match = True
              Next
          Next

          Return Result

      End Function

      ''' <summary>
      ''' Orders the scored documents from highest to lowest score.
      ''' </summary>
      ''' <param name="results">The per-document scores produced by SearchInternal.</param>
      ''' <returns>
      ''' The document numbers ordered by descending LogRuleScore. Documents with
      ''' equal scores appear in the reverse of the dictionary's enumeration order.
      ''' </returns>
      Private Function GetSortedDocumentOrder(ByVal results As Dictionary(Of Integer, Score)) As List(Of Integer)

          ' Bucket the documents by score in a single pass, instead of rescanning
          ' the whole result set once per distinct score.
          Dim DocumentsByScore As New Dictionary(Of Double, List(Of Integer))
          For Each SearchResult As KeyValuePair(Of Integer, Score) In results
              Dim CurrentScore As Double = SearchResult.Value.LogRuleScore
              Dim Bucket As List(Of Integer) = Nothing
              If Not DocumentsByScore.TryGetValue(CurrentScore, Bucket) Then
                  Bucket = New List(Of Integer)
                  DocumentsByScore.Add(CurrentScore, Bucket)
              End If
              Bucket.Add(SearchResult.Key)
          Next

          ' Sort the distinct scores in ascending order
          Dim SortedScores As New List(Of Double)(DocumentsByScore.Keys)
          SortedScores.Sort()

          ' Lay the buckets out in ascending score order...
          Dim SortedDocuments As New List(Of Integer)(results.Count)
          For Each CurrentScore As Double In SortedScores
              SortedDocuments.AddRange(DocumentsByScore(CurrentScore))
          Next

          ' ...then flip the list so the best score comes first
          SortedDocuments.Reverse()
          Return SortedDocuments

      End Function

      ''' <summary>
      ''' Determine the number of times the term appears in the query.
      ''' </summary>
      ''' <param name="term">The term to count; compared in upper case.</param>
      ''' <param name="allTerms">Every term of the query, duplicates included.</param>
      ''' <returns>The number of entries in allTerms equal to term once both are upper-cased.</returns>
      Private Function GetQueryTermFrequency(ByVal term As String, ByVal allTerms() As String) As Integer

          term = term.ToUpper

          Dim Frequency As Integer = 0
          For Each CurrentTerm As String In allTerms
              If CurrentTerm.ToUpper = term Then Frequency += 1
          Next
          Return Frequency

      End Function

      ''' <summary>
      ''' Searches the index for documents matching the query.
      ''' </summary>
      ''' <param name="query">
      ''' The query text. It is split into terms on space, '!', '.', ':', '"', '-',
      ''' carriage return and line feed.
      ''' </param>
      ''' <returns>
      ''' The names of the documents containing at least one query term, ordered by
      ''' descending score. Empty when the query is Nothing or contains no terms.
      ''' </returns>
      Public Function Search(ByVal query As String) As List(Of String)

          Dim Result As New List(Of String)
          If String.IsNullOrEmpty(query) Then Return Result

          ' Drop the empty entries produced by consecutive separators so that
          ' blank strings are never looked up as words in the index.
          Dim Terms() As String = query.Split(TermSeparators, StringSplitOptions.RemoveEmptyEntries)
          If Terms.Length = 0 Then Return Result

          ' Perform the search
          Dim SearchResults As Dictionary(Of Integer, Score) = SearchInternal(Terms)

          ' Return the results in sorted order, keeping only documents that matched
          For Each DocumentIndex As Integer In GetSortedDocumentOrder(SearchResults)
              Dim DocumentScore As Score = SearchResults(DocumentIndex)
              If DocumentScore.Match Then Result.Add(FIndex.DocumentName(DocumentIndex))
          Next
          Return Result

      End Function

  End Class