/Source Code/Indexer/Searcher.vb
Visual Basic | 105 lines | 62 code | 25 blank | 18 comment | 0 complexity | 92eeb304f696c3549898a4d2f409a6d8 MD5 | raw file
''' <summary>
''' Runs keyword queries against an <see cref="Indexer"/> and returns the names
''' of the matching documents, best match first.
''' </summary>
Public Class Searcher

    ''' <summary>
    ''' Characters that separate terms in a query. Carriage return and line feed
    ''' are listed individually so that both halves of a Windows line break (and
    ''' a lone LF) split terms.
    ''' </summary>
    Private Shared ReadOnly TermSeparators As Char() = New Char() { _
        " "c, "!"c, "."c, ":"c, """"c, "-"c, _
        Microsoft.VisualBasic.ControlChars.Cr, _
        Microsoft.VisualBasic.ControlChars.Lf}

    ' The index that every query is evaluated against.
    Private ReadOnly FIndex As Indexer = Nothing

    ''' <summary>
    ''' Creates a searcher bound to the given index.
    ''' </summary>
    ''' <param name="index">The index to search.</param>
    Sub New(ByVal index As Indexer)
        Me.FIndex = index
    End Sub

    ''' <summary>
    ''' Scores every document in the index against the query terms.
    ''' </summary>
    ''' <param name="terms">The raw query terms; case is ignored, and Nothing or empty entries are skipped.</param>
    ''' <returns>
    ''' A dictionary with one entry per document number reported by the index.
    ''' Documents that contain at least one term have Match set and a
    ''' LogRuleScore accumulated over all distinct terms they contain.
    ''' </returns>
    Private Function SearchInternal(ByVal ParamArray terms() As String) As Dictionary(Of Integer, Score)

        ' Build a list of all the documents, each initially with a score of 0
        Dim Result As New Dictionary(Of Integer, Score)
        For Each DocumentNumber As Integer In FIndex.AllDocumentsNumbers
            Result.Add(DocumentNumber, New Score)
        Next

        If terms Is Nothing Then Return Result

        ' Process all terms in the query
        Dim CompletedTerms As New List(Of String)
        For Each Term As String In terms

            ' A missing or empty term cannot match anything
            If String.IsNullOrEmpty(Term) Then Continue For

            ' Same normalisation as before (culture-sensitive ToUpper) so the
            ' lookups keep agreeing with however the index stored its words.
            Dim UpperTerm As String = Term.ToUpper

            ' Process only unique terms
            If CompletedTerms.Contains(UpperTerm) Then Continue For
            CompletedTerms.Add(UpperTerm)

            Dim Documents As List(Of Integer) = FIndex.DocumentsContainingWord(UpperTerm)
            If Documents Is Nothing OrElse Documents.Count = 0 Then Continue For

            Dim QueryWordFrequency As Integer = GetQueryTermFrequency(UpperTerm, terms)

            ' Log of (all documents / documents containing the term). It depends
            ' only on the term, so compute it once rather than per document.
            Dim LogNumber As Double = Math.Log(FIndex.DocumentCount / Documents.Count)

            ' Calculate a score for the term and add it to the current score for the document
            For Each DocumentIndex As Integer In Documents
                Dim TermFrequency As Double = FIndex.TermFrequency(DocumentIndex, UpperTerm)

                ' NOTE(review): the update below is only visible through Result
                ' if Score is a reference type (Class) - confirm.
                Dim DocumentScore As Score = Result(DocumentIndex)
                DocumentScore.LogRuleScore += TermFrequency * QueryWordFrequency * LogNumber
                DocumentScore.Match = True
            Next

        Next

        ' Return the result
        Return Result

    End Function

    ''' <summary>
    ''' Sorts the search results.
    ''' </summary>
    ''' <param name="results">The results (from SearchInternal) to sort.</param>
    ''' <returns>
    ''' The list of document indices, ordered by descending score. Documents
    ''' with equal scores appear in the reverse of the dictionary's enumeration order.
    ''' </returns>
    Private Function GetSortedDocumentOrder(ByVal results As Dictionary(Of Integer, Score)) As List(Of Integer)

        ' Bucket the document indices by score, in dictionary order
        Dim DocumentsByScore As New Dictionary(Of Double, List(Of Integer))
        For Each SearchResult As KeyValuePair(Of Integer, Score) In results
            Dim ResultScore As Double = SearchResult.Value.LogRuleScore
            Dim Bucket As List(Of Integer) = Nothing
            If Not DocumentsByScore.TryGetValue(ResultScore, Bucket) Then
                Bucket = New List(Of Integer)
                DocumentsByScore.Add(ResultScore, Bucket)
            End If
            Bucket.Add(SearchResult.Key)
        Next

        ' Sort the distinct scores (ascending)
        Dim DistinctScores As New List(Of Double)(DocumentsByScore.Keys)
        DistinctScores.Sort()

        ' Walk the scores from highest to lowest, emitting each bucket back to front
        Dim SortedDocuments As New List(Of Integer)(results.Count)
        For ScoreIndex As Integer = DistinctScores.Count - 1 To 0 Step -1
            Dim TiedDocuments As List(Of Integer) = DocumentsByScore(DistinctScores(ScoreIndex))
            For Position As Integer = TiedDocuments.Count - 1 To 0 Step -1
                SortedDocuments.Add(TiedDocuments(Position))
            Next
        Next

        Return SortedDocuments

    End Function

    ''' <summary>
    ''' Determine the number of times the term appears in the query.
    ''' </summary>
    ''' <param name="term">The term to count; compared after upper-casing.</param>
    ''' <param name="allTerms">Every term of the query, duplicates included.</param>
    ''' <returns>The number of entries in allTerms equal to term once both are upper-cased.</returns>
    Private Function GetQueryTermFrequency(ByVal term As String, ByVal allTerms() As String) As Integer

        term = term.ToUpper

        Dim Frequency As Integer = 0
        For Each CurrentTerm As String In allTerms
            ' Nothing entries can never equal a real term
            If CurrentTerm IsNot Nothing AndAlso CurrentTerm.ToUpper = term Then Frequency += 1
        Next
        Return Frequency

    End Function

    ''' <summary>
    ''' Searches the index for the words in the query.
    ''' </summary>
    ''' <param name="query">
    ''' Free text; it is split into terms on space, '!', '.', ':', '"', '-' and
    ''' line breaks. Empty pieces between adjacent separators are discarded.
    ''' </param>
    ''' <returns>
    ''' The names of the documents that contain at least one query term, highest
    ''' score first. Empty when the query is Nothing or contains no terms.
    ''' </returns>
    Public Function Search(ByVal query As String) As List(Of String)

        Dim Result As New List(Of String)
        If String.IsNullOrEmpty(query) Then Return Result

        ' Break the query into terms
        Dim Terms() As String = query.Split(TermSeparators, StringSplitOptions.RemoveEmptyEntries)
        If Terms.Length = 0 Then Return Result

        ' Perform the search
        Dim SearchResults As Dictionary(Of Integer, Score) = SearchInternal(Terms)

        ' Return the results in sorted order
        For Each DocumentIndex As Integer In GetSortedDocumentOrder(SearchResults)
            Dim DocumentScore As Score = SearchResults(DocumentIndex)
            If DocumentScore.Match Then Result.Add(FIndex.DocumentName(DocumentIndex))
        Next
        Return Result

    End Function

End Class