/app/models/verb_finder.rb
Ruby | 132 lines | 100 code | 12 blank | 20 comment | 16 complexity | 0302714cd0e9e545ce72eec5c0f3bf8e MD5 | raw file
# Finds Spanish verbs in the GlobalVerb table by a normalized, ASCII-only
# lookup key (see VerbFinder.map_verb), falling back to a "near match" when the
# exact key is not present.
class VerbFinder
  # When true, map_verb prints the byte values of its (lower-cased) input.
  DEBUG = false

  # Status codes returned as the first element of lookup_verb's result.
  OK = 200
  NEAR_MATCH = 300
  NOT_FOUND = 404
  BAD_INPUT = 405

  # Search logging is currently switched off; see log_search.
  SEARCH_LOGGING = false

  # Accepted input: no whitespace, ends in "r" or "se", optionally followed by
  # an "_..." substitution suffix. \A and \z anchor the whole string so that a
  # valid-looking line inside multi-line input cannot slip through.
  INFINITIVE_PATTERN = /\A\S*(r|se)(_.*)?\z/i

  # HTML entity for an acute-accented vowel, e.g. "&iacute;".
  ACUTE_ENTITY = /&([aeiou])acute;/

  # n-with-tilde, either as an HTML entity or as its UTF-8 byte pair.
  # Binary (/n) regexp because map_verb works on raw bytes.
  N_TILDE = /&ntilde;|\xC3\xB1/n

  # First byte of every two-byte UTF-8 sequence handled below (0xC3).
  UTF8_LEAD_BYTE = 195

  # Second UTF-8 byte (after 0xC3) => plain ASCII replacement.
  UTF8_TRAIL_TO_ASCII = {
    161 => 'a', # a-acute
    169 => 'e', # e-acute
    173 => 'i', # i-acute
    177 => 'n', # n-tilde
    179 => 'o', # o-acute
    186 => 'u', # u-acute
    188 => 'u'  # u-diaeresis (e.g. "arguir" written with a diaeresis)
  }.freeze

  # Single-byte extended-ASCII (ISO-8859-1) code => plain ASCII replacement.
  LATIN1_TO_ASCII = {
    225 => 'a',
    233 => 'e',
    237 => 'i',
    241 => 'n',
    243 => 'o',
    250 => 'u',
    252 => 'u'
  }.freeze

  # Attempts to find a verb in GlobalVerb that matches the given input.
  #
  # input   - the text typed by the user; nil is treated as empty input.
  # options - optional Hash; options[:select] is passed to the query's
  #           +select+ to restrict the columns that are loaded.
  #
  # Returns a two-element Array [status, verb]:
  #   [BAD_INPUT, nil]   input does not look like an infinitive
  #   [OK, verb]         exact match on lookup_key
  #   [NEAR_MATCH, verb] no exact match, but a near match was found
  #   [NOT_FOUND, nil]   nothing matched
  def self.lookup_verb(input, options = nil)
    select = options && options[:select]
    cleaned = byte_safe(input.to_s).strip
    return [BAD_INPUT, nil] unless cleaned =~ INFINITIVE_PATTERN

    lookup = map_verb(cleaned)
    verb = first_match(GlobalVerb.where(['lookup_key=?', lookup]), select)
    return [OK, verb] if verb

    # The pattern is bound as a parameter, never interpolated into the SQL,
    # because it is derived from user input.
    near = near_match_pattern(lookup)
    verb = first_match(GlobalVerb.where(['lookup_key LIKE ?', near]), select)
    verb ? [NEAR_MATCH, verb] : [NOT_FOUND, nil]
  end

  # Maps input to a standard ASCII lookup key so near matches are easy to find.
  #
  # Every accented character (given as an HTML entity, as UTF-8, or as a single
  # extended-ASCII byte) is replaced by its plain letter, and "_<letter>" is
  # appended to the key for each replacement. For example the verb spelled
  # with an n-tilde maps to "grunir_n". Someone on a U.S. keyboard is likely to
  # type "grunir"; that finds no exact match, so the caller then searches for
  # 'grunir\_%'.
  #
  # Suffix order: acute-vowel entities first, then n-tilde (entity or UTF-8),
  # then all remaining accented bytes in order of appearance.
  #
  # Byte sequences that are not in the tables are copied through unchanged.
  #
  # Returns the lookup key as a String.
  def self.map_verb(input)
    # Work on raw bytes from here on so any input encoding is handled.
    text = byte_safe(input.to_s).downcase.force_encoding(Encoding::BINARY)
    print "#{text.bytes.map { |b| "#{b} - " }.join}\n" if DEBUG

    subs = [] # letters that were substituted for non-ascii equivalents

    # replace HTML entities (and UTF-8 n-tilde) w/ascii equivalent
    text.gsub!(ACUTE_ENTITY) { subs << $1; $1 }
    text.gsub!(N_TILDE) { subs << 'n'; 'n' }

    key = String.new
    lead_pending = false
    text.each_byte do |byte|
      if lead_pending
        lead_pending = false
        plain = UTF8_TRAIL_TO_ASCII[byte]
        if plain
          key << plain
          subs << plain
          next
        end
        # Not a sequence we translate: keep the lead byte and handle the
        # current byte like any other.
        key << UTF8_LEAD_BYTE.chr
      end

      if byte == UTF8_LEAD_BYTE
        lead_pending = true
      elsif (plain = LATIN1_TO_ASCII[byte])
        key << plain
        subs << plain
      else
        key << byte.chr
      end
    end
    key << UTF8_LEAD_BYTE.chr if lead_pending

    # append underscore + letter for each substitution
    subs.each { |letter| key << '_' << letter }

    key.force_encoding(Encoding::UTF_8)
    key.valid_encoding? ? key : key.force_encoding(Encoding::BINARY)
  end

  # Iterates over all GlobalVerb records and saves a freshly mapped lookup_key
  # on each one. Records are loaded in batches rather than all at once.
  def self.generate_lookup_keys
    GlobalVerb.find_each do |gverb|
      gverb.lookup_key = map_verb(gverb.verb)
      gverb.save
    end
  end

  # Records a search in VerbLog. Logging is turned off for now
  # (SEARCH_LOGGING is false), so this returns nil without touching VerbLog.
  # When enabled it is best-effort: a failure to log is swallowed and nil is
  # returned.
  def self.log_search(word, ip)
    return unless SEARCH_LOGGING

    log = VerbLog.new
    log.verb = word
    log.ip_addr = ip
    log.save
  rescue StandardError
    nil
  end

  # Returns text unchanged when its bytes are valid for its encoding, otherwise
  # a binary-tagged copy, so that strip/downcase/regexp matching cannot raise.
  def self.byte_safe(text)
    text.valid_encoding? ? text : text.dup.force_encoding(Encoding::BINARY)
  end

  # Builds the LIKE pattern used when the exact key is missing:
  #   "sonar_n" => "sonar"       (drop the substitution suffix)
  #   "sonar"   => "sonar\_%"    (any substitution suffix; "\_" is a literal _)
  def self.near_match_pattern(lookup)
    cut = lookup.index('_')
    cut ? lookup[0...cut] : "#{lookup}\\_%"
  end

  # Applies the optional column selection and runs the query for one record.
  def self.first_match(scope, select)
    scope = scope.select(select) if select
    scope.first # this is where the query is sent
  end

  private_class_method :byte_safe, :near_match_pattern, :first_match
end