PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/app/models/verb_finder.rb

https://bitbucket.org/kapilnakhwa/demo-teachme
Ruby | 132 lines | 100 code | 12 blank | 20 comment | 16 complexity | 0302714cd0e9e545ce72eec5c0f3bf8e MD5 | raw file
  1. class VerbFinder
  2. DEBUG = false
  3. OK = 200
  4. NEAR_MATCH = 300
  5. NOT_FOUND = 404
  6. BAD_INPUT = 405
  7. # attempts to find a verb from the global verbs db that matches the given input
  8. # - input must first be mapped to handle special characters, remove invalid characters,etc.
  9. # - exact match is sought first, then near matches
  10. def self.lookup_verb(input, options=nil)
  11. select = options && options[:select]
  12. new_input = input.strip
  13. # OOOPS! What about O?­r???
  14. if new_input !~ /^[^ ]*(r|se)(_.*)?$/i
  15. return BAD_INPUT, nil
  16. end
  17. status = OK
  18. lookup = self.map_verb(new_input)
  19. verb = GlobalVerb.where(["lookup_key=?", lookup])
  20. verb = verb.select(select) if select
  21. verb = verb.first # this is where query is sent
  22. if !verb
  23. near_match = ''
  24. idx = lookup.index('_')
  25. if(idx)
  26. near_match = lookup[0...idx] # "sonar_n" => "sonar"
  27. else
  28. near_match = "#{lookup}\\_%"
  29. end
  30. verb = GlobalVerb.where("lookup_key LIKE '#{near_match}'")
  31. verb = verb.select(select) if select
  32. verb = verb.first
  33. status = NEAR_MATCH
  34. end
  35. status = NOT_FOUND if !verb
  36. return status, verb
  37. end
  38. # provides standard mapping of input, so that near matches can be easily found
  39. # Ex: gru??and gruñir both map to "grunir_n"
  40. # the "_n" indicates that the letter n was substituted
  41. # This mapping will allow easier lookup via SQL for input that doesn't
  42. # contain the special spanish characters. For example, someone with a
  43. # U.S. keyboard is likely to type "grunir". So, we would first look for
  44. # an exact match, and then search for 'grunir_%' if the exact match is not found.
  45. def self.map_verb(input)
  46. new_input = input.downcase
  47. # remove spaces and invalid chars
  48. #new_input = input.gsub(/\n|\s|\d|\r|,|'|"|=/m)
  49. if DEBUG
  50. buf = ''
  51. new_input.each_byte {|c| buf += "#{c} - " }
  52. print "#{buf}\n"
  53. end
  54. subs = [] # array of letters that are substituted for non-ascii equivalents
  55. # replace HTML entities w/ascii equivalent
  56. new_input.gsub!(/&([aeiou])acute;/) {|s| subs << $1; $1; }
  57. new_input.gsub!(/&ntilde;/) {|s| subs << 'n'; 'n'; }
  58. # replace non-ascii chars w/ascii equivalent
  59. buffer = ''
  60. flag = false
  61. new_input.each_byte do |c|
  62. # replace extended ascii chars w/plain ascii equivalent
  63. # use ascii table for these (http://bignosebird.com/ascii.shtml)
  64. if c==241
  65. buffer += 'n'
  66. subs << 'n'
  67. elsif c==237
  68. buffer += 'i'
  69. subs << 'i'
  70. elsif c==243
  71. buffer += 'o'
  72. subs << 'o'
  73. elsif c==225
  74. buffer += 'a'
  75. subs << 'a'
  76. elsif c==233
  77. buffer += 'e'
  78. subs << 'e'
  79. elsif c==250
  80. buffer += 'u'
  81. subs << 'u'
  82. # replace UTF-8 chars w/plain ascii equivalent
  83. elsif c == 195
  84. flag = true
  85. elsif flag
  86. c2 = 'a' if c==161
  87. c2 = 'o' if c==179
  88. c2 = 'n' if c==177
  89. c2 = 'i' if c==173
  90. c2 = 'e' if c==169
  91. c2 = 'u' if c==186
  92. buffer += c2
  93. subs << c2
  94. flag=false
  95. else
  96. buffer += c.chr
  97. end
  98. end
  99. #append underscore before each sub
  100. subs.each {|s| buffer += "_#{s}"}
  101. buffer
  102. end
  103. # iterates over all records and saves lookup_keys
  104. def self.generate_lookup_keys
  105. GlobalVerb.find(:all).each do |gverb|
  106. gverb.lookup_key = self.map_verb(gverb.verb)
  107. gverb.save
  108. end
  109. end
  110. def self.log_search(word, ip)
  111. return # turn off logging for now
  112. begin
  113. log = VerbLog.new
  114. log.verb = word
  115. log.ip_addr = ip
  116. log.save
  117. rescue => e
  118. end
  119. end
  120. end