PageRenderTime 31ms CodeModel.GetById 9ms RepoModel.GetById 1ms app.codeStats 0ms

/app/models/spanish_dictionary.rb

https://bitbucket.org/kapilnakhwa/demo-teachme
Ruby | 230 lines | 200 code | 9 blank | 21 comment | 25 complexity | f1d37b3f54d20e6e5ee9a227ab7e5cfc MD5 | raw file
  1. class SpanishDictionary
  2. DEBUG = false
  3. OK = 200
  4. VERB_MATCH = 210
  5. MESSAGE_ONLY = 250
  6. NEAR_MATCH = 300
  7. VERB_NEAR_MATCH = 310
  8. RELATED_MATCH = 320 # used for a related form of a word (e.g., cars => car, walking => walk, etc.)
  9. NOT_FOUND = 404
  10. NIL_WORD = 405
  11. # first check for match in spanish_to_english table
  12. # next check for match in global words
  13. # finally, check for match using SpanishDictionary.english_to_spanish
  14. # returns result code as well as answer array
  15. def self.spanish_to_english(sp_word)
  16. return NIL_WORD, nil if !sp_word
  17. new_input = sp_word.strip
  18. #if new_input.index(' ')
  19. # raise Exception.new('Space was found in middle of word.')
  20. #end
  21. status = OK
  22. lookup = self.map_word(new_input)
  23. lookup_re = "^(.*,)?#{lookup}(,.*)?$"
  24. word = SpanishToEnglish.where(["lookup_key RLIKE ?", lookup_re]).first
  25. ## try finding a near match if the exact match was unsuccesful
  26. if !word
  27. near_match = ''
  28. idx = lookup.index('_')
  29. if(idx)
  30. near_match = lookup[0...idx] # "sonar_n" => "sonar"
  31. else
  32. near_match = lookup
  33. end
  34. near_match_re = "^(.*,)?#{near_match}((_|,).*)?$"
  35. word = SpanishToEnglish.where("lookup_key RLIKE '#{near_match_re}'").first
  36. status = NEAR_MATCH
  37. end
  38. ## try verbs database if it's not found in the words
  39. if !word
  40. status,word = VerbFinder.lookup_verb(sp_word, {:select => 'id, lookup_key, verb, meaning'})
  41. status = VERB_MATCH if status==OK
  42. status = VERB_NEAR_MATCH if status==NEAR_MATCH
  43. end
  44. ## try looking for a related form of the word
  45. if !word
  46. new_lookup = case lookup
  47. when /^(.*)s(_.*)?$/
  48. $1
  49. when /^(.*)mente(_.*)?$/
  50. $1
  51. else
  52. lookup
  53. end
  54. if new_lookup && (new_lookup != lookup)
  55. new_lookup_re = "^(.*,)?#{new_lookup}((_|,).*)?$"
  56. word = SpanishToEnglish.where(["lookup_key RLIKE ?", new_lookup_re]).first
  57. status = RELATED_MATCH if word
  58. end
  59. end
  60. status = NOT_FOUND if !word
  61. return status, word
  62. end
  63. def self.english_to_spanish(en_word)
  64. status = OK
  65. lookup = SpanishDictionary.map_word(en_word)
  66. idx = lookup.index('_')
  67. lookup = lookup[0...idx] if idx
  68. conditions = "lookup_key ='#{lookup}' OR lookup_key LIKE '#{lookup}\_' "
  69. conditions += " OR lookup_key RLIKE '^(.*,)?#{lookup}(,.*)?$'"
  70. new_lookup_re = "^(.*,)?#{lookup}((_|,).*)?$"
  71. word = SpanishWordFor.where(["lookup_key RLIKE ?", new_lookup_re]).first
  72. ## try looking for a related form of the word
  73. if !word
  74. new_lookup = case lookup
  75. when /(.*)ies/
  76. $1 + "y"
  77. when /(.*)s/
  78. $1
  79. when /(.*)ing/
  80. $1
  81. end
  82. if new_lookup && (new_lookup != lookup)
  83. word = SpanishWordFor.where(["english_word=?", new_lookup]).first
  84. status = RELATED_MATCH if word
  85. end
  86. end
  87. if !word && lookup =~ /^\d+$/
  88. word = SpanishWordFor.new
  89. message = "If you are looking for information on numbers in Spanish, please use the following link: <br />"
  90. message += "<a href=\"http://www.123teachme.com/learn_spanish/spanish_numbers\">Numbers in Spanish</a>"
  91. word.additional_info = message
  92. return MESSAGE_ONLY, word
  93. end
  94. if !word && en_word =~ /^\d+:\d+[ -]?(am|pm)?$/i # by this point spaces are replaced by hyphens
  95. word = SpanishWordFor.new
  96. message = "Want to tell time in Spanish? "
  97. message += "<a href=\"http://www.123teachme.com/learn_spanish/telling_time_spanish\">Telling Time in Spanish</a>"
  98. word.additional_info = message
  99. return MESSAGE_ONLY, word
  100. end
  101. status = NOT_FOUND if !word
  102. return status, word
  103. end
  104. # provides standard mapping of input, so that near matches can be easily found
  105. # Ex: gru?and gru&ntilde;ir both map to "grunir_n"
  106. # the "_n" indicates that the letter n was substituted
  107. # This mapping will allow easier lookup via SQL for input that doesn't
  108. # contain the special spanish characters. For example, someone with a
  109. # U.S. keyboard is likely to type "grunir". So, we would first look for
  110. # an exact match, and then search for 'grunir_%' if the exact match is not found.
  111. def self.map_word(input)
  112. return if !input
  113. new_input = input.downcase
  114. # replace spaces with hyphens
  115. new_input.gsub!(/ +/, '-')
  116. # remove invalid chars
  117. #new_input = new_input.gsub(/\n|\s|\d|\r|,|'|"|;|=/m)
  118. new_input.gsub!(/;|'/, '')
  119. if DEBUG
  120. buf = ''
  121. new_input.each_byte {|c| buf += "#{c} - " }
  122. print "#{buf}\n"
  123. end
  124. subs = [] # array of letters that are substituted for non-ascii equivalents
  125. # replace HTML entities w/ascii equivalent
  126. new_input.gsub!(/&([aeiou])acute;/) {|s| subs << $1; $1; }
  127. new_input.gsub!(/&ntilde;/) {|s| subs << 'n'; 'n'; }
  128. # replace non-ascii chars w/ascii equivalent
  129. buffer = ''
  130. flag = false
  131. new_input.each_byte do |c|
  132. # replace extended ascii chars w/plain ascii equivalent
  133. # use ascii table for these (http://bignosebird.com/ascii.shtml)
  134. if c==241
  135. buffer += 'n'
  136. subs << 'n'
  137. elsif c==237
  138. buffer += 'i'
  139. subs << 'i'
  140. elsif c==243
  141. buffer += 'o'
  142. subs << 'o'
  143. elsif c==225
  144. buffer += 'a'
  145. subs << 'a'
  146. elsif c==233
  147. buffer += 'e'
  148. subs << 'e'
  149. elsif c==250
  150. buffer += 'u'
  151. subs << 'u'
  152. # replace UTF-8 chars w/plain ascii equivalent
  153. elsif c == 195
  154. flag = true
  155. elsif flag
  156. c2 = 'a' if c==161
  157. c2 = 'o' if c==179
  158. c2 = 'n' if c==177
  159. c2 = 'i' if c==173
  160. c2 = 'e' if c==169
  161. c2 = 'u' if c==186
  162. c2 = '?' if !c2
  163. buffer += c2
  164. subs << c2
  165. flag=false
  166. else
  167. buffer += c.chr
  168. end
  169. end
  170. #append underscore before each sub
  171. subs.each {|s| buffer += "_#{s}"}
  172. buffer
  173. end
  174. def self.log_search(trans_type, word, ip)
  175. return # turn off logging for now
  176. begin
  177. log = DictionaryLog.new
  178. log.trans_type = trans_type
  179. log.word = word
  180. log.ip_addr = ip
  181. log.save
  182. rescue => e
  183. end
  184. end
  185. ## prepares the input for english to spanish lookup
  186. ## usually called in the controller or web tier
  187. def self.prep_e2s(input)
  188. return if !input
  189. s = input.downcase.strip
  190. s.gsub!(/ +/, ' ')
  191. # remove to ___ (e.g., to listen => listen)
  192. s.gsub!(/^to\s+(.*)/){|m| $1}
  193. # remove articles (the, a, an) if they precede a word
  194. s.gsub!(/^(a|an|the)\s+([a-z].*)+/i){|m| $2}
  195. s.gsub!(/ +/, '-')
  196. s
  197. end
  198. ## prepares the input for english to spanish lookup
  199. ## usually called in the controller or web tier
  200. def self.prep_s2e(input)
  201. return if !input
  202. s = input.downcase.strip
  203. s.gsub!(/ +/, ' ')
  204. # remove articles (los, las, el, la, un, una, unas, unos) if they precede a word
  205. s.gsub!(/^(el|la|los|las|un|una|unas|unos)\s+([a-z].*)+/i){|m| $2}
  206. # remove prepositions after a word (e.g., acostarse de, enojarse con)
  207. s.gsub!(/([a-z].*)\s+(de|con)$/){|m| $1}
  208. s.gsub!(/ +/, '-')
  209. s
  210. end
  211. end