# /app/models/spanish_dictionary.rb
# Ruby | 230 lines | 200 code | 9 blank | 21 comment | 25 complexity | f1d37b3f54d20e6e5ee9a227ab7e5cfc MD5 | raw file
# Dictionary lookups between Spanish and English.
#
# The lookup methods return two values: a status code (one of the constants
# below) and the matching record, or nil when nothing matched.
class SpanishDictionary
  DEBUG = false

  # Status codes returned alongside the lookup result.
  OK = 200
  VERB_MATCH = 210
  MESSAGE_ONLY = 250
  NEAR_MATCH = 300
  VERB_NEAR_MATCH = 310
  RELATED_MATCH = 320 # used for a related form of a word (e.g., cars => car, walking => walk, etc.)
  NOT_FOUND = 404
  NIL_WORD = 405

  # Accented vowels (a, e, i, o, u with acute accent) and their plain ASCII letters.
  ACCENTED_VOWELS = {
    "\u00E1" => 'a',
    "\u00E9" => 'e',
    "\u00ED" => 'i',
    "\u00F3" => 'o',
    "\u00FA" => 'u'
  }.freeze

  # Stand-in for any other character in U+00C0..U+00FF (e.g. u-umlaut, c-cedilla).
  UNMAPPED_PLACEHOLDER = '?'

  NUMBERS_MESSAGE = (
    "If you are looking for information on numbers in Spanish, please use the following link: <br />" +
    "<a href=\"http://www.123teachme.com/learn_spanish/spanish_numbers\">Numbers in Spanish</a>"
  ).freeze

  TIME_MESSAGE = (
    "Want to tell time in Spanish? " +
    "<a href=\"http://www.123teachme.com/learn_spanish/telling_time_spanish\">Telling Time in Spanish</a>"
  ).freeze

  # Looks up a Spanish word. The steps are tried in order until one finds a record:
  #   1. exact match of the mapped key in SpanishToEnglish      -> OK
  #   2. match on the key without its "_x" substitution suffix  -> NEAR_MATCH
  #   3. VerbFinder.lookup_verb                                 -> VERB_MATCH / VERB_NEAR_MATCH
  #      (any other status VerbFinder returns is passed through)
  #   4. related form (trailing "s" or "mente" removed)         -> RELATED_MATCH
  #
  # @param sp_word [String, nil] the word as typed by the user
  # @return [Array] status code and record; NIL_WORD and nil when sp_word is
  #   nil/false, NOT_FOUND and nil when every step fails
  def self.spanish_to_english(sp_word)
    return NIL_WORD, nil unless sp_word

    lookup = map_word(sp_word.strip)

    status = OK
    word = SpanishToEnglish.where(['lookup_key RLIKE ?', exact_key_pattern(lookup)]).first

    ## try finding a near match if the exact match was unsuccessful
    unless word
      # "sonar_n" => "sonar". The pattern is bound as a parameter (it used to be
      # interpolated straight into the SQL string).
      near_match_re = near_key_pattern(base_key(lookup))
      word = SpanishToEnglish.where(['lookup_key RLIKE ?', near_match_re]).first
      status = NEAR_MATCH
    end

    ## try verbs database if it's not found in the words
    unless word
      # NOTE: the verb lookup receives the raw input, not the mapped key.
      status, word = VerbFinder.lookup_verb(sp_word, { select: 'id, lookup_key, verb, meaning' })
      status = VERB_MATCH if status == OK
      status = VERB_NEAR_MATCH if status == NEAR_MATCH
    end

    ## try looking for a related form of the word
    unless word
      related = related_spanish_key(lookup)
      if related != lookup
        word = SpanishToEnglish.where(['lookup_key RLIKE ?', near_key_pattern(related)]).first
        status = RELATED_MATCH if word
      end
    end

    status = NOT_FOUND unless word
    return status, word
  end

  # Looks up an English word in SpanishWordFor.
  #
  # The mapped key (without any "_x" suffix) is matched first; if that fails a
  # related form is tried (parties => party, cars => car, walking => walk).
  # Purely numeric input and clock times that match nothing get a
  # MESSAGE_ONLY result whose record only carries a help link.
  #
  # @param en_word [String, nil] the word as typed by the user
  # @return [Array] status code and record; NIL_WORD and nil when en_word is
  #   nil/false, NOT_FOUND and nil when nothing matched
  def self.english_to_spanish(en_word)
    # Same contract as spanish_to_english; without this guard nil input
    # raised NoMethodError further down.
    return NIL_WORD, nil unless en_word

    status = OK
    lookup = base_key(map_word(en_word))
    word = SpanishWordFor.where(['lookup_key RLIKE ?', near_key_pattern(lookup)]).first

    ## try looking for a related form of the word
    unless word
      related = related_english_form(lookup)
      if related && related != lookup
        word = SpanishWordFor.where(['english_word=?', related]).first
        status = RELATED_MATCH if word
      end
    end

    unless word
      return message_only_result(NUMBERS_MESSAGE) if lookup =~ /^\d+$/
      # The raw input is tested here, so both "10:30 pm" and "10:30-pm" qualify.
      return message_only_result(TIME_MESSAGE) if en_word =~ /^\d+:\d+[ -]?(am|pm)?$/i
    end

    status = NOT_FOUND unless word
    return status, word
  end

  # Provides a standard mapping of input, so that near matches can be easily found.
  # Ex: "gruñir" maps to "grunir_n"; the "_n" indicates that the letter n was
  # substituted. One "_x" suffix is appended per substituted letter.
  # This mapping allows easier lookup via SQL for input that doesn't contain
  # the special Spanish characters. For example, someone with a U.S. keyboard
  # is likely to type "grunir". So, we would first look for an exact match,
  # and then search for 'grunir_%' if the exact match is not found.
  #
  # Steps: lower-case; replace "&?acute;" entities; turn runs of spaces into a
  # hyphen; drop ";" and "'"; replace n-tilde; replace the remaining
  # characters in U+00C0..U+00FF (accented vowel => plain vowel, anything else
  # => "?"). Suffixes are appended in the order the substitutions were made:
  # entities first, then every n-tilde, then the rest left to right.
  #
  # Input that is not valid UTF-8 is read as ISO-8859-1. Characters outside
  # the ranges above are passed through unchanged.
  #
  # @param input [String, nil]
  # @return [String, nil] the mapped key, or nil when input is nil/false
  def self.map_word(input)
    return unless input

    text = utf8_text(input).downcase
    subs = [] # letters that were substituted for non-ascii equivalents

    # replace HTML entities w/ascii equivalent; this has to happen before the
    # ";" is stripped below, otherwise the entity can never match
    text.gsub!(/&([aeiou])acute;/) do
      letter = Regexp.last_match(1)
      subs << letter
      letter
    end

    # replace spaces with hyphens
    text.gsub!(/ +/, '-')
    # remove invalid chars
    text.gsub!(/;|'/, '')

    print "#{text.each_byte.map { |byte| "#{byte} - " }.join}\n" if DEBUG

    # n-tilde is handled on its own pass, ahead of the accented vowels
    text.gsub!(/\u00F1/) do
      subs << 'n'
      'n'
    end

    # replace the remaining accented characters w/plain ascii equivalent
    mapped = text.gsub(/[\u00C0-\u00FF]/) do |char|
      plain = ACCENTED_VOWELS.fetch(char, UNMAPPED_PLACEHOLDER)
      subs << plain
      plain
    end

    # append underscore + letter for each substitution
    mapped + subs.map { |letter| "_#{letter}" }.join
  end

  # Records a dictionary search in DictionaryLog. Logging is currently
  # switched off, so the method returns nil straight away.
  def self.log_search(trans_type, word, ip)
    return # turn off logging for now

    log = DictionaryLog.new
    log.trans_type = trans_type
    log.word = word
    log.ip_addr = ip
    log.save
  rescue StandardError
    # best effort: a failure to log is deliberately ignored
    nil
  end

  ## prepares the input for english to spanish lookup
  ## usually called in the controller or web tier
  ##
  ## Lower-cases and strips the input, collapses runs of spaces, removes a
  ## leading "to " and a leading article, then turns spaces into hyphens.
  ## Returns nil when input is nil/false.
  def self.prep_e2s(input)
    return unless input

    s = input.downcase.strip
    s.gsub!(/ +/, ' ')
    # remove to ___ (e.g., to listen => listen)
    s.gsub!(/^to\s+(.*)/) { Regexp.last_match(1) }
    # remove articles (the, a, an) if they precede a word
    s.gsub!(/^(a|an|the)\s+([a-z].*)+/i) { Regexp.last_match(2) }
    s.gsub!(/ +/, '-')
    s
  end

  ## prepares the input for spanish to english lookup
  ## usually called in the controller or web tier
  ##
  ## Lower-cases and strips the input, collapses runs of spaces, removes a
  ## leading article and a trailing "de"/"con", then turns spaces into
  ## hyphens. Returns nil when input is nil/false.
  def self.prep_s2e(input)
    return unless input

    s = input.downcase.strip
    s.gsub!(/ +/, ' ')
    # remove articles (los, las, el, la, un, una, unas, unos) if they precede a word
    s.gsub!(/^(el|la|los|las|un|una|unas|unos)\s+([a-z].*)+/i) { Regexp.last_match(2) }
    # remove prepositions after a word (e.g., acostarse de, enojarse con)
    s.gsub!(/([a-z].*)\s+(de|con)$/) { Regexp.last_match(1) }
    s.gsub!(/ +/, '-')
    s
  end

  # RLIKE pattern matching +key+ as a whole entry of a comma-separated lookup_key.
  # NOTE(review): key is interpolated unescaped, so regex metacharacters in the
  # input (and the "?" placeholder produced by map_word) are interpreted by the
  # database's regex engine - confirm whether escaping is wanted.
  def self.exact_key_pattern(key)
    "^(.*,)?#{key}(,.*)?$"
  end

  # Like exact_key_pattern, but the entry may also continue with an "_x" suffix.
  def self.near_key_pattern(key)
    "^(.*,)?#{key}((_|,).*)?$"
  end

  # Part of a mapped key before its first underscore ("sonar_n" => "sonar").
  def self.base_key(key)
    cut = key.index('_')
    cut ? key[0...cut] : key
  end

  # Mapped Spanish key with a trailing "s" or "mente" (and any "_x" suffix)
  # removed; the key itself when neither ending is present.
  def self.related_spanish_key(key)
    case key
    when /^(.*)s(_.*)?$/ then Regexp.last_match(1)
    when /^(.*)mente(_.*)?$/ then Regexp.last_match(1)
    else key
    end
  end

  # Base form of an English key: "ies" => "y", trailing "s" or "ing" removed.
  # The patterns are anchored so only real suffixes count ("listening" =>
  # "listen", not "li"). Returns nil when no suffix applies.
  def self.related_english_form(key)
    case key
    when /\A(.*)ies\z/ then "#{Regexp.last_match(1)}y"
    when /\A(.*)s\z/ then Regexp.last_match(1)
    when /\A(.*)ing\z/ then Regexp.last_match(1)
    end
  end

  # Builds a MESSAGE_ONLY result: an unsaved SpanishWordFor carrying only +html+.
  def self.message_only_result(html)
    word = SpanishWordFor.new
    word.additional_info = html
    return MESSAGE_ONLY, word
  end

  # Copy of +input+ tagged as UTF-8; bytes that are not valid UTF-8 are read
  # as ISO-8859-1 and transcoded.
  def self.utf8_text(input)
    text = input.dup.force_encoding(Encoding::UTF_8)
    return text if text.valid_encoding?

    input.dup.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
  end

  private_class_method :exact_key_pattern, :near_key_pattern, :base_key,
                       :related_spanish_key, :related_english_form,
                       :message_only_result, :utf8_text
end