spanish_dictionary.rb

/app/models/spanish_dictionary.rb

https://bitbucket.org/kapilnakhwa/demo-teachme · Ruby · 230 lines · 200 code · 9 blank · 21 comment · 25 complexity · f1d37b3f54d20e6e5ee9a227ab7e5cfc MD5 · raw file

# Spanish <-> English dictionary lookups.
#
# Every lookup returns a two-element result: a status code (one of the
# constants below) followed by the matching record, or nil when nothing
# was found.
class SpanishDictionary

  # When true, map_word prints the byte values of its normalized input.
  DEBUG = false

  # Status codes returned as the first element of each lookup result.
  OK = 200
  VERB_MATCH = 210
  MESSAGE_ONLY = 250
  NEAR_MATCH = 300
  VERB_NEAR_MATCH = 310
  RELATED_MATCH = 320  # used for a related form of a word (e.g., cars => car, walking => walk, etc.)
  NOT_FOUND = 404
  NIL_WORD = 405

  # Search logging is turned off for now; see log_search.
  LOGGING_ENABLED = false

  # Single-byte (Latin-1) codes of the accented Spanish letters and the plain
  # ascii letter each one is folded to (n-tilde, i/o/a/e/u with acute accent).
  LATIN1_FOLDS = {
    241 => 'n', 237 => 'i', 243 => 'o', 225 => 'a', 233 => 'e', 250 => 'u'
  }.freeze

  # The same letters in UTF-8 are the lead byte 195 followed by one of the
  # bytes below.
  UTF8_LEAD_BYTE = 195
  UTF8_FOLDS = {
    161 => 'a', 179 => 'o', 177 => 'n', 173 => 'i', 169 => 'e', 186 => 'u'
  }.freeze

  # Characters that have a special meaning inside an RLIKE pattern.
  RLIKE_SPECIAL_CHARS = /[\\.\[\]{}()*+?^$|]/

  # Looks up a Spanish word. The steps are tried in order until one succeeds:
  #   1. exact match on the mapped key in SpanishToEnglish        (OK)
  #   2. match ignoring the "_x" substitution suffix              (NEAR_MATCH)
  #   3. VerbFinder.lookup_verb                (VERB_MATCH / VERB_NEAR_MATCH)
  #   4. a related form: trailing "s" or "mente" removed          (RELATED_MATCH)
  #
  # sp_word - the word as typed by the user; nil yields NIL_WORD.
  #
  # returns result code as well as the matching record (nil with NOT_FOUND)
  def self.spanish_to_english(sp_word)
    return NIL_WORD, nil if !sp_word

    lookup = map_word(sp_word.strip)
    status = OK
    word = SpanishToEnglish.where(["lookup_key RLIKE ?", exact_key_pattern(lookup)]).first

    ## try finding a near match if the exact match was unsuccessful
    if !word
      idx = lookup.index('_')
      near_match = idx ? lookup[0...idx] : lookup  # "sonar_n" => "sonar"
      # bound parameter instead of interpolating the pattern into the SQL text
      word = SpanishToEnglish.where(["lookup_key RLIKE ?", loose_key_pattern(near_match)]).first
      status = NEAR_MATCH
    end

    ## try verbs database if it's not found in the words
    if !word
      status, word = VerbFinder.lookup_verb(sp_word, {:select => 'id, lookup_key, verb, meaning'})
      status = VERB_MATCH if status == OK
      status = VERB_NEAR_MATCH if status == NEAR_MATCH
    end

    ## try looking for a related form of the word
    if !word
      new_lookup = related_spanish_key(lookup)
      if new_lookup && !new_lookup.empty? && new_lookup != lookup
        word = SpanishToEnglish.where(["lookup_key RLIKE ?", loose_key_pattern(new_lookup)]).first
        status = RELATED_MATCH if word
      end
    end

    status = NOT_FOUND if !word
    return status, word
  end


  # Looks up an English word in SpanishWordFor. The steps are tried in order:
  #   1. match on the mapped key (any "_x" suffix is ignored)     (OK)
  #   2. a related form: parties => party, cars => car,
  #      walking => walk, matched on english_word                 (RELATED_MATCH)
  #   3. all-digit input or a clock time such as "10:30 pm" gets an unsaved
  #      record whose additional_info links to a help page        (MESSAGE_ONLY)
  #
  # en_word - the word as typed by the user; nil yields NIL_WORD.
  #
  # returns result code as well as the matching record (nil with NOT_FOUND)
  def self.english_to_spanish(en_word)
    return NIL_WORD, nil if !en_word

    status = OK
    lookup = map_word(en_word)
    idx = lookup.index('_')
    lookup = lookup[0...idx] if idx
    word = SpanishWordFor.where(["lookup_key RLIKE ?", loose_key_pattern(lookup)]).first

    ## try looking for a related form of the word
    if !word
      new_lookup = related_english_word(lookup)
      if new_lookup && !new_lookup.empty? && new_lookup != lookup
        word = SpanishWordFor.where(["english_word=?", new_lookup]).first
        status = RELATED_MATCH if word
      end
    end

    if !word && lookup =~ /^\d+$/
      return message_only(
        "If you are looking for information on numbers in Spanish, please use the following link: <br />" +
        "<a href=\"http://www.123teachme.com/learn_spanish/spanish_numbers\">Numbers in Spanish</a>"
      )
    end
    # the raw input is tested here, so the separator may still be a space or a hyphen
    if !word && en_word =~ /^\d+:\d+[ -]?(am|pm)?$/i
      return message_only(
        "Want to tell time in Spanish? " +
        "<a href=\"http://www.123teachme.com/learn_spanish/telling_time_spanish\">Telling Time in Spanish</a>"
      )
    end

    status = NOT_FOUND if !word
    return status, word
  end


  # provides standard mapping of input, so that near matches can be easily found
  # Ex: "gru" + n-tilde + "ir" and "gru&ntilde;ir" both map to "grunir_n"
  # the "_n" indicates that the letter n was substituted
  # This mapping will allow easier lookup via SQL for input that doesn't
  # contain the special spanish characters.  For example, someone with a
  # U.S. keyboard is likely to type "grunir".  So, we would first look for
  # an exact match, and then search for 'grunir_%' if the exact match is not found.
  #
  # input - the raw word; nil yields nil.
  #
  # Returns the lower-cased key: runs of spaces become one hyphen, ';' and "'"
  # are removed, accented letters are replaced by their ascii letter, and one
  # "_<letter>" suffix is appended per replacement (entity replacements first,
  # then raw-character replacements). Any other byte following the UTF-8 lead
  # byte 195 becomes '?'.
  def self.map_word(input)
    return if !input
    new_input = input.downcase
    # replace spaces with hyphens
    new_input.gsub!(/ +/, '-')

    subs = []  # array of letters that are substituted for non-ascii equivalents

    # replace HTML entities w/ascii equivalent; this has to happen before the
    # ';' is stripped below, otherwise the entities can never match
    new_input.gsub!(/&([aeiou])acute;/) { subs << $1; $1 }
    new_input.gsub!(/&ntilde;/) { subs << 'n'; 'n' }

    # remove invalid chars
    new_input.gsub!(/;|'/, '')

    if DEBUG
      dump = ''.dup
      new_input.each_byte { |c| dump << "#{c} - " }
      print "#{dump}\n"
    end

    # replace non-ascii chars w/ascii equivalent, one byte at a time
    buffer = ''.dup
    after_lead_byte = false
    new_input.each_byte do |byte|
      if LATIN1_FOLDS.key?(byte)
        # extended ascii char => plain ascii equivalent
        letter = LATIN1_FOLDS[byte]
        buffer << letter
        subs << letter
      elsif byte == UTF8_LEAD_BYTE
        # first half of a two-byte UTF-8 char; the next byte decides the letter
        after_lead_byte = true
      elsif after_lead_byte
        letter = UTF8_FOLDS.fetch(byte, '?')
        buffer << letter
        subs << letter
        after_lead_byte = false
      else
        buffer << byte.chr
      end
    end

    # append underscore before each sub
    subs.each { |s| buffer << "_#{s}" }
    buffer
  end

  # Records a dictionary search in DictionaryLog.
  #
  # Logging is currently switched off (LOGGING_ENABLED is false), so this
  # returns nil without touching the database. When enabled, a failure to
  # save is swallowed on purpose so that logging can never break a lookup.
  def self.log_search(trans_type, word, ip)
    return unless LOGGING_ENABLED  # turn off logging for now

    begin
      log = DictionaryLog.new
      log.trans_type = trans_type
      log.word = word
      log.ip_addr = ip
      log.save
    rescue StandardError
      nil
    end
  end



  ## prepares the input for english to spanish lookup
  ## usually called in the controller or web tier
  ##
  ## lower-cases and strips the input, removes a leading "to " and a leading
  ## article, and joins the remaining words with hyphens; nil yields nil
  def self.prep_e2s(input)
    return if !input
    s = input.downcase.strip
    s.gsub!(/ +/, ' ')
    # remove to ___ (e.g., to listen => listen)
    s.gsub!(/^to\s+(.*)/) { $1 }
    # remove articles (the, a, an) if they precede a word
    s.gsub!(/^(a|an|the)\s+([a-z].*)/i) { $2 }
    s.gsub!(/ +/, '-')
    s
  end

  ## prepares the input for spanish to english lookup
  ## usually called in the controller or web tier
  ##
  ## lower-cases and strips the input, removes a leading article and a
  ## trailing "de"/"con", and joins the remaining words with hyphens;
  ## nil yields nil
  def self.prep_s2e(input)
    return if !input
    s = input.downcase.strip
    s.gsub!(/ +/, ' ')
    # remove articles (los, las, el, la, un, una, unas, unos) if they precede a word;
    # [[:alpha:]] so that words starting with an accented letter count as words too
    s.gsub!(/^(el|la|los|las|un|una|unas|unos)\s+([[:alpha:]].*)/i) { $2 }
    # remove prepositions after a word (e.g., acostarse de, enojarse con)
    s.gsub!(/([a-z].*)\s+(de|con)$/) { $1 }
    s.gsub!(/ +/, '-')
    s
  end

  # Backslash-escapes regex metacharacters so the text is matched literally
  # when embedded in an RLIKE pattern.
  def self.escape_rlike(text)
    text.gsub(RLIKE_SPECIAL_CHARS) { |ch| "\\#{ch}" }
  end

  # RLIKE pattern matching key as one complete entry of a comma-separated lookup_key.
  def self.exact_key_pattern(key)
    "^(.*,)?#{escape_rlike(key)}(,.*)?$"
  end

  # Like exact_key_pattern, but the entry may also carry a "_x" substitution suffix.
  def self.loose_key_pattern(key)
    "^(.*,)?#{escape_rlike(key)}((_|,).*)?$"
  end

  # Strips a trailing "s" or "mente" (and any "_x" suffix) from a mapped Spanish key;
  # returns the key unchanged when neither ending is present.
  def self.related_spanish_key(lookup)
    case lookup
    when /^(.*)s(_.*)?$/
      $1
    when /^(.*)mente(_.*)?$/
      $1
    else
      lookup
    end
  end

  # Reduces an English word to a related form (ies => y, s and ing dropped);
  # returns nil when the word has none of these endings.
  def self.related_english_word(lookup)
    case lookup
    when /\A(.*)ies\z/
      $1 + "y"
    when /\A(.*)s\z/
      $1
    when /\A(.*)ing\z/
      $1
    end
  end

  # Builds the MESSAGE_ONLY result: an unsaved SpanishWordFor carrying the message.
  def self.message_only(message)
    word = SpanishWordFor.new
    word.additional_info = message
    return MESSAGE_ONLY, word
  end

  private_class_method :escape_rlike, :exact_key_pattern, :loose_key_pattern,
                       :related_spanish_key, :related_english_word, :message_only

end