/script/teachme/extract_verbs.rb
Ruby | 55 lines | 44 code | 4 blank | 7 comment | 12 complexity | ad61e9813253335c6c4f65c0720d0ed6 MD5 | raw file
#!/usr/bin/env ruby
#
# Scans a dictionary text file for Spanish verb entries and stores each one
# that VerbFinder does not already resolve as a GlobalVerb record.
#
# An entry line is recognised by the "=<spanish>,=" / "=<spanish>=," delimiter
# followed by the definition text. For every stored entry a line of the form
# "<spanish> || <definition> || <lookup key>" is printed to stdout; problems
# (exceptions, failed saves) are reported on stderr so the listing stays clean.
#
# Usage: run from a Rails application checkout; the script boots the app in
# the production environment.

require File.dirname(__FILE__) + '/../../config/boot'

enviro = 'production'
ENV["RAILS_ENV"] = enviro
RAILS_ENV.replace(enviro) if defined?(RAILS_ENV)
require RAILS_ROOT + '/config/environment'

#path = File.dirname(__FILE__) + "/harvest1B.txt"
path = File.dirname(__FILE__) + "/latin1.txt"

# Group 1 is the Spanish headword part, group 3 the definition.
entry_pattern = /=(.*?)(,=|=,)\s?(.*)/m

# Stream the file line by line instead of loading it all into memory.
File.foreach(path) do |line|
  begin
    line.chomp!
    entry = entry_pattern.match(line)
    next unless entry

    sp = entry[1]

    ## prepare definition part
    defn = entry[3].gsub(/_/, '').gsub(/\.$/, '')
    # Turn "=word=" markup into "[word]". Single quotes are required so that
    # \1 is a back-reference rather than an octal escape.
    defn = defn.gsub(/=(\w.+)=/, '[\1]')

    ## prepare spanish part
    # A headword counts as a verb when it has an infinitive-looking ending
    # followed by a parenthesised note (e.g., "volar, (ue)"), or when it ends
    # in such an ending and the definition contains "to ".
    annotated_infinitive = sp =~ /(ar|er|ir|se),\s*\(/
    plain_infinitive     = sp =~ /(ar|er|ir|se)$/ && defn =~ /to /
    next unless annotated_infinitive || plain_infinitive

    next if sp =~ /^-+/ # skip entries starting with a dash
    next if sp =~ /--+/ # skip entries with -- anywhere (although, these may be useful to store as phrases/idioms)

    ## generate lookup
    # Strip the parenthesised note (and a preceding comma) before looking up.
    lk_input = sp.gsub(/,?\s?\(.*\)\s?/, '').strip
    lookup = SpanishDictionary.map_word(lk_input)
    verb, code = VerbFinder.lookup_verb(lk_input)

    # NOTE(review): presumably codes >= 400 mean "not reliably found" -- confirm
    # against VerbFinder.lookup_verb.
    next unless !verb || code >= 400

    ## print
    print "#{sp} || #{defn} || #{lookup}\n"

    gv = GlobalVerb.new
    gv.verb = sp
    gv.meaning = defn
    gv.conjugations = ''
    gv.lookup_key = lookup
    gv.source = 'drupal s2e'

    # save returns false instead of raising when the record is rejected, so
    # report it rather than dropping the entry silently.
    unless gv.save
      $stderr.puts "could not save #{sp.inspect}: #{gv.errors.full_messages.join('; ')}"
    end
  rescue StandardError => e
    # Best effort: report the failing line and carry on with the next one.
    # StandardError (not Exception) so Ctrl-C and exit still stop the script.
    $stderr.puts "#{e.class}: #{e.message} (line: #{line.inspect})"
  end
end