/lib/global_verb_parser.rb
Ruby | 296 lines | 268 code | 28 blank | 0 comment | 7 complexity | 5e8a8386ffa5e6b454113f4c2444dc52 MD5 | raw file
# Turns the conjugation HTML stored on a global verb record
# (+global_verb.conjugations+) into per-tense hashes collected in +tiempos+
# (category name => array of tense hashes) and persists them through the
# VerbTime, VerbPerson and ConjugatedVerb models.
#
# A tense hash may carry these keys:
#   :cat      - category ("Normal", "Subjuntivo", "Imperativo",
#               "Otras formas" or "Before")
#   :name     - tense name
#   :label    - stored as the VerbTime explanation; tenses without a label
#               are treated as free-text notes
#   :personas - hash of person label => conjugated form
#   :txt      - free text (old markup only)
#   :or       - alternate forms as "person:form - person:form" (old markup only)
class StructuredVerb
  attr_accessor :notes, :tiempos, :global_verb

  # Pronoun markers searched for in the old markup. The position of each
  # group is the index used to pick the matching entry of @personas_labels.
  OLD_FORMAT_PERSON_MARKERS = [
    ['yo'], ['tú'], ['**vos'], ['él'], ['nosotros', '-as'],
    ['vosotros', '-as'], ['ellos', 'ellas', 'ustedes']
  ].freeze

  # @param global_verb record exposing #conjugations, #id, #notes= and #save
  def initialize(global_verb)
    self.global_verb = global_verb

    @personas = [
      ['yo'], ['tú'], ['vos'], ['él', 'ella', 'usted'], ['nosotros', '-as'],
      ['vosotros', '-as'], ['ellos', 'ellas', 'ustedes']
    ]
    @personas_labels = @personas.map { |group| group.join('/') }

    self.tiempos = {}
    self.notes = []
  end

  # Parses the table-based markup: every <tr> with a single <td> switches the
  # current mode; every other row is read according to that mode. The
  # collected tenses are then saved category by category.
  #
  # Rows that do not have enough cells for their mode are skipped.
  #
  # @return [Array] the result of the last #save_cat call
  def parse_new
    fragment = Nokogiri::HTML.fragment("<basic>#{global_verb.conjugations}</basic>")

    english = 'inglés'
    # The "vos" column (index 2) is not part of the new table layout.
    person_labels = [english] + @personas_labels[0..1] + @personas_labels[3..-1]
    imperative_labels = [english, @personas_labels[1], @personas_labels[5],
                         @personas_labels[3], @personas_labels[6]]

    mode = nil
    imperative_index = 1
    affirmative = {}
    negative = {}

    fragment.css('tr').each do |row|
      cells = row.css('td')

      if cells.length == 1
        mode = cells.first.inner_text
        mode = 'Normal' if mode == 'Indicativo'
        next
      end

      case mode
      when 'Normal', 'Subjuntivo'
        add_tiempo(person_row(mode, cells, person_labels))
      when 'Imperativo'
        # Cells 2 and 3 hold the affirmative and negative forms.
        next if cells.length < 4
        # Rows whose first two cells hold the same text carry no forms
        # (original author's remark: "strange unicode character?").
        next if cells[0].inner_text == cells[1].inner_text

        person = imperative_labels[imperative_index]
        imperative_index += 1

        if affirmative.empty?
          english_forms = cells[1].inner_text.split(/!/u).map { |part| part + '!' }
          # NOTE(review): both entries receive english_forms[0]; the negative
          # one may have been meant to use english_forms[1] - confirm against
          # real data before changing.
          affirmative.update(:label => 'new_format', :cat => mode,
                             :name => 'Imperative afirmativo',
                             :personas => { english => english_forms[0] })
          negative.update(:label => 'new_format', :cat => mode,
                          :name => 'Imperative negativo',
                          :personas => { english => english_forms[0] })
        end

        # More imperative rows than known persons: nothing to file them under.
        next if person.nil?

        affirmative[:personas][person] = cells[2].inner_text
        negative[:personas][person] = cells[3].inner_text
      when 'Otras formas'
        add_tiempo(single_form_row(mode, cells, english))
      else
        puts "Shouldn't be in here with #{global_verb.id}"
      end
    end

    tiempos['Imperativo'] = [affirmative, negative]

    save_categories(['Normal', 'Subjuntivo', 'Imperativo', 'Otras formas'])
  end

  # Parses the <div>/<p>-based markup: every <div> becomes one tense hash
  # (see #make_tiempo), which is then saved category by category.
  #
  # @return [Array] the result of the last #save_cat call
  def parse_old
    fragment = Nokogiri::HTML.fragment("<basic>#{global_verb.conjugations}</basic>")

    fragment.css('div').each { |node| add_tiempo(make_tiempo(node)) }

    save_categories(['Before', 'Normal', 'Subjuntivo', 'Imperativo', 'Otras formas'])
  end

  # Reads the conjugated forms out of one paragraph of the old markup.
  #
  # Non-text children are scanned for pronoun markers; the next text child is
  # taken as the form for every person matched since the previous text child.
  # A paragraph made of text children only yields its whole content under
  # the key "single".
  #
  # NOTE(review): the "-as" marker belongs to both the nosotros and the
  # vosotros group, so a pronoun text containing "-as" is filed under both
  # indexes - verify against real markup.
  #
  # @param node element responding to #children and #content
  # @return [Hash] person label (or "single") => form
  def parse_personas(node)
    pending = []
    single = true
    forms = []

    node.children.each do |child|
      content = child.content
      if child.text?
        pending.each { |index| forms[index] = content }
        pending = []
      else
        OLD_FORMAT_PERSON_MARKERS.each_with_index do |markers, index|
          markers.each { |marker| pending << index if content.include?(marker) }
        end
        single = false
      end
    end

    result = {}
    if single
      result['single'] = node.content
    else
      # Persons that never matched leave nil gaps, stored as empty strings.
      forms.each_with_index { |form, index| result[@personas_labels[index]] = form.to_s }
    end
    result
  end

  # Builds one tense hash from a <div> of the old markup by dispatching on
  # the class attribute of each <p> inside it.
  #
  # @param node element responding to #css
  # @return [Hash] tense hash with :cat always set
  def make_tiempo(node)
    tiempo = {}

    node.css('p').each do |para|
      css_class = para[:class]

      if css_class.blank?
        append_text(tiempo, para)
      elsif css_class == 'cabTiempo0' || css_class == 'cabTiempo1'
        tiempo[:name] = para.content
      elsif css_class == 'cabTiempoINT'
        tiempo[:label] = para.content
      elsif css_class == 'negrita'
        if tiempo[:label].nil?
          # Bold text before any label is plain note text.
          append_text(tiempo, para)
        elsif tiempo[:personas].nil?
          tiempo[:personas] = parse_personas(para)
        elsif tiempo[:or].nil?
          tiempo[:or] = para.inner_text
        else
          tiempo[:or] += para.inner_html.gsub("<br>", " - ").gsub("</span>", ":").gsub(/<span.*?>/, '')
        end
      end
    end

    tiempo[:cat] = tiempo_category(tiempo)
    tiempo
  end

  # Persists the tenses of one category.
  #
  # Tenses with a label get a VerbTime (created if missing, explanation
  # filled in if blank) and one ConjugatedVerb per person, plus one
  # alternate ConjugatedVerb per well-formed "person:form" entry in :or.
  # Tenses without a label only contribute their :txt to the notes; when any
  # note text was gathered it is written to gv.notes and gv is saved.
  #
  # @param ar [Array<Hash>, nil] tense hashes; nil is treated as empty
  # @param gv global verb record the conjugations belong to
  # @return [Array(String, String, String)] gathered note text followed by
  #   two strings that are always empty
  def save_cat(ar, gv)
    istr = ''
    estr = ''
    bstr = ''

    (ar || []).each do |t|
      if t[:label].blank?
        bstr += t[:txt].to_s
        next
      end

      vt = find_or_create_verb_time(t)

      (t[:personas] || {}).each do |person, form|
        next if person.nil?
        store_conjugation(vt, find_or_create_person(person.strip), gv, 0, form.to_s.strip)
      end

      next if t[:or].blank?

      t[:or].split(' - ').each do |entry|
        parts = entry.split(':')
        next if parts[-2].nil? || parts[-1].nil?

        person = parts[-2].strip
        form = parts[-1].strip
        next if person.blank? || form.blank?

        store_conjugation(vt, find_or_create_person(person), gv, 1, form)
      end
    end

    if bstr.length > 0
      gv.notes = bstr
      gv.save
    end

    return bstr, istr, estr
  end

  private

  # Files a tense hash under its category in +tiempos+; nil is ignored.
  def add_tiempo(tiempo)
    return if tiempo.nil?
    (tiempos[tiempo[:cat]] ||= []) << tiempo
  end

  # Saves the given categories in order and returns the last #save_cat result.
  def save_categories(categories)
    categories.map { |cat| save_cat(tiempos[cat], global_verb) }.last
  end

  # Builds the tense hash for an indicative/subjunctive table row, or returns
  # nil when the row has no cells or its first cell is 2 characters or less.
  def person_row(mode, cells, labels)
    return if cells.empty?

    name = cells.first.inner_text.strip
    return unless name.length > 2

    forms = {}
    cells[1..-1].each_with_index do |cell, index|
      forms[labels[index]] = cell.inner_text.strip
    end

    { :label => 'new_format', :cat => mode, :name => name, :personas => forms }
  end

  # Builds the tense hash for an "Otras formas" row (name, English, form), or
  # returns nil when the row has fewer than three cells.
  def single_form_row(mode, cells, english_label)
    return if cells.length < 3

    {
      :cat => mode,
      :name => cells.first.inner_text.strip,
      :label => 'new_format',
      :personas => {
        english_label => cells[1].inner_text.strip,
        'single' => cells[2].inner_text.strip
      }
    }
  end

  # Appends a paragraph's inner HTML to the tense's free text.
  def append_text(tiempo, para)
    tiempo[:txt] = (tiempo[:txt] || '') + "\n\n" + para.inner_html.gsub('<p>', "\n").gsub('</p>', "\n")
  end

  # Picks the category of an old-markup tense from its name and the number of
  # persons found; a missing :personas counts as zero persons.
  def tiempo_category(tiempo)
    name = tiempo[:name]
    return 'Before' if name.nil?
    return 'Subjuntivo' if name.include?('Subjuntivo')
    return 'Imperativo' if name.include?('Imperativo')

    case (tiempo[:personas] || {}).keys.length
    when 0 then 'Before'
    when 1 then 'Otras formas'
    else 'Normal'
    end
  end

  # Finds the VerbTime for a tense, creating it or filling in a blank
  # explanation as needed.
  def find_or_create_verb_time(tiempo)
    vt = VerbTime.first(:conditions => ["category=? and name=?", tiempo[:cat], tiempo[:name]])
    if vt.nil?
      vt = VerbTime.create(:category => tiempo[:cat], :name => tiempo[:name], :explanation => tiempo[:label])
    elsif vt.explanation.blank?
      vt.explanation = tiempo[:label]
      vt.save
    end
    vt
  end

  # Finds the VerbPerson with the given Spanish label, creating it if missing.
  def find_or_create_person(spanish)
    VerbPerson.first(:conditions => ["spanish=?", spanish]) || VerbPerson.create(:spanish => spanish)
  end

  # Creates or updates the ConjugatedVerb for one tense/person/verb/alternate
  # combination.
  def store_conjugation(vt, vp, gv, alternate, conjugation)
    cv = ConjugatedVerb.first(
      :conditions => ["alternate=? and verb_time_id=? and verb_person_id=? and global_verb_id=?",
                      alternate, vt.id, vp.id, gv.id]
    )
    if cv.nil?
      ConjugatedVerb.create(:verb_time => vt, :verb_person => vp, :global_verb => gv,
                            :alternate => alternate, :conjugation => conjugation)
    else
      cv.conjugation = conjugation
      cv.save
    end
  end
end