/C/src/em_stl.cpp
C++ | 295 lines | 286 code | 9 blank | 0 comment | 40 complexity | b2cb5fdafffe7423d2a9d88acc16a80f MD5 | raw file
- #include "general.h"
- #include "opt_t.h"
- #include "opt_st.h"
- #include "em_macro.h"
- #include "em.h"
- vector<string> s_lst, t_lst;
- map<pair<string,string>,int> symap;
- map<pair<string,string>, pair<double,double> > stphi;
- map<string, double> tphi;
- map<string, double*> tfea;
- map<pair<string,string>, double*> stfea;
- double* tcoeff = new double[TFEATURE_NUM];
- double* stcoeff_p = new double[SFEATURE_NUM];
- double* stcoeff_m = new double[SFEATURE_NUM];
- double ori = 0.5;
- double lap = 0.001;
- string dir;
- int iteration=0;
- double em::likelihood()
- {
- double res = 0.0;
- map<pair<string,string>,int>::iterator it;
- for (int i = 0; i < (int)s_lst.size(); ++ i)
- {
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string s = s_lst[i], t = t_lst[j];
- pair<double, double> p = stphi[make_pair(s,t)];
- double ap = p.first, am = p.second;
- double b = tphi[t];
- it = symap.find(make_pair(s,t));
- if (it != symap.end())
- res += log(ap*b+am*(1-b))*it->second;
- else
- res += log(1-ap*b-am*(1-b));
- }
- }
- return res;
- }
- em::em(int argc, const char* argv[])
- {
- char s[MAX_STRLENGTH],y[MAX_STRLENGTH]; int e;
- dir = argv[5];
- iteration = 0;
- while(1)
- {
- stringstream md;
- md << dir << "/model." << iteration + 1 << "/succ";
- ifstream ifile(md.str().c_str());
- if(!ifile) break;
- iteration ++;
- }
- if(iteration > 0)
- {
- stringstream cmd, wss, wts;
- cmd << dir << "/model." << iteration;
- cerr << "Loading Model: " << cmd.str() << endl;
- wss << cmd.str() << "/weights_stphi.csv";
- FILE* smodel_in = fopen(wss.str().c_str(), "r");
- int i = 0;
- while(!feof(smodel_in))
- {
- fscanf(smodel_in,"%lf%*c%lf\n", &stcoeff_p[i], &stcoeff_m[i]);
- i++;
- }
- fclose(smodel_in);
- wts << cmd.str() << "/weights_tphi.csv";
- FILE* tmodel_in = fopen(wts.str().c_str(), "r");
- i = 0;
- while(!feof(tmodel_in))
- fscanf(tmodel_in,"%lf\n",&tcoeff[i++]);
- fclose(tmodel_in);
- }
- else{
- for(int i = 0; i < SFEATURE_NUM; ++ i)
- {
- stcoeff_p[i] = ori;
- stcoeff_m[i] = ori;
- }
- if (argc == 6)
- {
- cerr << "Loading Model: Empty\n";
- for(int i = 0; i < TFEATURE_NUM; ++ i)
- tcoeff[i] = ori;
- }
- if (argc == 7)
- {
- cerr << "Loading Model: " << argv[6] << endl;
- FILE* model_in = fopen(argv[6], "r");
- int i = 0;
- while(!feof(model_in))
- fscanf(model_in,"%lf\n",&tcoeff[i++]);
- fclose (model_in);
- }
- }
- cerr << "Loading Events: " << argv[1] << endl;
- FILE* triple_in = fopen(argv[1], "r");
- while(!feof(triple_in))
- {
- fscanf(triple_in,"%s %s %d\n",s, y, &e);
- symap[make_pair(s,y)] = e;
- }
- fclose (triple_in);
- cerr << "Loading Sturcture List: " << argv[2] << endl;
- FILE* str_in = fopen(argv[2], "r");
- while(!feof(str_in))
- {
- fscanf(str_in,"%s\n",s);
- s_lst.push_back(s);
- }
- fclose (str_in);
- cerr << "Loading Structure Feature: " << argv[3] << endl;
- FILE* stf_in = fopen(argv[3], "r");
- while(!feof(stf_in))
- {
- fscanf(stf_in,"%s\n",y);
- double* f = new double[SFEATURE_NUM];
- char* sid = strtok(y, ","); char* ptk = strtok (NULL, ",");
- char* t = ptk; ptk=strtok (NULL, ",");
- int i = 0; double pdct = 0.0;
- while (ptk != NULL)
- {
- f[i] = atof(ptk);
- pdct += f[i]*stcoeff_p[i];
- ptk = strtok (NULL, ","); i++;
- }
- pdct=exp(pdct); pdct = pdct/(1+pdct); stfea[make_pair(sid,t)] = f;
- stphi[make_pair(sid,t)] = make_pair(pdct,pdct);
- }
- fclose (stf_in);
- cerr << "Loading Time Feature: " << argv[4] << endl;
- FILE* year_in = fopen(argv[4], "r");
- while(!feof(year_in))
- {
- fscanf(year_in,"%s\n",y);
- double* f = new double[TFEATURE_NUM];
- char* ym = strtok(y, ","); char* ptk = strtok (NULL, ",");
- t_lst.push_back(ym);
- int i = 0; double pdct = 0.0;
- while (ptk != NULL)
- {
- f[i] = atof(ptk);
- pdct += f[i]*tcoeff[i];
- ptk = strtok (NULL, ","); i++;
- }
- pdct=exp(pdct); tphi[ym] = pdct/(1+pdct); tfea[ym] = f;
- }
- fclose (year_in);
- cerr << "Loading Complete\n";
- }
- map<pair<string,string>, t_struct> tau;
- void em::E()
- {
- tau.clear();
- for (int i = 0; i < (int)s_lst.size(); ++ i)
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string s = s_lst[i];
- string y = t_lst[j];
- pair<double, double> a = stphi[make_pair(s,y)];
- double ap = a.first, am = a.second, b = tphi[y], cp = ap*b, cm = am*b;
- t_struct t;
- t.t11 = b==1?1:cp/(cp+am-cm);
- t.t10 = (b-cp)==(1-am+cm-cp)?1:(b-cp)/(1-am+cm-cp);
- t.t01 = 1-t.t11;
- t.t00 = 1-t.t10;
- tau[make_pair(s,y)] = t;
- }
- }
- void em::M()
- {
- map<pair<string,string>,int>::iterator it;
- map<string, int> N_s;
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string y = t_lst[j];
- double b = 0.0;
- int cnt = 0;
- for (int i = 0; i < (int)s_lst.size(); ++ i)
- {
- string s = s_lst[i];
- t_struct t = tau[make_pair(s,y)];
- it = symap.find(make_pair(s,y));
- if (it != symap.end())
- {
- b += t.t11*it->second;
- cnt += it->second;
- }
- else
- {
- b += t.t10;
- cnt += 1;
- }
- tphi[y] = b;
- N_s[y] = cnt;
- }
- }
- int N_max = 0;
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- int cur_n = N_s[t_lst[j]];
- if(cur_n > N_max) N_max = cur_n;
- }
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string y = t_lst[j];
- double b = tphi[y];
- b/=N_max;
- tphi[y] = b;
- }
- opt_t llt(tphi, tfea, TFEATURE_NUM, TREG_END);
- llt.run_loglin(tcoeff);
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string y = t_lst[j]; double comb = 0.0;
- double* fvec = tfea[y];
- for (int i = 0; i < TFEATURE_NUM; ++ i)
- comb += fvec[i]*tcoeff[i];
- comb = exp(comb);
- tphi[y] = comb/(1 + comb);
- }
- opt_st llst(symap, tau, s_lst, t_lst, stfea, SFEATURE_NUM, SREG_END);
- llst.run_loglin(stcoeff_p, stcoeff_m);
- for (int i = 0; i < (int)s_lst.size(); ++ i)
- for (int j = 0; j < (int)t_lst.size(); ++ j)
- {
- string s = s_lst[i], t = t_lst[j];
- double combp = 0.0, combm = 0.0;
- double* fvec = stfea[make_pair(s,t)];
- for (int k = 0; k < SFEATURE_NUM; ++ k)
- {
- combp += fvec[k]*stcoeff_p[k];
- combm += fvec[k]*stcoeff_m[k];
- }
- combp = exp(combp); combm = exp(combm);
- stphi[make_pair(s,t)] = make_pair(combp/(1+combp), combm/(1+combm));
- }
- }
- void em::gen_model(int i)
- {
- stringstream cmd, md, stf, yf, pf, succf;
- md << dir << "/model." << i;
- struct stat myStat;
- if(!(stat(md.str().c_str(), &myStat) == 0) || !((myStat.st_mode) & S_IFMT) == S_IFDIR)
- cmd << "mkdir " << md.str();
- system(cmd.str().c_str());
- stf << md.str() << "/weights_stphi.csv";
- FILE* stcoeff_out = fopen(stf.str().c_str(), "w");
- for (int j = 0; j < SFEATURE_NUM; ++ j)
- fprintf(stcoeff_out, "%lf,%lf\n", stcoeff_p[j], stcoeff_m[j]);
- fclose(stcoeff_out);
- yf << md.str() << "/weights_tphi.csv";
- FILE* tcoeff_out = fopen(yf.str().c_str(), "w");
- for (int j = 0; j < TFEATURE_NUM; ++ j)
- fprintf(tcoeff_out, "%lf\n", tcoeff[j]);
- fclose(tcoeff_out);
- pf << md.str() << "/tphi.csv";
- map<string,double>::iterator tphi_itr;
- FILE* year_out = fopen(pf.str().c_str(), "w");
- for (tphi_itr = tphi.begin(); tphi_itr != tphi.end(); ++ tphi_itr)
- fprintf(year_out, "%s,%lf\n", tphi_itr->first.c_str(), tphi_itr->second);
- fclose(year_out);
- succf << md.str() << "/succ";
- FILE* succf_out = fopen(succf.str().c_str(), "w");
- fclose(succf_out);
- }
- void em::run()
- {
- RUN_EM(1)
- }
- em::~em()
- {
- for (int i = 0; i < (int)t_lst.size(); ++ i)
- {
- string t = t_lst[i];
- delete []tfea[t];
- for (int j = 0; j < (int)s_lst.size(); ++j)
- {
- string s = s_lst[j];
- delete []stfea[make_pair(s,t)];
- }
- }
- delete []tcoeff;
- delete []stcoeff_p;
- delete []stcoeff_m;
- }