PageRenderTime 66ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/C/src/em_stl.cpp

https://gitlab.com/Laplace_wdd/secondary
C++ | 295 lines | 286 code | 9 blank | 0 comment | 40 complexity | b2cb5fdafffe7423d2a9d88acc16a80f MD5 | raw file
  1. #include "general.h"
  2. #include "opt_t.h"
  3. #include "opt_st.h"
  4. #include "em_macro.h"
  5. #include "em.h"
  // ---- File-scope state shared by all em member functions ----
  // s_lst: tokens read from the file named by argv[2];
  // t_lst: first comma-separated field of every record in the argv[4] file.
  6. vector<string> s_lst, t_lst;
  // (s, y) -> integer read from the "s y e" triples in the argv[1] file.
  7. map<pair<string,string>,int> symap;
  // (s, t) -> pair of logistic values; in M() .first is computed from stcoeff_p
  // and .second from stcoeff_m.
  8. map<pair<string,string>, pair<double,double> > stphi;
  // t -> logistic value of the tfea[t] / tcoeff dot product.
  9. map<string, double> tphi;
  // t -> heap array of TFEATURE_NUM doubles (allocated in the constructor,
  // released in ~em).
  10. map<string, double*> tfea;
  // (s, t) -> heap array of SFEATURE_NUM doubles (allocated in the constructor,
  // released in ~em).
  11. map<pair<string,string>, double*> stfea;
  // Weight vectors. Allocated once during static initialisation and deleted by
  // ~em, so they do not survive the destruction of an em object.
  12. double* tcoeff = new double[TFEATURE_NUM];
  13. double* stcoeff_p = new double[SFEATURE_NUM];
  14. double* stcoeff_m = new double[SFEATURE_NUM];
  // Initial value given to every weight when no saved model is loaded.
  15. double ori = 0.5;
  // Not referenced anywhere in the visible part of this file.
  // NOTE(review): may be used by the macros in em_macro.h - confirm.
  16. double lap = 0.001;
  // Model root directory (argv[5]); models live in <dir>/model.<i>.
  17. string dir;
  // Highest i for which <dir>/model.<i>/succ existed when the constructor ran.
  18. int iteration=0;
  19. double em::likelihood()
  20. {
  21. double res = 0.0;
  22. map<pair<string,string>,int>::iterator it;
  23. for (int i = 0; i < (int)s_lst.size(); ++ i)
  24. {
  25. for (int j = 0; j < (int)t_lst.size(); ++ j)
  26. {
  27. string s = s_lst[i], t = t_lst[j];
  28. pair<double, double> p = stphi[make_pair(s,t)];
  29. double ap = p.first, am = p.second;
  30. double b = tphi[t];
  31. it = symap.find(make_pair(s,t));
  32. if (it != symap.end())
  33. res += log(ap*b+am*(1-b))*it->second;
  34. else
  35. res += log(1-ap*b-am*(1-b));
  36. }
  37. }
  38. return res;
  39. }
  40. em::em(int argc, const char* argv[])
  41. {
  42. char s[MAX_STRLENGTH],y[MAX_STRLENGTH]; int e;
  43. dir = argv[5];
  44. iteration = 0;
  45. while(1)
  46. {
  47. stringstream md;
  48. md << dir << "/model." << iteration + 1 << "/succ";
  49. ifstream ifile(md.str().c_str());
  50. if(!ifile) break;
  51. iteration ++;
  52. }
  53. if(iteration > 0)
  54. {
  55. stringstream cmd, wss, wts;
  56. cmd << dir << "/model." << iteration;
  57. cerr << "Loading Model: " << cmd.str() << endl;
  58. wss << cmd.str() << "/weights_stphi.csv";
  59. FILE* smodel_in = fopen(wss.str().c_str(), "r");
  60. int i = 0;
  61. while(!feof(smodel_in))
  62. {
  63. fscanf(smodel_in,"%lf%*c%lf\n", &stcoeff_p[i], &stcoeff_m[i]);
  64. i++;
  65. }
  66. fclose(smodel_in);
  67. wts << cmd.str() << "/weights_tphi.csv";
  68. FILE* tmodel_in = fopen(wts.str().c_str(), "r");
  69. i = 0;
  70. while(!feof(tmodel_in))
  71. fscanf(tmodel_in,"%lf\n",&tcoeff[i++]);
  72. fclose(tmodel_in);
  73. }
  74. else{
  75. for(int i = 0; i < SFEATURE_NUM; ++ i)
  76. {
  77. stcoeff_p[i] = ori;
  78. stcoeff_m[i] = ori;
  79. }
  80. if (argc == 6)
  81. {
  82. cerr << "Loading Model: Empty\n";
  83. for(int i = 0; i < TFEATURE_NUM; ++ i)
  84. tcoeff[i] = ori;
  85. }
  86. if (argc == 7)
  87. {
  88. cerr << "Loading Model: " << argv[6] << endl;
  89. FILE* model_in = fopen(argv[6], "r");
  90. int i = 0;
  91. while(!feof(model_in))
  92. fscanf(model_in,"%lf\n",&tcoeff[i++]);
  93. fclose (model_in);
  94. }
  95. }
  96. cerr << "Loading Events: " << argv[1] << endl;
  97. FILE* triple_in = fopen(argv[1], "r");
  98. while(!feof(triple_in))
  99. {
  100. fscanf(triple_in,"%s %s %d\n",s, y, &e);
  101. symap[make_pair(s,y)] = e;
  102. }
  103. fclose (triple_in);
  104. cerr << "Loading Sturcture List: " << argv[2] << endl;
  105. FILE* str_in = fopen(argv[2], "r");
  106. while(!feof(str_in))
  107. {
  108. fscanf(str_in,"%s\n",s);
  109. s_lst.push_back(s);
  110. }
  111. fclose (str_in);
  112. cerr << "Loading Structure Feature: " << argv[3] << endl;
  113. FILE* stf_in = fopen(argv[3], "r");
  114. while(!feof(stf_in))
  115. {
  116. fscanf(stf_in,"%s\n",y);
  117. double* f = new double[SFEATURE_NUM];
  118. char* sid = strtok(y, ","); char* ptk = strtok (NULL, ",");
  119. char* t = ptk; ptk=strtok (NULL, ",");
  120. int i = 0; double pdct = 0.0;
  121. while (ptk != NULL)
  122. {
  123. f[i] = atof(ptk);
  124. pdct += f[i]*stcoeff_p[i];
  125. ptk = strtok (NULL, ","); i++;
  126. }
  127. pdct=exp(pdct); pdct = pdct/(1+pdct); stfea[make_pair(sid,t)] = f;
  128. stphi[make_pair(sid,t)] = make_pair(pdct,pdct);
  129. }
  130. fclose (stf_in);
  131. cerr << "Loading Time Feature: " << argv[4] << endl;
  132. FILE* year_in = fopen(argv[4], "r");
  133. while(!feof(year_in))
  134. {
  135. fscanf(year_in,"%s\n",y);
  136. double* f = new double[TFEATURE_NUM];
  137. char* ym = strtok(y, ","); char* ptk = strtok (NULL, ",");
  138. t_lst.push_back(ym);
  139. int i = 0; double pdct = 0.0;
  140. while (ptk != NULL)
  141. {
  142. f[i] = atof(ptk);
  143. pdct += f[i]*tcoeff[i];
  144. ptk = strtok (NULL, ","); i++;
  145. }
  146. pdct=exp(pdct); tphi[ym] = pdct/(1+pdct); tfea[ym] = f;
  147. }
  148. fclose (year_in);
  149. cerr << "Loading Complete\n";
  150. }
  // (s, y) -> t_struct holding the four values t11, t10, t01, t00. Cleared and
  // refilled by em::E(); read by em::M() and handed to opt_st there.
  151. map<pair<string,string>, t_struct> tau;
  152. void em::E()
  153. {
  154. tau.clear();
  155. for (int i = 0; i < (int)s_lst.size(); ++ i)
  156. for (int j = 0; j < (int)t_lst.size(); ++ j)
  157. {
  158. string s = s_lst[i];
  159. string y = t_lst[j];
  160. pair<double, double> a = stphi[make_pair(s,y)];
  161. double ap = a.first, am = a.second, b = tphi[y], cp = ap*b, cm = am*b;
  162. t_struct t;
  163. t.t11 = b==1?1:cp/(cp+am-cm);
  164. t.t10 = (b-cp)==(1-am+cm-cp)?1:(b-cp)/(1-am+cm-cp);
  165. t.t01 = 1-t.t11;
  166. t.t00 = 1-t.t10;
  167. tau[make_pair(s,y)] = t;
  168. }
  169. }
  170. void em::M()
  171. {
  172. map<pair<string,string>,int>::iterator it;
  173. map<string, int> N_s;
  174. for (int j = 0; j < (int)t_lst.size(); ++ j)
  175. {
  176. string y = t_lst[j];
  177. double b = 0.0;
  178. int cnt = 0;
  179. for (int i = 0; i < (int)s_lst.size(); ++ i)
  180. {
  181. string s = s_lst[i];
  182. t_struct t = tau[make_pair(s,y)];
  183. it = symap.find(make_pair(s,y));
  184. if (it != symap.end())
  185. {
  186. b += t.t11*it->second;
  187. cnt += it->second;
  188. }
  189. else
  190. {
  191. b += t.t10;
  192. cnt += 1;
  193. }
  194. tphi[y] = b;
  195. N_s[y] = cnt;
  196. }
  197. }
  198. int N_max = 0;
  199. for (int j = 0; j < (int)t_lst.size(); ++ j)
  200. {
  201. int cur_n = N_s[t_lst[j]];
  202. if(cur_n > N_max) N_max = cur_n;
  203. }
  204. for (int j = 0; j < (int)t_lst.size(); ++ j)
  205. {
  206. string y = t_lst[j];
  207. double b = tphi[y];
  208. b/=N_max;
  209. tphi[y] = b;
  210. }
  211. opt_t llt(tphi, tfea, TFEATURE_NUM, TREG_END);
  212. llt.run_loglin(tcoeff);
  213. for (int j = 0; j < (int)t_lst.size(); ++ j)
  214. {
  215. string y = t_lst[j]; double comb = 0.0;
  216. double* fvec = tfea[y];
  217. for (int i = 0; i < TFEATURE_NUM; ++ i)
  218. comb += fvec[i]*tcoeff[i];
  219. comb = exp(comb);
  220. tphi[y] = comb/(1 + comb);
  221. }
  222. opt_st llst(symap, tau, s_lst, t_lst, stfea, SFEATURE_NUM, SREG_END);
  223. llst.run_loglin(stcoeff_p, stcoeff_m);
  224. for (int i = 0; i < (int)s_lst.size(); ++ i)
  225. for (int j = 0; j < (int)t_lst.size(); ++ j)
  226. {
  227. string s = s_lst[i], t = t_lst[j];
  228. double combp = 0.0, combm = 0.0;
  229. double* fvec = stfea[make_pair(s,t)];
  230. for (int k = 0; k < SFEATURE_NUM; ++ k)
  231. {
  232. combp += fvec[k]*stcoeff_p[k];
  233. combm += fvec[k]*stcoeff_m[k];
  234. }
  235. combp = exp(combp); combm = exp(combm);
  236. stphi[make_pair(s,t)] = make_pair(combp/(1+combp), combm/(1+combm));
  237. }
  238. }
  239. void em::gen_model(int i)
  240. {
  241. stringstream cmd, md, stf, yf, pf, succf;
  242. md << dir << "/model." << i;
  243. struct stat myStat;
  244. if(!(stat(md.str().c_str(), &myStat) == 0) || !((myStat.st_mode) & S_IFMT) == S_IFDIR)
  245. cmd << "mkdir " << md.str();
  246. system(cmd.str().c_str());
  247. stf << md.str() << "/weights_stphi.csv";
  248. FILE* stcoeff_out = fopen(stf.str().c_str(), "w");
  249. for (int j = 0; j < SFEATURE_NUM; ++ j)
  250. fprintf(stcoeff_out, "%lf,%lf\n", stcoeff_p[j], stcoeff_m[j]);
  251. fclose(stcoeff_out);
  252. yf << md.str() << "/weights_tphi.csv";
  253. FILE* tcoeff_out = fopen(yf.str().c_str(), "w");
  254. for (int j = 0; j < TFEATURE_NUM; ++ j)
  255. fprintf(tcoeff_out, "%lf\n", tcoeff[j]);
  256. fclose(tcoeff_out);
  257. pf << md.str() << "/tphi.csv";
  258. map<string,double>::iterator tphi_itr;
  259. FILE* year_out = fopen(pf.str().c_str(), "w");
  260. for (tphi_itr = tphi.begin(); tphi_itr != tphi.end(); ++ tphi_itr)
  261. fprintf(year_out, "%s,%lf\n", tphi_itr->first.c_str(), tphi_itr->second);
  262. fclose(year_out);
  263. succf << md.str() << "/succ";
  264. FILE* succf_out = fopen(succf.str().c_str(), "w");
  265. fclose(succf_out);
  266. }
  // Driver entry point. The entire body is the expansion of the RUN_EM macro,
  // invoked with the argument 1. RUN_EM is not defined in this file.
  // NOTE(review): presumably it comes from em_macro.h and loops over
  // E() / M() / gen_model() - confirm against the macro definition.
  267. void em::run()
  268. {
  269. RUN_EM(1)
  270. }
  271. em::~em()
  272. {
  273. for (int i = 0; i < (int)t_lst.size(); ++ i)
  274. {
  275. string t = t_lst[i];
  276. delete []tfea[t];
  277. for (int j = 0; j < (int)s_lst.size(); ++j)
  278. {
  279. string s = s_lst[j];
  280. delete []stfea[make_pair(s,t)];
  281. }
  282. }
  283. delete []tcoeff;
  284. delete []stcoeff_p;
  285. delete []stcoeff_m;
  286. }