/ghmm-0.7.0/ghmm/discrime.c

http://github.com/jcnossen/hmmgenefinder · C · 1283 lines · 883 code · 219 blank · 181 comment · 225 complexity · 332c05a6fbdf4aa4414bae754c4cfe3d MD5 · raw file

  1. /*******************************************************************************
  2. *
  3. * This file is part of the General Hidden Markov Model Library,
  4. * GHMM version __VERSION__, see http://ghmm.org
  5. *
  6. * Filename: ghmm/ghmm/discrime.c
  7. * Authors: Janne Grunau
  8. *
  9. * Copyright (C) 1998-2004 Alexander Schliep
  10. * Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
  11. * Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
  12. * Berlin
  13. *
  14. * Contact: schliep@ghmm.org
  15. *
  16. * This library is free software; you can redistribute it and/or
  17. * modify it under the terms of the GNU Library General Public
  18. * License as published by the Free Software Foundation; either
  19. * version 2 of the License, or (at your option) any later version.
  20. *
  21. * This library is distributed in the hope that it will be useful,
  22. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  23. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  24. * Library General Public License for more details.
  25. *
  26. * You should have received a copy of the GNU Library General Public
  27. * License along with this library; if not, write to the Free
  28. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  29. *
  30. *
  31. * This file is version $Revision: 1322 $
  32. * from $Date: 2005-09-09 12:29:17 +0200 (Fri, 09 Sep 2005) $
  33. * last change by $Author: grunau $.
  34. *
  35. *******************************************************************************/
  36. #include <float.h>
  37. #include <stdio.h>
  38. #include <stdlib.h>
  39. #include <math.h>
  40. #include "ghmm.h"
  41. #include "mes.h"
  42. #include "matrix.h"
  43. #include "model.h"
  44. #include "foba.h"
  45. #include "reestimate.h"
  46. #include "gradescent.h"
  47. #include "discrime.h"
  48. #include <ghmm/internal.h>
/* Weight of the freshly estimated parameter when blending with the old one */
#define LAMBDA 0.14
/* Convex combination of an old parameter (o) and its new estimate (n) */
#define TRIM(o, n) ((1-LAMBDA)*(o) + LAMBDA*(n))
/* Strict ANSI C (C89) provides no long double math functions;
   fall back to the double precision versions. */
#ifdef __STRICT_ANSI__
#define logl(A) log(A)
#endif
#ifdef __STRICT_ANSI__
#define expl(A) exp(A)
#endif
/* forward declaration */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                            double ******matrix_b, double *****matrix_a,
                            double *****matrix_pi, long double ***omega,
                            long double ****omegati, double ****log_p);
static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
                            double *****matrix_b, double ****matrix_a,
                            double ****matrix_pi, long double **omega,
                            long double ***omegati, double ***log_p);
static void discrime_trim_gradient (double *new, int length);
/* step width of the gradient updates (used as a factor on the derivative
   in the *_gradient functions below); presumably set by the public entry
   point before training -- TODO confirm against callers outside this chunk */
double discrime_lambda = 0.0;
/* steepness of the sigmoid used to smooth the classification criterion */
double discrime_alpha = 1.0;
/*----------------------------------------------------------------------------*/
/** Allocates all working matrices needed for one discriminative training step.

    For every class k, every training sequence l of that class and every
    model m the following are allocated (zero initialised by ARRAY_CALLOC):
      matrix_b[k][l][m][i][c]  emission expectations of state i of model m
                               (row length respects higher order states)
      matrix_a[k][l][m][i*N+j] transition expectations, flattened N x N
      matrix_pi[k][l][m][i]    initial state expectations
      log_p[k][l][m]           log P(l-th sequence of class k | model m)
      omega[k][l]              outer derivative of the training criterion
      omegati[k][l][m]         class specific outer derivatives (omega tilde)

    @return 0 on success, -1 on allocation failure (partially allocated
            memory is released via discrime_gfree) */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                            double ******matrix_b, double *****matrix_a,
                            double *****matrix_pi, long double ***omega,
                            long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"
  int i, k, l, m;

  /* first allocate memory for matrix_b: */
  ARRAY_CALLOC (*matrix_b, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_b)[k][l], noC);
      for (m = 0; m < noC; m++) {
        ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
        /* one row per state; row length M^(order+1) for higher order states */
        for (i = 0; i < mo[m]->N; i++)
          ARRAY_CALLOC ((*matrix_b)[k][l][m][i],
                        model_ipow (mo[m], mo[m]->M, mo[m]->s[i].order + 1));
      }
    }
  }

  /* matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j] */
  ARRAY_CALLOC (*matrix_a, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_a)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
    }
  }

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
    }
  }

  /* allocate memory for matrices of likelihoods
     log_p[k][l][m] =
     log_prob of l-th sequence of k-th class under the m-th model */
  ARRAY_CALLOC (*log_p, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*log_p)[k][l], noC);
  }

  /* allocate memory for outer derivatives */
  ARRAY_CALLOC (*omega, noC);
  for (k = 0; k < noC; k++)
    ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);

  /* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
  ARRAY_CALLOC (*omegati, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*omegati)[k][l], noC);
  }
  return 0;
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  /* NOTE(review): discrime_gfree walks the nested structure unconditionally;
     on an early failure some of these top level pointers may still be NULL.
     This relies on calloc zero-fill and on m_free tolerating NULL -- confirm
     that discrime_gfree/m_free are NULL safe. */
  discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
                  *omega, *omegati, *log_p);
  return -1;
#undef CUR_PROC
}
  138. /*----------------------------------------------------------------------------*/
  139. /** frees memory for expectation matrices for all classes & training sequences*/
  140. static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
  141. double *****matrix_b, double ****matrix_a,
  142. double ****matrix_pi, long double **omega,
  143. long double ***omegati, double ***log_p)
  144. {
  145. #define CUR_PROC "discrime_gfree"
  146. int i, k, l, m;
  147. for (k = 0; k < noC; k++) {
  148. for (l = 0; l < sqs[k]->seq_number; l++) {
  149. for (m = 0; m < noC; m++) {
  150. for (i = 0; i < mo[m]->N; i++)
  151. m_free (matrix_b[k][l][m][i]);
  152. m_free (matrix_b[k][l][m]);
  153. }
  154. m_free (matrix_b[k][l]);
  155. }
  156. m_free (matrix_b[k]);
  157. }
  158. m_free (matrix_b);
  159. for (k = 0; k < noC; k++) {
  160. for (l = 0; l < sqs[k]->seq_number; l++) {
  161. for (m = 0; m < noC; m++)
  162. m_free (matrix_a[k][l][m]);
  163. m_free (matrix_a[k][l]);
  164. }
  165. m_free (matrix_a[k]);
  166. }
  167. m_free (matrix_a);
  168. for (k = 0; k < noC; k++) {
  169. for (l = 0; l < sqs[k]->seq_number; l++) {
  170. for (m = 0; m < noC; m++)
  171. m_free (matrix_pi[k][l][m]);
  172. m_free (matrix_pi[k][l]);
  173. }
  174. m_free (matrix_pi[k]);
  175. }
  176. m_free (matrix_pi);
  177. for (k = 0; k < noC; k++) {
  178. for (l = 0; l < sqs[k]->seq_number; l++)
  179. m_free (log_p[k][l]);
  180. m_free (log_p[k]);
  181. }
  182. m_free (log_p);
  183. for (k = 0; k < noC; k++)
  184. m_free (omega[k]);
  185. m_free (omega);
  186. for (k = 0; k < noC; k++) {
  187. for (l = 0; l < sqs[k]->seq_number; l++)
  188. m_free (omegati[k][l]);
  189. m_free (omegati[k]);
  190. }
  191. m_free (omegati);
  192. return;
  193. #undef CUR_PROC
  194. }
/*----------------------------------------------------------------------------*/
/** Computes the outer derivatives of the sigmoid smoothed training criterion.

    For each training sequence l of each class k:
      omega[k][l]      = alpha * s * (1 - s), where s is the sigmoid of the
                         prior weighted log odds of the correct class
      omegati[k][l][m] = omega[k][l] * exp(log_p[k][l][m] - Z) * prior(m);
                         zero for m == k (not used, cf. equation (2.9))
    Z is the log of the prior weighted likelihood sum over the competing
    classes, evaluated with the usual max trick for numerical stability.

    @return always 0 (no error conditions are detected here) */
static int discrime_calculate_omega (model ** mo, sequence_t ** sqs, int noC,
                                     long double **omega, long double ***omegati,
                                     double ***log_p)
{
#define CUR_PROC "discrime_calculate_omega"
  int k, l, m;
  int seq_no, argmax = 0;
  double sum, max;
  double exponent;
  long double sigmoid;

  /* compute outer derivatives */
  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    seq_no = sqs[k]->seq_number;
    /* iterate over all training sequences */
    for (l = 0; l < seq_no; l++) {
      /* 1.0 flags "no maximum found yet" (log probs are <= 0 weighted) */
      max = 1.0;
      /* calculate log(sum[prob]) for logarithmic probabilities
         first determine the maximum */
      for (m = 0; m < noC; m++) {
        if (m != k
            && (1.0 == max || max < (log_p[k][l][m] + log (mo[m]->prior)))) {
          max = log_p[k][l][m] + log (mo[m]->prior);
          argmax = m;
        }
      }
      /* sum starts at 1.0 == exp(0), accounting for the argmax term itself */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (log_p[k][l][m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;
      exponent = log_p[k][l][k] + log (mo[k]->prior) - sum;
      /* clamp to keep expl below in the representable range
         (NOTE(review): the message says "too large" although the exponent is
         actually below the representable minimum) */
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent was too large (%g) cut it down!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }
      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
      /* derivative of the sigmoid: alpha * s * (1-s) */
      omega[k][l] = discrime_alpha * sigmoid * (1.0 - sigmoid);
      /* omegati is not useful for m==k equation (2.9) */
      for (m = 0; m < noC; m++) {
        if (m == k)
          omegati[k][l][m] = 0;
        else
          omegati[k][l][m] =
            omega[k][l] * expl (log_p[k][l][m] -
                                sum) * (long double) mo[m]->prior;
      }
    }
  }
  return 0;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
/** Runs forward/backward for every (sequence, model) pair and accumulates
    the expected parameter usage counts.

    expect_*[k][l][m] receives the expected number of times a particular
    parameter of model m is used while generating the l-th training sequence
    of class k; log_p[k][l][m] receives the corresponding log likelihood.

    @return 0 on success, -1 if allocation or forward/backward fails */
static int discrime_precompute (model ** mo, sequence_t ** sqs, int noC,
                                double *****expect_b, double ****expect_a,
                                double ****expect_pi, double ***log_p)
{
#define CUR_PROC "discrime_precompute"
  int k, l, m;
  int seq_len, success = 1;
  /* forward, backward variables and scaler */
  double **alpha, **beta, *scale;
  sequence_t *sq;

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {
      seq_len = sq->seq_len[l];
      /* iterate over all classes */
      for (m = 0; m < noC; m++) {
        if (-1 ==
            reestimate_alloc_matvek (&alpha, &beta, &scale, seq_len,
                                     mo[m]->N))
          return -1;
        /* calculate forward and backward variables without labels: */
        if (-1 == foba_forward (mo[m], sq->seq[l], seq_len, alpha, scale,
                                &(log_p[k][l][m]))) {
          success = 0;
          printf ("forward\n");
          goto FREE;
        }
        if (-1 == foba_backward (mo[m], sq->seq[l], seq_len, beta, scale)) {
          success = 0;
          printf ("backward\n");
          goto FREE;
        }
        /* compute expectation matrices
           expect_*[k][l][m] holds the expected number how ofter a particular
           parameter of model m is used by l-th training sequence of class k */
        gradescent_compute_expectations (mo[m], alpha, beta, scale,
                                         sq->seq[l], seq_len,
                                         expect_b[k][l][m], expect_a[k][l][m],
                                         expect_pi[k][l][m]);
      FREE:                     /* matrices are released on both the success
                                   and the error path before returning */
        reestimate_free_matvek (alpha, beta, scale, seq_len);
        if (!success)
          return -1;
      }
    }
  }
  return 0;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
/** Evaluates the sigmoid smoothed classification performance of the current
    model set over all training sequences.

    For every sequence the prior weighted log odds of its own class against
    all competing classes are pushed through a sigmoid with steepness
    discrime_alpha and the values are summed; for large alpha the result
    approaches the number of correctly classified tokens.

    @return the accumulated performance (0.0 if the scratch allocation fails) */
double discrime_compute_performance (model ** mo, sequence_t ** sqs, int noC)
{
#define CUR_PROC "discrime_compute_performance"
  int k, l, m, temp;
  int argmax = 0;
  double sum, max;
  double *logp;
  double exponent;
  long double sigmoid;
  double performance = 0.0;
  sequence_t *sq;

  ARRAY_CALLOC (logp, noC);

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {
      /* score the sequence under every model */
      for (m = 0; m < noC; m++) {
        temp = foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
        /* errors are reported but not propagated */
        if (0 != temp)
          printf ("foba_logp error in sequence[%d][%d] under model %d (%g)\n",
                  k, l, m, logp[m]);
      }
      /* log-sum-exp over the competing classes: find the maximum first
         (1.0 flags "no maximum found yet") */
      max = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && (1.0 == max || max < (logp[m] + log (mo[m]->prior)))) {
          max = logp[m] + log (mo[m]->prior);
          argmax = m;
        }
      }
      /* sum starts at 1.0 == exp(0), accounting for the argmax term itself */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (logp[m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;
      exponent = logp[k] + log (mo[k]->prior) - sum;
      /* clamp so expl below stays in range (message wording is misleading:
         the exponent is actually too far below zero) */
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent was too large (%g) cut it down!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }
      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
      performance += (double) sigmoid;
    }
  }
  m_free (logp);
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  return performance;
#undef CUR_PROC
}
  360. /*----------------------------------------------------------------------------*/
  361. static void discrime_print_statistics (model ** mo, sequence_t ** sqs, int noC,
  362. int *falseP, int *falseN)
  363. {
  364. #define CUR_PROC "discrime_print_statistics"
  365. int k, l, m;
  366. int argmax;
  367. double *logp, max;
  368. sequence_t *sq;
  369. ARRAY_CALLOC (logp, noC);
  370. for (k = 0; k < noC; k++) {
  371. falseP[k] = 0;
  372. falseN[k] = 0;
  373. }
  374. for (k = 0; k < noC; k++) {
  375. sq = sqs[k];
  376. printf ("Looking at training tokens of Class %d\n", k);
  377. for (l = 0; l < sq->seq_number; l++) {
  378. argmax = 0, max = -DBL_MAX;
  379. for (m = 0; m < noC; m++) {
  380. foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
  381. if (m == 0 || max < logp[m]) {
  382. max = logp[m];
  383. argmax = m;
  384. }
  385. }
  386. if (sq->seq_number < 11 && noC < 8) {
  387. /* printing fancy statistics */
  388. printf ("%2d: %8.4g", l, logp[0] - logp[argmax]);
  389. for (m = 1; m < noC; m++)
  390. printf (", %8.4g", logp[m] - logp[argmax]);
  391. printf (" \t+(%g)\n", logp[argmax]);
  392. }
  393. /* counting false positives and false negatives */
  394. if (argmax != k) {
  395. falseP[argmax]++;
  396. falseN[k]++;
  397. }
  398. }
  399. printf ("%d false negatives in class %d.\n", falseN[k], k);
  400. }
  401. STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  402. m_free (logp);
  403. return;
  404. #undef CUR_PROC
  405. }
  406. /*----------------------------------------------------------------------------*/
  407. static void discrime_trim_gradient (double *new, int length)
  408. {
  409. #define CUR_PROC "discrime_trim_gradient"
  410. int i, argmin = 0;
  411. double min, sum = 0;
  412. /* printf("unnormalised: %g", new[0]); */
  413. /* for (i=1; i<length; i++) */
  414. /* printf(", %g", new[i]); */
  415. /* printf("\n"); */
  416. for (i = 1; i < length; i++)
  417. if (new[i] < new[argmin])
  418. argmin = i;
  419. min = new[argmin];
  420. if (new[argmin] < 0.0)
  421. for (i = 0; i < length; i++)
  422. new[i] -= (1.1 * min);
  423. for (i = 0; i < length; i++)
  424. sum += new[i];
  425. for (i = 0; i < length; i++)
  426. new[i] /= sum;
  427. /* printf(" normalised: %g", new[0]); */
  428. /* for (i=1; i<length; i++) */
  429. /* printf(", %g", new[i]); */
  430. /* printf("\n"); */
  431. return;
  432. #undef CUR_PROC
  433. }
/*----------------------------------------------------------------------------*/
/** Gradient update of the initial state probabilities of model `class`.

    For every state i the partial derivative sums the outer-derivative
    weighted Pi expectations: positive contributions from sequences of the
    model's own class, negative ones from all competing classes.  The
    updated vector is renormalised by discrime_trim_gradient. */
static void discrime_update_pi_gradient (model ** mo, sequence_t ** sqs, int noC,
                                         int class, double ****expect_pi,
                                         long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_gradient"
  int i, l, m;
  double *pi_old = NULL;
  double *pi_new = NULL;
  double sum;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum += omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum -= omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    /* parameters at exactly zero stay zero (derivative would divide by 0) */
    if (fabs (pi_old[i]) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = pi_old[i] + discrime_lambda * (sum / pi_old[i]);
  }
  /* change parameters to fit into the valid parameter range */
  discrime_trim_gradient (pi_new, mo[class]->N);
  /* update parameters */
  for (i = 0; i < mo[class]->N; i++) {
    mo[class]->s[i].pi = pi_new[i];
  }
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
/** Gradient update of the transition probabilities of model `class`.

    For each state i the derivative w.r.t. a[i][j] accumulates the weighted
    transition expectations (own class positive, competitors negative); each
    state's outgoing distribution is renormalised separately and in_a is kept
    as a mirror of out_a.
    (Note: the stray unary '+' in one `expect_a` subscript below is redundant
    but harmless; kept byte-identical.) */
static void discrime_update_a_gradient (model ** mo, sequence_t ** sqs, int noC,
                                        int class, double ****expect_a,
                                        long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_gradient"
  int i, j, g, l, m;
  int j_id;
  double *a_old = NULL;
  double *a_new = NULL;
  double sum;
  sequence_t *sq = NULL;

  /* scratch buffers sized N (>= number of outgoing transitions per state) */
  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class */
  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum +=
              omega[class][l] * expect_a[class][l][+class][i * mo[class]->N +
                                                           j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum -=
              omegati[m][l][class] * expect_a[m][l][class][i * mo[class]->N +
                                                           j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      /* transitions at exactly zero stay zero (derivative divides by a_old) */
      if (fabs (a_old[j]) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = a_old[j] + discrime_lambda * (sum / a_old[j]);
    }
    /* change parameters to fit into the valid parameter range */
    discrime_trim_gradient (a_new, mo[class]->s[i].out_states);
    /* update parameter */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = a_new[j];
      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}
  547. /*----------------------------------------------------------------------------*/
  548. static void discrime_update_b_gradient (model ** mo, sequence_t ** sqs, int noC,
  549. int class, double *****expect_b,
  550. long double **omega, long double ***omegati)
  551. {
  552. #define CUR_PROC "discrime_update_b_gradient"
  553. int i, h, l, m;
  554. int hist, size;
  555. double *b_old = NULL;
  556. double *b_new = NULL;
  557. double sum;
  558. ARRAY_CALLOC (b_old, mo[class]->M);
  559. ARRAY_CALLOC (b_new, mo[class]->M);
  560. /* updating current class */
  561. /* itarate over alls state of the current model */
  562. for (i = 0; i < mo[class]->N; i++) {
  563. if (mo[class]->s[i].fix)
  564. continue;
  565. size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
  566. for (hist = 0; hist < size; hist += mo[class]->M) {
  567. for (h = hist; h < hist + mo[class]->M; h++) {
  568. /* iterate over all classes and training sequences */
  569. sum = 0.0;
  570. for (m = 0; m < noC; m++) {
  571. if (m == class)
  572. for (l = 0; l < sqs[m]->seq_number; l++)
  573. sum += omega[class][l] * expect_b[class][l][class][i][h];
  574. else
  575. for (l = 0; l < sqs[m]->seq_number; l++)
  576. sum -= omegati[m][l][class] * expect_b[m][l][class][i][h];
  577. }
  578. /* check for valid new parameters */
  579. b_old[h] = mo[class]->s[i].b[h];
  580. if (fabs (b_old[h]) == 0.0)
  581. b_new[h] = b_old[h];
  582. else
  583. b_new[h] = b_old[h] + discrime_lambda * (sum / b_old[h]);
  584. }
  585. /* change parameters to fit into valid parameter range */
  586. discrime_trim_gradient (b_new, mo[0]->M);
  587. for (h = hist; h < hist + mo[class]->M; h++) {
  588. /* update parameters */
  589. /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
  590. h, i, class, b_new[h]);
  591. printf(" (%5.3g) old value.\n", b_old[h]); */
  592. mo[class]->s[i].b[h] = b_new[h];
  593. }
  594. }
  595. }
  596. STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  597. m_free (b_old);
  598. m_free (b_new);
  599. return;
  600. #undef CUR_PROC
  601. }
/*----------------------------------------------------------------------------*/
/** Closed-form reestimation of the initial state probabilities of model
    `class`.

    A Lagrange multiplier (the negated sum of all weighted Pi expectations,
    with the competing-class part damped by lfactor) normalises the per-state
    numerators; the result is blended with the old value via TRIM to keep
    the step conservative. */
static void discrime_update_pi_closed (model ** mo, sequence_t ** sqs, int noC,
                                       int class, double lfactor,
                                       double ****expect_pi, long double **omega,
                                       long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_closed"
  int i, l, m;
  double *pi_old = NULL;
  double *pi_new = NULL;
  double sum, lagrangian;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* updating current class (all or only specified) */
  /* iterate over all states of the k-th model to compute the lagrangian
     multiplier */
  lagrangian = 0.0;
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first
         term */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          lagrangian -= omega[class][l] * expect_pi[class][l][class][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          lagrangian +=
            lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
  }
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum -= omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum += lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    /* a vanishing lagrangian would divide by zero: keep the old value */
    if (fabs (lagrangian) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = sum / lagrangian;
  }
  /* update parameters: blend old and new estimate via TRIM */
  for (i = 0; i < mo[class]->N; i++) {
    mo[class]->s[i].pi = TRIM (pi_old[i], pi_new[i]);
  }
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}
/*----------------------------------------------------------------------------*/
/** Closed-form reestimation of the transition probabilities of model
    `class`.

    For each state i a Lagrange multiplier over its outgoing transitions
    normalises the weighted transition expectations (competing-class terms
    damped by lfactor); new values are blended with the old ones via TRIM
    and mirrored from out_a into in_a.
    (Note: the stray unary '+' in one `expect_a` subscript below is redundant
    but harmless; kept byte-identical.) */
static void discrime_update_a_closed (model ** mo, sequence_t ** sqs, int noC,
                                      int class, double lfactor, double ****expect_a,
                                      long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_closed"
  int i, j, g, l, m;
  int j_id;
  double *a_old = NULL;
  double *a_new = NULL;
  double sum, lagrangian;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class (all or only specified) */
  /* iterate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {
    /* compute lagrangian multiplier */
    lagrangian = 0.0;
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            lagrangian -=
              omega[class][l] * expect_a[class][l][class][i * mo[class]->N +
                                                          j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            lagrangian +=
              lfactor * omegati[m][l][class] *
              expect_a[m][l][class][i * mo[class]->N + j_id];
      }
    }
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum -=
              omega[class][l] * expect_a[class][l][+class][i * mo[class]->N +
                                                           j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum +=
              lfactor * omegati[m][l][class] *
              expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      /* a vanishing lagrangian would divide by zero: keep the old value */
      if (fabs (lagrangian) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = sum / lagrangian;
    }
    /* update parameter: blend old and new estimate via TRIM */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = TRIM (a_old[j], a_new[j]);
      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }
STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}
  754. /*----------------------------------------------------------------------------*/
  755. static void discrime_update_b_closed (model ** mo, sequence_t ** sqs, int noC,
  756. int class, double lfactor,
  757. double *****expect_b, long double **omega,
  758. long double ***omegati)
  759. {
  760. #define CUR_PROC "discrime_update_b_closed"
  761. int i, h, l, m;
  762. int hist, size;
  763. double *b_old = NULL;
  764. double *b_new = NULL;
  765. double sum, lagrangian;
  766. ARRAY_CALLOC (b_old, mo[class]->M);
  767. ARRAY_CALLOC (b_new, mo[class]->M);
  768. /* itarate over alls state of the k-th model */
  769. for (i = 0; i < mo[class]->N; i++) {
  770. if (mo[class]->s[i].fix)
  771. continue;
  772. size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
  773. for (hist = 0; hist < size; hist += mo[class]->M) {
  774. /* compute lagrangian multiplier */
  775. lagrangian = 0.0;
  776. for (h = hist; h < hist + mo[class]->M; h++) {
  777. /* iterate over all classes and training sequences */
  778. for (m = 0; m < noC; m++) {
  779. if (m == class)
  780. for (l = 0; l < sqs[m]->seq_number; l++)
  781. lagrangian -= omega[class][l] * expect_b[class][l][class][i][h];
  782. else
  783. for (l = 0; l < sqs[m]->seq_number; l++)
  784. lagrangian +=
  785. lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
  786. }
  787. }
  788. for (h = hist; h < hist + mo[class]->M; h++) {
  789. /* iterate over all classes and training sequences */
  790. sum = 0.0;
  791. for (m = 0; m < noC; m++) {
  792. if (m == class)
  793. for (l = 0; l < sqs[m]->seq_number; l++)
  794. sum -= omega[class][l] * expect_b[class][l][class][i][h];
  795. else
  796. for (l = 0; l < sqs[m]->seq_number; l++)
  797. sum +=
  798. lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
  799. }
  800. /* check for valid new parameters */
  801. b_old[h] = mo[class]->s[i].b[h];
  802. if (fabs (lagrangian) == 0.0)
  803. b_new[h] = b_old[h];
  804. else
  805. b_new[h] = sum / lagrangian;
  806. }
  807. /* update parameters */
  808. for (h = hist; h < hist + mo[class]->M; h++) {
  809. mo[class]->s[i].b[h] = TRIM (b_old[h], b_new[h]);
  810. /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
  811. h, i, class, mo[class]->s[i].b[h]);
  812. printf(" (%5.3g) old value, estimate %5.3g\n", b_old[h], b_new[h]); */
  813. }
  814. }
  815. }
  816. STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  817. m_free (b_old);
  818. m_free (b_new);
  819. return;
  820. #undef CUR_PROC
  821. }
/*----------------------------------------------------------------------------*/
/** Shrinks the damping factor lfactor until, for every parameter of the
    k-th model, the positive (own-class) part of the reestimation numerator
    is not outweighed by the damped negative (competing-class) part -- this
    keeps the closed-form updates nonnegative.

    NOTE(review): lfactor is passed by value and the function returns void,
    so the adjusted factor never reaches the caller; presumably it should be
    returned or passed by pointer -- confirm against the call site.
    NOTE(review): CUR_PROC spells "driscrime_find_factor"; string kept as is. */
static void discrime_find_factor (model * mo, sequence_t ** sqs, int noC, int k,
                                  double lfactor, double ****expect_pi,
                                  double ****expect_a, double *****expect_b,
                                  long double **omega, long double ***omegati)
{
#define CUR_PROC "driscrime_find_factor"
  int h, i, j, l, m;
  int size, hist, j_id;
  double self, other;
  sequence_t *sq;

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo->N; i++) {
    /* PI */
    /* iterate over all classes */
    self = other = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first
         term */
      if (m == k)
        for (l = 0; l < sq->seq_number; l++)
          self += omega[k][l] * expect_pi[k][l][k][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          other += lfactor * omegati[m][l][k] * expect_pi[m][l][k][i];
    }
    /* shrink until the positive part dominates */
    if (self < other)
      lfactor *= self / other;
    /* A */
    for (j = 0; j < mo->s[i].out_states; j++) {
      j_id = mo->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      self = other = 0.0;
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == k)
          for (l = 0; l < sq->seq_number; l++)
            self += omega[k][l] * expect_a[k][l][k][i * mo->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            other +=
              lfactor * omegati[m][l][k] * expect_a[m][l][k][i * mo->N +
                                                             j_id];
      }
      if (self < other)
        lfactor *= self / other;
    }
    /* B */
    /* fixed states' emissions are not reestimated */
    if (mo->s[i].fix)
      continue;
    /* number of emission parameters grows with the state's order */
    size = (int) pow (mo->M, mo->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo->M) {
      for (h = hist; h < hist + mo->M; h++) {
        self = other = 0.0;
        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == k)
            for (l = 0; l < sqs[m]->seq_number; l++)
              self += omega[k][l] * expect_b[k][l][k][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              other += lfactor * omegati[m][l][k] * expect_b[m][l][k][i][h];
        }
        if (self < other)
          lfactor *= self / other;
      }
    }
  }
  return;
#undef CUR_PROC
}
  897. /*----------------------------------------------------------------------------*/
  898. static int discrime_onestep (model ** mo, sequence_t ** sqs, int noC, int grad,
  899. int class)
  900. {
  901. #define CUR_PROC "driscrime_onestep"
  902. int j, l, retval = -1;
  903. /* expected use of parameter (for partial derivatives) */
  904. double *****expect_b, ****expect_a, ****expect_pi;
  905. /* matrix of log probabilities */
  906. double ***log_p;
  907. /* matrix of outer derivatives */
  908. long double **omega, ***omegati;
  909. long double psum = 0, nsum = 0;
  910. double lfactor;
  911. if (-1 == discrime_galloc (mo, sqs, noC, &expect_b, &expect_a,
  912. &expect_pi, &omega, &omegati, &log_p)) {
  913. printf ("Allocation Error.\n");
  914. return -1;
  915. }
  916. if (-1 ==
  917. discrime_precompute (mo, sqs, noC, expect_b, expect_a, expect_pi,
  918. log_p)) {
  919. printf ("precompute Error.\n");
  920. goto FREE;
  921. }
  922. if (-1 == discrime_calculate_omega (mo, sqs, noC, omega, omegati, log_p)) {
  923. printf ("omega Error.\n");
  924. goto FREE;
  925. }
  926. if (grad) {
  927. /* updateing parameters */
  928. discrime_update_pi_gradient (mo, sqs, noC, class, expect_pi, omega,
  929. omegati);
  930. discrime_update_a_gradient (mo, sqs, noC, class, expect_a, omega,
  931. omegati);
  932. discrime_update_b_gradient (mo, sqs, noC, class, expect_b, omega,
  933. omegati);
  934. }
  935. else {
  936. /* calculate a pleaseant lfactor */
  937. for (j = 0; j < noC; j++) {
  938. for (l = 0; l < sqs[j]->seq_number; l++) {
  939. if (j == class)
  940. psum += omega[class][l];
  941. else
  942. nsum += omegati[j][l][class];
  943. }
  944. }
  945. if (fabs (nsum) > 1e-200 && psum < nsum)
  946. lfactor = (psum / nsum);
  947. else
  948. lfactor = 1.0;
  949. /* determing maximum lfactor */
  950. discrime_find_factor (mo[class], sqs, noC, class, lfactor, expect_pi,
  951. expect_a, expect_b, omega, omegati);
  952. lfactor *= .99;
  953. printf ("Calculated L: %g\n", lfactor);
  954. /* updating parameters */
  955. discrime_update_pi_closed (mo, sqs, noC, class, lfactor, expect_pi, omega,
  956. omegati);
  957. discrime_update_a_closed (mo, sqs, noC, class, lfactor, expect_a, omega,
  958. omegati);
  959. discrime_update_b_closed (mo, sqs, noC, class, lfactor, expect_b, omega,
  960. omegati);
  961. }
  962. retval = 0;
  963. FREE:
  964. discrime_gfree (mo, sqs, noC, expect_b, expect_a, expect_pi, omega,
  965. omegati, log_p);
  966. return retval;
  967. #undef CUR_PROC
  968. }
  969. /*----------------------------------------------------------------------------*/
  970. int discriminative (model ** mo, sequence_t ** sqs, int noC, int max_steps,
  971. int gradient)
  972. {
  973. #define CUR_PROC "driscriminative"
  974. double last_perf, cur_perf;
  975. int retval=-1, last_cer, cur_cer;
  976. double lambda=0.0;
  977. double noiselevel = 0.0667;
  978. int fp, fn, totalseqs = 0, totalsyms = 0;
  979. int *falseP=NULL, *falseN=NULL;
  980. double * prior_backup=NULL;
  981. int i, k, step;
  982. model * last;
  983. ARRAY_CALLOC (falseP, noC);
  984. ARRAY_CALLOC (falseN, noC);
  985. ARRAY_CALLOC (prior_backup, noC);
  986. for (i = 0; i < noC; i++) {
  987. totalseqs += sqs[i]->seq_number;
  988. for (k = 0; k < sqs[i]->seq_number; k++)
  989. totalsyms += sqs[i]->seq_len[k];
  990. }
  991. /* setting priors from number of training sequences and
  992. remember the old values */
  993. for (i=0; i<noC; i++) {
  994. prior_backup[i] = mo[i]->prior;
  995. mo[i]->prior = (double)sqs[i]->seq_number / totalseqs;
  996. printf("original prior: %g \t new prior %g\n", prior_backup[i], mo[i]->prior);
  997. }
  998. last_perf = discrime_compute_performance (mo, sqs, noC);
  999. cur_perf = last_perf;
  1000. discrime_print_statistics (mo, sqs, noC, falseP, falseN);
  1001. fp = fn = 0;
  1002. for (i = 0; i < noC; i++) {
  1003. printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
  1004. model_likelihood (mo[i], sqs[i]), falseP[i]);
  1005. fn += falseN[i];
  1006. fp += falseP[i];
  1007. }
  1008. printf
  1009. ("\n%d false positives and %d false negatives after ML-initialisation; Performance: %g.\n",
  1010. fp, fn, last_perf);
  1011. last_cer = cur_cer = fp;
  1012. for (k = 0; k < noC; k++) {
  1013. last = NULL;
  1014. step = 0;
  1015. if (gradient)
  1016. lambda = .3;
  1017. do {
  1018. if (last)
  1019. model_free (&last);
  1020. /* save a copy of the currently trained model */
  1021. last = model_copy (mo[k]);
  1022. last_cer = cur_cer;
  1023. last_perf = cur_perf;
  1024. noiselevel /= 1.8;
  1025. discrime_lambda = lambda / totalsyms;
  1026. printf
  1027. ("==============================================================\n");
  1028. printf
  1029. ("Optimising class %d, current step %d, lambda: %g noise: %g, gradient: %d\n",
  1030. k, step, discrime_lambda, noiselevel, gradient);
  1031. /* if (gradient) */
  1032. /* model_add_noise(mo[k], noiselevel, 0); */
  1033. discrime_onestep (mo, sqs, noC, gradient, k);
  1034. cur_perf = discrime_compute_performance (mo, sqs, noC);
  1035. discrime_print_statistics (mo, sqs, noC, falseP, falseN);
  1036. fp = fn = 0;
  1037. for (i = 0; i < noC; i++) {
  1038. printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
  1039. model_likelihood (mo[i], sqs[i]), falseP[i]);
  1040. fn += falseN[i];
  1041. fp += falseP[i];
  1042. }
  1043. cur_cer = fp;
  1044. printf ("MAP=%12g -> training -> MAP=%12g", last_perf, cur_perf);
  1045. printf (" %d false positives, %d false negatives\n", fp, fn);
  1046. printf
  1047. ("==============================================================\n");
  1048. } while ((last_perf < cur_perf || cur_cer < last_cer) && (step++ < max_steps));
  1049. mo[k] = model_copy (last);
  1050. model_free (&last);
  1051. cur_perf = last_perf;
  1052. cur_cer = last_cer;
  1053. }
  1054. /* resetting priors to old values */
  1055. for (i = 0; i < noC; i++)
  1056. mo[i]->prior = prior_backup[i];
  1057. retval = 0;
  1058. STOP: /* Label STOP from ARRAY_[CM]ALLOC */
  1059. m_free (prior_backup);
  1060. m_free (falseP);
  1061. m_free (falseN);
  1062. return retval;
  1063. #undef CUR_PROC
  1064. }