/ghmm-0.7.0/ghmm/discrime.c
/*******************************************************************************
*
* This file is part of the General Hidden Markov Model Library,
* GHMM version __VERSION__, see http://ghmm.org
*
* Filename: ghmm/ghmm/discrime.c
* Authors: Janne Grunau
*
* Copyright (C) 1998-2004 Alexander Schliep
* Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
* Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
*                         Berlin
*
* Contact: schliep@ghmm.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
* This file is version $Revision: 1322 $
* from $Date: 2005-09-09 12:29:17 +0200 (Fri, 09 Sep 2005) $
* last change by $Author: grunau $.
*
*******************************************************************************/

#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "ghmm.h"
#include "mes.h"
#include "matrix.h"
#include "model.h"
#include "foba.h"
#include "reestimate.h"
#include "gradescent.h"
#include "discrime.h"
#include <ghmm/internal.h>

#define LAMBDA 0.14
#define TRIM(o, n) ((1-LAMBDA)*(o) + LAMBDA*(n))

/* strict ANSI C lacks the long double math functions */
#ifdef __STRICT_ANSI__
#define logl(A) log(A)
#define expl(A) exp(A)
#endif

/* forward declarations */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                            double ******matrix_b, double *****matrix_a,
                            double *****matrix_pi, long double ***omega,
                            long double ****omegati, double ****log_p);

static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
                            double *****matrix_b, double ****matrix_a,
                            double ****matrix_pi, long double **omega,
                            long double ***omegati, double ***log_p);

static void discrime_trim_gradient (double *new, int length);

double discrime_lambda = 0.0;
double discrime_alpha = 1.0;


/*----------------------------------------------------------------------------*/
/** allocates memory for all expectation matrices and helper arrays: */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                            double ******matrix_b, double *****matrix_a,
                            double *****matrix_pi, long double ***omega,
                            long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"

  int i, k, l, m;

  /* first allocate memory for matrix_b: */
  ARRAY_CALLOC (*matrix_b, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_b)[k][l], noC);
      for (m = 0; m < noC; m++) {
        ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
        for (i = 0; i < mo[m]->N; i++)
          ARRAY_CALLOC ((*matrix_b)[k][l][m][i],
                        model_ipow (mo[m], mo[m]->M, mo[m]->s[i].order + 1));
      }
    }
  }

  /* matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j] */
  ARRAY_CALLOC (*matrix_a, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_a)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
    }
  }

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
    }
  }

  /* allocate memory for the matrices of likelihoods
     log_p[k][l][m] =
       log_prob of l-th sequence of k-th class under the m-th model */
  ARRAY_CALLOC (*log_p, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*log_p)[k][l], noC);
  }

  /* allocate memory for outer derivatives */
  ARRAY_CALLOC (*omega, noC);
  for (k = 0; k < noC; k++)
    ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);

  /* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
  ARRAY_CALLOC (*omegati, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*omegati)[k][l], noC);
  }

  return 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
                  *omega, *omegati, *log_p);
  return -1;
#undef CUR_PROC
}
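
/* Illustrative sketch (not part of the original file; the guard macro
 * DISCRIME_EXAMPLES is hypothetical): how the flattened transition slice
 * allocated above is addressed. matrix_a[k][l][m] is a single array of
 * mo[m]->N * mo[m]->N doubles, and the expectation for transition (i,j)
 * of model m sits at offset i*N + j -- the access pattern used throughout
 * this file. */
#ifdef DISCRIME_EXAMPLES
static double example_get_expect_a (double ****matrix_a, model ** mo,
                                    int k, int l, int m, int i, int j)
{
  return matrix_a[k][l][m][i * mo[m]->N + j];
}
#endif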

/*----------------------------------------------------------------------------*/
/** frees the expectation matrices for all classes & training sequences */
static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
                            double *****matrix_b, double ****matrix_a,
                            double ****matrix_pi, long double **omega,
                            long double ***omegati, double ***log_p)
{
#define CUR_PROC "discrime_gfree"

  int i, k, l, m;

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++) {
        for (i = 0; i < mo[m]->N; i++)
          m_free (matrix_b[k][l][m][i]);
        m_free (matrix_b[k][l][m]);
      }
      m_free (matrix_b[k][l]);
    }
    m_free (matrix_b[k]);
  }
  m_free (matrix_b);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++)
        m_free (matrix_a[k][l][m]);
      m_free (matrix_a[k][l]);
    }
    m_free (matrix_a[k]);
  }
  m_free (matrix_a);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++)
        m_free (matrix_pi[k][l][m]);
      m_free (matrix_pi[k][l]);
    }
    m_free (matrix_pi[k]);
  }
  m_free (matrix_pi);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++)
      m_free (log_p[k][l]);
    m_free (log_p[k]);
  }
  m_free (log_p);

  for (k = 0; k < noC; k++)
    m_free (omega[k]);
  m_free (omega);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++)
      m_free (omegati[k][l]);
    m_free (omegati[k]);
  }
  m_free (omegati);

  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static int discrime_calculate_omega (model ** mo, sequence_t ** sqs, int noC,
                                     long double **omega,
                                     long double ***omegati, double ***log_p)
{
#define CUR_PROC "discrime_calculate_omega"
  int k, l, m;
  int seq_no, argmax = 0;
  double sum, max;
  double exponent;
  long double sigmoid;

  /* compute outer derivatives */
  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    seq_no = sqs[k]->seq_number;
    /* iterate over all training sequences */
    for (l = 0; l < seq_no; l++) {

      /* calculate log(sum[prob]) of the competing classes from logarithmic
         probabilities; first determine the maximum (1.0 serves as 'unset'
         marker, since log-probabilities are non-positive) */
      max = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k
            && (1.0 == max || max < (log_p[k][l][m] + log (mo[m]->prior)))) {
          max = log_p[k][l][m] + log (mo[m]->prior);
          argmax = m;
        }
      }

      /* sum */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (log_p[k][l][m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;

      exponent = log_p[k][l][k] + log (mo[k]->prior) - sum;
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent too small (%g), clamping it!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }

      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));

      omega[k][l] = discrime_alpha * sigmoid * (1.0 - sigmoid);
/*       printf("omega[%d][%d] = %Lg\n", k, l, omega[k][l]); */

      /* omegati is not needed for m==k, see equation (2.9) */
      for (m = 0; m < noC; m++) {
        if (m == k)
          omegati[k][l][m] = 0;
        else
          omegati[k][l][m] = omega[k][l] * expl (log_p[k][l][m] - sum)
            * (long double) mo[m]->prior;
/*         printf("omegati[%d][%d][%d] = %Lg\n", k, l, m, omegati[k][l][m]); */
      }
    }
  }
  return 0;
#undef CUR_PROC
}
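
/* Illustrative sketch (hypothetical helpers, not original code): the two
 * numerical building blocks of discrime_calculate_omega() above. The first
 * is the max-shifted log-sum-exp, log(sum_i exp(lp[i])), computed without
 * underflow by factoring out the largest term; the second is the logistic
 * slope alpha * s * (1 - s) with s = 1/(1 + exp(-alpha*x)), which is
 * exactly the value stored in omega[k][l]. */
#ifdef DISCRIME_EXAMPLES
static double example_logsumexp (const double *lp, int n)
{
  int i;
  double max = lp[0], sum = 0.0;
  for (i = 1; i < n; i++)
    if (lp[i] > max)
      max = lp[i];
  for (i = 0; i < n; i++)
    sum += exp (lp[i] - max);
  return log (sum) + max;
}

static long double example_sigmoid_slope (double x)
{
  long double s = 1.0L / (1.0L + expl (-discrime_alpha * x));
  return discrime_alpha * s * (1.0L - s);
}
#endif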

/*----------------------------------------------------------------------------*/
static int discrime_precompute (model ** mo, sequence_t ** sqs, int noC,
                                double *****expect_b, double ****expect_a,
                                double ****expect_pi, double ***log_p)
{
#define CUR_PROC "discrime_precompute"

  int k, l, m;
  int seq_len, success = 1;

  /* forward, backward variables and scaling vector */
  double **alpha, **beta, *scale;

  sequence_t *sq;

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {

    sq = sqs[k];

    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {

      seq_len = sq->seq_len[l];

      /* iterate over all classes */
      for (m = 0; m < noC; m++) {

        if (-1 == reestimate_alloc_matvek (&alpha, &beta, &scale, seq_len,
                                           mo[m]->N))
          return -1;

        /* calculate forward and backward variables without labels: */
        if (-1 == foba_forward (mo[m], sq->seq[l], seq_len, alpha, scale,
                                &(log_p[k][l][m]))) {
          success = 0;
          printf ("forward\n");
          goto FREE;
        }
        if (-1 == foba_backward (mo[m], sq->seq[l], seq_len, beta, scale)) {
          success = 0;
          printf ("backward\n");
          goto FREE;
        }

        /* compute expectation matrices;
           expect_*[k][l][m] holds the expected number of times a particular
           parameter of model m is used by the l-th training sequence of
           class k */
        gradescent_compute_expectations (mo[m], alpha, beta, scale,
                                         sq->seq[l], seq_len,
                                         expect_b[k][l][m], expect_a[k][l][m],
                                         expect_pi[k][l][m]);

      FREE:
        reestimate_free_matvek (alpha, beta, scale, seq_len);
        if (!success)
          return -1;
      }
    }
  }
  return 0;
#undef CUR_PROC
}
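
/* Background note (standard Baum-Welch identities, stated here for
 * orientation; GHMM's scaled implementation lives in
 * gradescent_compute_expectations): with unscaled forward/backward
 * variables the expectations gathered above would read
 *   expect_pi[..][i]    = alpha_1(i) * beta_1(i) / P(O)
 *   expect_a[..][i*N+j] = sum_t alpha_t(i) a_ij b_j(O_{t+1}) beta_{t+1}(j) / P(O)
 *   expect_b[..][i][h]  = sum over t with emission context h of
 *                         alpha_t(i) * beta_t(i) / P(O)
 * i.e. the expected number of times each parameter is used on the given
 * sequence; the scaling vector only serves to avoid underflow. */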

/*----------------------------------------------------------------------------*/
double discrime_compute_performance (model ** mo, sequence_t ** sqs, int noC)
{
#define CUR_PROC "discrime_compute_performance"

  int k, l, m, temp;
  int argmax = 0;
  double sum, max;
  double *logp;
  double exponent;
  long double sigmoid;
  double performance = 0.0;

  sequence_t *sq;

  ARRAY_CALLOC (logp, noC);

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {

      /* iterate over all classes */
      for (m = 0; m < noC; m++) {
        temp = foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
        if (0 != temp)
          printf ("foba_logp error in sequence[%d][%d] under model %d (%g)\n",
                  k, l, m, logp[m]);
      }

      /* 1.0 serves as 'unset' marker, see discrime_calculate_omega() */
      max = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && (1.0 == max || max < (logp[m] + log (mo[m]->prior)))) {
          max = logp[m] + log (mo[m]->prior);
          argmax = m;
        }
      }

      /* sum */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (logp[m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;

      exponent = logp[k] + log (mo[k]->prior) - sum;

      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent too small (%g), clamping it!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }

      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));

      performance += (double) sigmoid;
    }
  }
  m_free (logp);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return performance;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_print_statistics (model ** mo, sequence_t ** sqs, int noC,
                                       int *falseP, int *falseN)
{
#define CUR_PROC "discrime_print_statistics"

  int k, l, m;
  int argmax;
  double *logp, max;

  sequence_t *sq;

  ARRAY_CALLOC (logp, noC);

  for (k = 0; k < noC; k++) {
    falseP[k] = 0;
    falseN[k] = 0;
  }

  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    printf ("Looking at training tokens of class %d\n", k);
    for (l = 0; l < sq->seq_number; l++) {
      argmax = 0, max = -DBL_MAX;
      for (m = 0; m < noC; m++) {
        foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
        if (m == 0 || max < logp[m]) {
          max = logp[m];
          argmax = m;
        }
      }

      if (sq->seq_number < 11 && noC < 8) {
        /* printing fancy statistics */
        printf ("%2d: %8.4g", l, logp[0] - logp[argmax]);
        for (m = 1; m < noC; m++)
          printf (", %8.4g", logp[m] - logp[argmax]);
        printf (" \t+(%g)\n", logp[argmax]);
      }

      /* counting false positives and false negatives */
      if (argmax != k) {
        falseP[argmax]++;
        falseN[k]++;
      }
    }
    printf ("%d false negatives in class %d.\n", falseN[k], k);
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (logp);
  return;
#undef CUR_PROC
}
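
/* Illustrative sketch (hypothetical helper): the smoothed score that
 * discrime_compute_performance() accumulates per training token, written
 * as a standalone function over the log-likelihoods lp[] and priors pr[].
 * The sigmoid approaches 1 when the correct class k dominates the
 * posterior and 0 when it is dominated, so the sum over all tokens is a
 * smoothed count of correct classifications. */
#ifdef DISCRIME_EXAMPLES
static double example_token_score (const double *lp, const double *pr,
                                   int noC, int k)
{
  int m;
  double max = -DBL_MAX, sum = 0.0, exponent;
  for (m = 0; m < noC; m++)
    if (m != k && lp[m] + log (pr[m]) > max)
      max = lp[m] + log (pr[m]);
  for (m = 0; m < noC; m++)
    if (m != k)
      sum += exp (lp[m] + log (pr[m]) - max);
  exponent = lp[k] + log (pr[k]) - (log (sum) + max);
  return (double) (1.0L / (1.0L + expl (-discrime_alpha * exponent)));
}
#endif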

/*----------------------------------------------------------------------------*/
static void discrime_trim_gradient (double *new, int length)
{
#define CUR_PROC "discrime_trim_gradient"

  int i, argmin = 0;
  double min, sum = 0;

  for (i = 1; i < length; i++)
    if (new[i] < new[argmin])
      argmin = i;

  min = new[argmin];

  /* shift all entries above zero if necessary ... */
  if (new[argmin] < 0.0)
    for (i = 0; i < length; i++)
      new[i] -= (1.1 * min);

  /* ... and renormalise to a stochastic vector */
  for (i = 0; i < length; i++)
    sum += new[i];

  for (i = 0; i < length; i++)
    new[i] /= sum;

  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_pi_gradient (model ** mo, sequence_t ** sqs,
                                         int noC, int class,
                                         double ****expect_pi,
                                         long double **omega,
                                         long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_gradient"

  int i, l, m;

  double *pi_old = NULL;
  double *pi_new = NULL;

  double sum;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum += omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum -= omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    if (fabs (pi_old[i]) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = pi_old[i] + discrime_lambda * (sum / pi_old[i]);
  }

  /* change parameters to fit into the valid parameter range */
  discrime_trim_gradient (pi_new, mo[class]->N);

  /* update parameters */
  for (i = 0; i < mo[class]->N; i++)
    mo[class]->s[i].pi = pi_new[i];

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}
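
/* The update above (and its A/B siblings below) implements one gradient
 * step: for each start parameter pi_i of model `class` the derivative of
 * the discriminative criterion is approximated by
 *   dF/dpi_i = ( sum_l omega[class][l] * expect_pi[class][l][class][i]
 *              - sum over m != class, l of
 *                omegati[m][l][class] * expect_pi[m][l][class][i] ) / pi_i
 * and pi_i moves to pi_i + discrime_lambda * dF/dpi_i, after which
 * discrime_trim_gradient() projects the vector back onto the probability
 * simplex. Worked example of that projection: input {0.50, -0.10, 0.60}
 * has min = -0.10 < 0, so every entry is shifted by -1.1*min = +0.11,
 * giving {0.61, 0.01, 0.71}; dividing by the sum 1.33 yields the
 * stochastic vector {0.459, 0.008, 0.534}. */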

/*----------------------------------------------------------------------------*/
static void discrime_update_a_gradient (model ** mo, sequence_t ** sqs,
                                        int noC, int class,
                                        double ****expect_a,
                                        long double **omega,
                                        long double ***omegati)
{
#define CUR_PROC "discrime_update_a_gradient"

  int i, j, g, l, m;
  int j_id;

  double *a_old = NULL;
  double *a_new = NULL;

  double sum;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class */
  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {

    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum += omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum -= omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (a_old[j]) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = a_old[j] + discrime_lambda * (sum / a_old[j]);
    }

    /* change parameters to fit into the valid parameter range */
    discrime_trim_gradient (a_new, mo[class]->s[i].out_states);

    /* update parameters */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = a_new[j];

      /* mirror out_a to the corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_b_gradient (model ** mo, sequence_t ** sqs,
                                        int noC, int class,
                                        double *****expect_b,
                                        long double **omega,
                                        long double ***omegati)
{
#define CUR_PROC "discrime_update_b_gradient"

  int i, h, l, m;
  int hist, size;

  double *b_old = NULL;
  double *b_new = NULL;

  double sum;

  ARRAY_CALLOC (b_old, mo[class]->M);
  ARRAY_CALLOC (b_new, mo[class]->M);

  /* updating current class */

  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    if (mo[class]->s[i].fix)
      continue;

    size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo[class]->M) {

      for (h = hist; h < hist + mo[class]->M; h++) {

        /* iterate over all classes and training sequences */
        sum = 0.0;
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum += omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum -= omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
        /* check for valid new parameters; b_old/b_new hold only the current
           history block of M emission parameters, hence the h-hist offset
           (indexing them with h overran the buffers for higher-order
           states) */
        b_old[h - hist] = mo[class]->s[i].b[h];
        if (fabs (b_old[h - hist]) == 0.0)
          b_new[h - hist] = b_old[h - hist];
        else
          b_new[h - hist] = b_old[h - hist]
            + discrime_lambda * (sum / b_old[h - hist]);
      }

      /* change parameters to fit into the valid parameter range
         (was mo[0]->M, which is wrong if the models differ in alphabet
         size) */
      discrime_trim_gradient (b_new, mo[class]->M);

      /* update parameters */
      for (h = hist; h < hist + mo[class]->M; h++)
        mo[class]->s[i].b[h] = b_new[h - hist];
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (b_old);
  m_free (b_new);
  return;
#undef CUR_PROC
}
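
/* The three *_closed updates below are the alternative to the gradient
 * step: setting the derivative of the Lagrangian-constrained criterion to
 * zero yields the closed-form reestimate theta = sum / lagrangian, where
 * `sum` and `lagrangian` accumulate the same omega-weighted expectations
 * as above. The result is then damped against the old value with
 * TRIM(o,n) = (1-LAMBDA)*o + LAMBDA*n (LAMBDA = 0.14, see top of file);
 * e.g. an old value of 0.8 and a raw estimate of 0.2 give
 * 0.86*0.8 + 0.14*0.2 = 0.716. */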

/*----------------------------------------------------------------------------*/
static void discrime_update_pi_closed (model ** mo, sequence_t ** sqs, int noC,
                                       int class, double lfactor,
                                       double ****expect_pi,
                                       long double **omega,
                                       long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_closed"

  int i, l, m;

  double *pi_old = NULL;
  double *pi_new = NULL;

  double sum, lagrangian;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* updating current class (all or only specified) */

  /* iterate over all states of the k-th model to compute the lagrangian
     multiplier */
  lagrangian = 0.0;
  for (i = 0; i < mo[class]->N; i++) {

    /* iterate over all classes */
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the
         first term */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          lagrangian -= omega[class][l] * expect_pi[class][l][class][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          lagrangian +=
            lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
  }

  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum -= omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum += lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    if (fabs (lagrangian) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = sum / lagrangian;
  }

  /* update parameters */
  for (i = 0; i < mo[class]->N; i++)
    mo[class]->s[i].pi = TRIM (pi_old[i], pi_new[i]);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}

/*----------------------------------------------------------------------------*/
static void discrime_update_a_closed (model ** mo, sequence_t ** sqs, int noC,
                                      int class, double lfactor,
                                      double ****expect_a,
                                      long double **omega,
                                      long double ***omegati)
{
#define CUR_PROC "discrime_update_a_closed"

  int i, j, g, l, m;
  int j_id;

  double *a_old = NULL;
  double *a_new = NULL;

  double sum, lagrangian;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class (all or only specified) */

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {

    /* compute the lagrangian multiplier */
    lagrangian = 0.0;
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];

      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            lagrangian -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            lagrangian += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
    }

    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (lagrangian) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = sum / lagrangian;
    }

    /* update parameters */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = TRIM (a_old[j], a_new[j]);

      /* mirror out_a to the corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_b_closed (model ** mo, sequence_t ** sqs, int noC,
                                      int class, double lfactor,
                                      double *****expect_b,
                                      long double **omega,
                                      long double ***omegati)
{
#define CUR_PROC "discrime_update_b_closed"

  int i, h, l, m;
  int hist, size;

  double *b_old = NULL;
  double *b_new = NULL;

  double sum, lagrangian;

  ARRAY_CALLOC (b_old, mo[class]->M);
  ARRAY_CALLOC (b_new, mo[class]->M);

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {
    if (mo[class]->s[i].fix)
      continue;

    size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo[class]->M) {

      /* compute the lagrangian multiplier */
      lagrangian = 0.0;
      for (h = hist; h < hist + mo[class]->M; h++) {

        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              lagrangian -= omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              lagrangian +=
                lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
      }

      for (h = hist; h < hist + mo[class]->M; h++) {
        /* iterate over all classes and training sequences */
        sum = 0.0;
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum -= omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum +=
                lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
        /* check for valid new parameters (h-hist offset: b_old/b_new hold
           only the current history block, see discrime_update_b_gradient) */
        b_old[h - hist] = mo[class]->s[i].b[h];
        if (fabs (lagrangian) == 0.0)
          b_new[h - hist] = b_old[h - hist];
        else
          b_new[h - hist] = sum / lagrangian;
      }

      /* update parameters */
      for (h = hist; h < hist + mo[class]->M; h++)
        mo[class]->s[i].b[h] = TRIM (b_old[h - hist], b_new[h - hist]);
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (b_old);
  m_free (b_new);
  return;
#undef CUR_PROC
}

/*----------------------------------------------------------------------------*/
/* returns the shrunken lfactor (the original declared this void and the
   computed factor was silently discarded by the caller) */
static double discrime_find_factor (model * mo, sequence_t ** sqs, int noC,
                                    int k, double lfactor,
                                    double ****expect_pi, double ****expect_a,
                                    double *****expect_b, long double **omega,
                                    long double ***omegati)
{
#define CUR_PROC "discrime_find_factor"

  int h, i, j, l, m;
  int size, hist, j_id;
  double self, other;
  sequence_t *sq;

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo->N; i++) {
    /* PI */
    /* iterate over all classes */
    self = other = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the
         first term */
      if (m == k)
        for (l = 0; l < sq->seq_number; l++)
          self += omega[k][l] * expect_pi[k][l][k][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          other += lfactor * omegati[m][l][k] * expect_pi[m][l][k][i];
    }
    if (self < other)
      lfactor *= self / other;

    /* A */
    for (j = 0; j < mo->s[i].out_states; j++) {
      j_id = mo->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      self = other = 0.0;
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == k)
          for (l = 0; l < sq->seq_number; l++)
            self += omega[k][l] * expect_a[k][l][k][i * mo->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            other += lfactor * omegati[m][l][k]
              * expect_a[m][l][k][i * mo->N + j_id];
      }
      if (self < other)
        lfactor *= self / other;
    }

    /* B */
    if (mo->s[i].fix)
      continue;

    size = (int) pow (mo->M, mo->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo->M) {
      for (h = hist; h < hist + mo->M; h++) {
        self = other = 0.0;
        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == k)
            for (l = 0; l < sqs[m]->seq_number; l++)
              self += omega[k][l] * expect_b[k][l][k][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              other += lfactor * omegati[m][l][k] * expect_b[m][l][k][i][h];
        }
        if (self < other)
          lfactor *= self / other;
      }
    }
  }

  return lfactor;

#undef CUR_PROC
}
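
/* Why the factor search above is needed: the closed-form update
 * sum/lagrangian can only yield a valid (non-negative) probability if, for
 * every parameter, the positive term `self` outweighs the down-weighted
 * negative term `other`. Whenever self < other, multiplying lfactor by
 * self/other rescales the negative contributions just enough to restore
 * self >= other for that parameter; the caller then backs off by a further
 * 1% (lfactor *= .99) for safety. */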

/*----------------------------------------------------------------------------*/
static int discrime_onestep (model ** mo, sequence_t ** sqs, int noC, int grad,
                             int class)
{
#define CUR_PROC "discrime_onestep"

  int j, l, retval = -1;

  /* expected use of parameters (for the partial derivatives) */
  double *****expect_b, ****expect_a, ****expect_pi;

  /* matrix of log probabilities */
  double ***log_p;

  /* matrices of outer derivatives */
  long double **omega, ***omegati;

  long double psum = 0, nsum = 0;
  double lfactor;

  if (-1 == discrime_galloc (mo, sqs, noC, &expect_b, &expect_a,
                             &expect_pi, &omega, &omegati, &log_p)) {
    printf ("Allocation Error.\n");
    return -1;
  }

  if (-1 == discrime_precompute (mo, sqs, noC, expect_b, expect_a, expect_pi,
                                 log_p)) {
    printf ("precompute Error.\n");
    goto FREE;
  }

  if (-1 == discrime_calculate_omega (mo, sqs, noC, omega, omegati, log_p)) {
    printf ("omega Error.\n");
    goto FREE;
  }

  if (grad) {
    /* updating parameters */
    discrime_update_pi_gradient (mo, sqs, noC, class, expect_pi, omega,
                                 omegati);
    discrime_update_a_gradient (mo, sqs, noC, class, expect_a, omega,
                                omegati);
    discrime_update_b_gradient (mo, sqs, noC, class, expect_b, omega,
                                omegati);
  }
  else {
    /* calculate a pleasant lfactor */
    for (j = 0; j < noC; j++) {
      for (l = 0; l < sqs[j]->seq_number; l++) {
        if (j == class)
          psum += omega[class][l];
        else
          nsum += omegati[j][l][class];
      }
    }
    if (fabs (nsum) > 1e-200 && psum < nsum)
      lfactor = (psum / nsum);
    else
      lfactor = 1.0;

    /* determining the maximal admissible lfactor (the original discarded
       the value computed by discrime_find_factor; it is returned now) */
    lfactor = discrime_find_factor (mo[class], sqs, noC, class, lfactor,
                                    expect_pi, expect_a, expect_b, omega,
                                    omegati);
    lfactor *= .99;
    printf ("Calculated L: %g\n", lfactor);

    /* updating parameters */
    discrime_update_pi_closed (mo, sqs, noC, class, lfactor, expect_pi, omega,
                               omegati);
    discrime_update_a_closed (mo, sqs, noC, class, lfactor, expect_a, omega,
                              omegati);
    discrime_update_b_closed (mo, sqs, noC, class, lfactor, expect_b, omega,
                              omegati);
  }

  retval = 0;
FREE:
  discrime_gfree (mo, sqs, noC, expect_b, expect_a, expect_pi, omega,
                  omegati, log_p);
  return retval;
#undef CUR_PROC
}

/*----------------------------------------------------------------------------*/
int discriminative (model ** mo, sequence_t ** sqs, int noC, int max_steps,
                    int gradient)
{
#define CUR_PROC "discriminative"

  double last_perf, cur_perf;
  int retval = -1, last_cer, cur_cer;

  double lambda = 0.0;
  double noiselevel = 0.0667;

  int fp, fn, totalseqs = 0, totalsyms = 0;

  int *falseP = NULL, *falseN = NULL;

  double *prior_backup = NULL;

  int i, k, step;

  model *last;

  ARRAY_CALLOC (falseP, noC);
  ARRAY_CALLOC (falseN, noC);
  ARRAY_CALLOC (prior_backup, noC);

  for (i = 0; i < noC; i++) {
    totalseqs += sqs[i]->seq_number;
    for (k = 0; k < sqs[i]->seq_number; k++)
      totalsyms += sqs[i]->seq_len[k];
  }

  /* set the priors from the number of training sequences and
     remember the old values */
  for (i = 0; i < noC; i++) {
    prior_backup[i] = mo[i]->prior;
    mo[i]->prior = (double) sqs[i]->seq_number / totalseqs;
    printf ("original prior: %g \t new prior %g\n", prior_backup[i],
            mo[i]->prior);
  }

  last_perf = discrime_compute_performance (mo, sqs, noC);
  cur_perf = last_perf;

  discrime_print_statistics (mo, sqs, noC, falseP, falseN);
  fp = fn = 0;
  for (i = 0; i < noC; i++) {
    printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
            model_likelihood (mo[i], sqs[i]), falseP[i]);
    fn += falseN[i];
    fp += falseP[i];
  }
  printf
    ("\n%d false positives and %d false negatives after ML-initialisation; Performance: %g.\n",
     fp, fn, last_perf);

  last_cer = cur_cer = fp;

  for (k = 0; k < noC; k++) {
    last = NULL;
    step = 0;
    if (gradient)
      lambda = .3;

    do {
      if (last)
        model_free (&last);

      /* save a copy of the currently trained model */
      last = model_copy (mo[k]);
      last_cer = cur_cer;
      last_perf = cur_perf;

      noiselevel /= 1.8;
      discrime_lambda = lambda / totalsyms;

      printf
        ("==============================================================\n");
      printf
        ("Optimising class %d, current step %d, lambda: %g noise: %g, gradient: %d\n",
         k, step, discrime_lambda, noiselevel, gradient);

/*       if (gradient) */
/*         model_add_noise(mo[k], noiselevel, 0); */

      discrime_onestep (mo, sqs, noC, gradient, k);

      cur_perf = discrime_compute_performance (mo, sqs, noC);
      discrime_print_statistics (mo, sqs, noC, falseP, falseN);

      fp = fn = 0;
      for (i = 0; i < noC; i++) {
        printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
                model_likelihood (mo[i], sqs[i]), falseP[i]);
        fn += falseN[i];
        fp += falseP[i];
      }
      cur_cer = fp;
      printf ("MAP=%12g -> training -> MAP=%12g", last_perf, cur_perf);
      printf (" %d false positives, %d false negatives\n", fp, fn);
      printf
        ("==============================================================\n");

    } while ((last_perf < cur_perf || cur_cer < last_cer)
             && (step++ < max_steps));

    /* the last step made things worse: roll back to the saved copy
       (free the rejected model first to avoid leaking it) */
    model_free (&mo[k]);
    mo[k] = model_copy (last);
    model_free (&last);
    cur_perf = last_perf;
    cur_cer = last_cer;
  }

  /* reset the priors to their old values */
  for (i = 0; i < noC; i++)
    mo[i]->prior = prior_backup[i];
  retval = 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (prior_backup);
  m_free (falseP);
  m_free (falseN);

  return retval;

#undef CUR_PROC
}
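
/* Hypothetical usage sketch (not part of the library; load_class_models()
 * and load_class_sequences() are placeholders for application code that
 * provides one ML-trained model and one training set per class): */
#ifdef DISCRIME_EXAMPLES
extern model **load_class_models (int noC);
extern sequence_t **load_class_sequences (int noC);

static int example_train (int noC)
{
  model **mo = load_class_models (noC);
  sequence_t **sqs = load_class_sequences (noC);
  /* at most 25 optimisation steps per class; gradient == 0 selects the
     closed-form (Lagrangian) update, non-zero the gradient update */
  return discriminative (mo, sqs, noC, 25, 0);
}
#endif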