/ghmm-0.7.0/ghmm/discrime.c
http://github.com/jcnossen/hmmgenefinder · C · 1283 lines · 883 code · 219 blank · 181 comment · 225 complexity · 332c05a6fbdf4aa4414bae754c4cfe3d MD5 · raw file
- /*******************************************************************************
- *
- * This file is part of the General Hidden Markov Model Library,
- * GHMM version __VERSION__, see http://ghmm.org
- *
- * Filename: ghmm/ghmm/discrime.c
- * Authors: Janne Grunau
- *
- * Copyright (C) 1998-2004 Alexander Schliep
- * Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
- * Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
- * Berlin
- *
- * Contact: schliep@ghmm.org
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
- * This file is version $Revision: 1322 $
- * from $Date: 2005-09-09 12:29:17 +0200 (Fri, 09 Sep 2005) $
- * last change by $Author: grunau $.
- *
- *******************************************************************************/
- #include <float.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <math.h>
- #include "ghmm.h"
- #include "mes.h"
- #include "matrix.h"
- #include "model.h"
- #include "foba.h"
- #include "reestimate.h"
- #include "gradescent.h"
- #include "discrime.h"
- #include <ghmm/internal.h>
- #define LAMBDA 0.14
- #define TRIM(o, n) ((1-LAMBDA)*(o) + LAMBDA*(n))
- #ifdef __STRICT_ANSI__
- #define logl(A) log(A)
- #endif
- #ifdef __STRICT_ANSI__
- #define expl(A) exp(A)
- #endif
- /* forward declaration */
- static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
- double ******matrix_b, double *****matrix_a,
- double *****matrix_pi, long double ***omega,
- long double ****omegati, double ****log_p);
- static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
- double *****matrix_b, double ****matrix_a,
- double ****matrix_pi, long double **omega,
- long double ***omegati, double ***log_p);
- static void discrime_trim_gradient (double *new, int length);
- double discrime_lambda = 0.0;
- double discrime_alpha = 1.0;
- /*----------------------------------------------------------------------------*/
- /** allocates memory for m and n matrices: */
/** Allocates all per-(class, sequence, model) auxiliary matrices used by
 *  one discriminative training step.
 *
 *  For every training class k, sequence l and model m:
 *    matrix_b[k][l][m][i][.] - expected emission counts of state i of model
 *                              m; row length is M^(order+1) (higher-order
 *                              emission histories)
 *    matrix_a[k][l][m][.]    - expected transition counts, flattened as
 *                              matrix_a[k][l][m][i*N + j]
 *    matrix_pi[k][l][m][.]   - expected initial-state counts
 *    log_p[k][l][m]          - log-likelihood of sequence l of class k
 *                              under model m
 *    omega[k][l], omegati[k][l][m] - outer derivatives of the smoothed
 *                              criterion (see discrime_calculate_omega)
 *
 *  @return 0 on success, -1 on allocation failure.  On failure the STOP
 *          label releases everything allocated so far via discrime_gfree().
 */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                            double ******matrix_b, double *****matrix_a,
                            double *****matrix_pi, long double ***omega,
                            long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"
  int i, k, l, m;

  /* first allocate memory for matrix_b: */
  ARRAY_CALLOC (*matrix_b, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_b)[k][l], noC);
      for (m = 0; m < noC; m++) {
        ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
        /* one emission row per state; its length depends on the state's
           history order: M^(order+1) entries */
        for (i = 0; i < mo[m]->N; i++)
          ARRAY_CALLOC ((*matrix_b)[k][l][m][i], model_ipow (mo[m], mo[m]->M, mo[m]->s[i].order + 1));
      }
    }
  }

  /* matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j] */
  ARRAY_CALLOC (*matrix_a, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_a)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
    }
  }

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
    }
  }

  /* allocate memory for matrices of likelihoods
     log_p[k][l][m] =
     log_prob of l-th sequence of k-th class under the m-th model */
  ARRAY_CALLOC (*log_p, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*log_p)[k][l], noC);
  }

  /* allocate memory for outer derivatives */
  ARRAY_CALLOC (*omega, noC);
  for (k = 0; k < noC; k++)
    ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);

  /* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
  ARRAY_CALLOC (*omegati, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*omegati)[k][l], noC);
  }
  return 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* NOTE(review): on this path the matrices are only partially filled;
     discrime_gfree must tolerate NULL sub-pointers (see there). */
  discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
                  *omega, *omegati, *log_p);
  return -1;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- /** frees memory for expectation matrices for all classes & training sequences*/
- static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
- double *****matrix_b, double ****matrix_a,
- double ****matrix_pi, long double **omega,
- long double ***omegati, double ***log_p)
- {
- #define CUR_PROC "discrime_gfree"
- int i, k, l, m;
- for (k = 0; k < noC; k++) {
- for (l = 0; l < sqs[k]->seq_number; l++) {
- for (m = 0; m < noC; m++) {
- for (i = 0; i < mo[m]->N; i++)
- m_free (matrix_b[k][l][m][i]);
- m_free (matrix_b[k][l][m]);
- }
- m_free (matrix_b[k][l]);
- }
- m_free (matrix_b[k]);
- }
- m_free (matrix_b);
- for (k = 0; k < noC; k++) {
- for (l = 0; l < sqs[k]->seq_number; l++) {
- for (m = 0; m < noC; m++)
- m_free (matrix_a[k][l][m]);
- m_free (matrix_a[k][l]);
- }
- m_free (matrix_a[k]);
- }
- m_free (matrix_a);
- for (k = 0; k < noC; k++) {
- for (l = 0; l < sqs[k]->seq_number; l++) {
- for (m = 0; m < noC; m++)
- m_free (matrix_pi[k][l][m]);
- m_free (matrix_pi[k][l]);
- }
- m_free (matrix_pi[k]);
- }
- m_free (matrix_pi);
- for (k = 0; k < noC; k++) {
- for (l = 0; l < sqs[k]->seq_number; l++)
- m_free (log_p[k][l]);
- m_free (log_p[k]);
- }
- m_free (log_p);
- for (k = 0; k < noC; k++)
- m_free (omega[k]);
- m_free (omega);
- for (k = 0; k < noC; k++) {
- for (l = 0; l < sqs[k]->seq_number; l++)
- m_free (omegati[k][l]);
- m_free (omegati[k]);
- }
- m_free (omegati);
- return;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
/** Computes the outer derivatives of the smoothed discriminative criterion.
 *
 *  For every training sequence l of class k the discriminant
 *      exponent = log P(O|mo_k) + log prior_k
 *                 - log sum_{m != k} P(O|mo_m) * prior_m
 *  is pushed through a sigmoid s = 1/(1+exp(-alpha*exponent));
 *  omega[k][l] = alpha * s * (1-s) is the sigmoid's derivative, and
 *  omegati[k][l][m] distributes it over the competing models m != k
 *  (zero for m == k, equation (2.9)).
 *
 *  log_p[k][l][m] must already be filled (see discrime_precompute).
 *  Always returns 0.
 */
static int discrime_calculate_omega (model ** mo, sequence_t ** sqs, int noC,
                                     long double **omega, long double ***omegati,
                                     double ***log_p)
{
#define CUR_PROC "discrime_calculate_omega"
  int k, l, m;
  int seq_no, argmax = 0;
  double sum, max;
  double exponent;
  long double sigmoid;

  /* compute outer derivatives */
  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    seq_no = sqs[k]->seq_number;
    /*iterate over all training sequences */
    for (l = 0; l < seq_no; l++) {
      /* 1.0 is an "unset" sentinel: any real log-probability plus
         log-prior is negative and can never equal 1.0 */
      max = 1.0;
      /* calculate log(sum[prob]) for logarithmic probabilities
         first determine the maximum */
      for (m = 0; m < noC; m++) {
        if (m != k
            && (1.0 == max || max < (log_p[k][l][m] + log (mo[m]->prior)))) {
          max = log_p[k][l][m] + log (mo[m]->prior);
          argmax = m;
        }
      }
      /* sum: log-sum-exp shifted by the maximum for numerical stability;
         the initial 1.0 is the argmax term's exp(0) */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (log_p[k][l][m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;

      exponent = log_p[k][l][k] + log (mo[k]->prior) - sum;
      /* clamp to avoid underflow in expl(); NOTE(review): this fires for
         very *negative* exponents (large magnitude), despite the wording
         of the message */
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent was too large (%g) cut it down!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }
      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
      omega[k][l] = discrime_alpha * sigmoid * (1.0 - sigmoid);
      /* printf("omega[%d][%d] = %Lg\n",k ,l, omega[k][l]); */

      /* omegati is not useful for m==k equation (2.9) */
      for (m = 0; m < noC; m++) {
        if (m == k)
          omegati[k][l][m] = 0;
        else
          omegati[k][l][m] =
            omega[k][l] * expl (log_p[k][l][m] -
                                sum) * (long double) mo[m]->prior;
        /* printf("%Lg, ", expl(log_p[k][l][m] - sum) * (long double)mo[m]->prior); */
        /* printf("omegati[%d][%d][%d] = %Lg\n",k ,l, m, omegati[k][l][m]); */
      }
      /* printf("\n"); */
    }
  }
  return 0;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
/** Fills the expectation matrices and the log-likelihood table for every
 *  (class, training sequence, model) triple.
 *
 *  For each triple the scaled forward/backward algorithm is run and
 *  gradescent_compute_expectations() accumulates how often each parameter
 *  (emission, transition, initial probability) of model m is expected to
 *  be used by sequence l of class k.
 *
 *  @return 0 on success, -1 if allocation or forward/backward fails.
 */
static int discrime_precompute (model ** mo, sequence_t ** sqs, int noC,
                                double *****expect_b, double ****expect_a,
                                double ****expect_pi, double ***log_p)
{
#define CUR_PROC "discrime_precompute"
  int k, l, m;
  int seq_len, success = 1;
  /* forward, backward variables and scaler */
  double **alpha, **beta, *scale;
  sequence_t *sq;

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    /*iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {
      seq_len = sq->seq_len[l];
      /* iterate over all classes */
      for (m = 0; m < noC; m++) {
        if (-1 ==
            reestimate_alloc_matvek (&alpha, &beta, &scale, seq_len,
                                     mo[m]->N))
          return -1;
        /* calculate forward and backward variables without labels: */
        if (-1 == foba_forward (mo[m], sq->seq[l], seq_len, alpha, scale,
                                &(log_p[k][l][m]))) {
          success = 0;
          printf ("forward\n");
          goto FREE;
        }
        if (-1 == foba_backward (mo[m], sq->seq[l], seq_len, beta, scale)) {
          success = 0;
          printf ("backward\n");
          goto FREE;
        }
        /* compute expectation matrices
           expect_*[k][l][m] holds the expected number how ofter a particular
           parameter of model m is used by l-th training sequence of class k */
        gradescent_compute_expectations (mo[m], alpha, beta, scale,
                                         sq->seq[l], seq_len,
                                         expect_b[k][l][m], expect_a[k][l][m],
                                         expect_pi[k][l][m]);
      FREE:
        /* reached on every iteration: releases alpha/beta/scale on both the
           success path and the forward/backward failure path */
        reestimate_free_matvek (alpha, beta, scale, seq_len);
        if (!success)
          return -1;
      }
    }
  }
  return 0;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- double discrime_compute_performance (model ** mo, sequence_t ** sqs, int noC)
- {
- #define CUR_PROC "discrime_compute_performance"
- int k, l, m, temp;
- int argmax = 0;
- double sum, max;
- double *logp;
- double exponent;
- long double sigmoid;
- double performance = 0.0;
- sequence_t *sq;
- ARRAY_CALLOC (logp, noC);
- /* iterate over all classes */
- for (k = 0; k < noC; k++) {
- sq = sqs[k];
- /*iterate over all training sequences */
- for (l = 0; l < sq->seq_number; l++) {
- /* iterate over all classes */
- for (m = 0; m < noC; m++) {
- temp = foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
- if (0 != temp)
- printf ("foba_logp error in sequence[%d][%d] under model %d (%g)\n",
- k, l, m, logp[m]);
- /*printf("foba_logp sequence[%d][%d] under model %d (%g)\n", k, l, m, logp[m]);*/
- }
- max = 1.0;
- for (m = 0; m < noC; m++) {
- if (m != k && (1.0 == max || max < (logp[m] + log (mo[m]->prior)))) {
- max = logp[m] + log (mo[m]->prior);
- argmax = m;
- }
- }
- /* sum */
- sum = 1.0;
- for (m = 0; m < noC; m++) {
- if (m != k && m != argmax)
- sum += exp (logp[m] + log (mo[m]->prior) - max);
- }
- sum = log (sum) + max;
- exponent = logp[k] + log (mo[k]->prior) - sum;
- if (exponent < logl (LDBL_MIN)) {
- printf ("exponent was too large (%g) cut it down!\n", exponent);
- exponent = (double) logl (LDBL_MIN);
- }
- sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));
- performance += (double) sigmoid;
- }
- }
- m_free (logp);
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- return performance;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
- static void discrime_print_statistics (model ** mo, sequence_t ** sqs, int noC,
- int *falseP, int *falseN)
- {
- #define CUR_PROC "discrime_print_statistics"
- int k, l, m;
- int argmax;
- double *logp, max;
- sequence_t *sq;
- ARRAY_CALLOC (logp, noC);
- for (k = 0; k < noC; k++) {
- falseP[k] = 0;
- falseN[k] = 0;
- }
- for (k = 0; k < noC; k++) {
- sq = sqs[k];
- printf ("Looking at training tokens of Class %d\n", k);
- for (l = 0; l < sq->seq_number; l++) {
- argmax = 0, max = -DBL_MAX;
- for (m = 0; m < noC; m++) {
- foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
- if (m == 0 || max < logp[m]) {
- max = logp[m];
- argmax = m;
- }
- }
- if (sq->seq_number < 11 && noC < 8) {
- /* printing fancy statistics */
- printf ("%2d: %8.4g", l, logp[0] - logp[argmax]);
- for (m = 1; m < noC; m++)
- printf (", %8.4g", logp[m] - logp[argmax]);
- printf (" \t+(%g)\n", logp[argmax]);
- }
- /* counting false positives and false negatives */
- if (argmax != k) {
- falseP[argmax]++;
- falseN[k]++;
- }
- }
- printf ("%d false negatives in class %d.\n", falseN[k], k);
- }
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- m_free (logp);
- return;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
/** Projects a raw gradient-updated parameter vector back onto the
 *  probability simplex: if any entry is negative, all entries are shifted
 *  up by 1.1 * |min| so they become positive, then the vector is
 *  normalised to sum to 1.
 *
 *  Fix: if the (shifted) vector sums to zero (all entries zero), the
 *  original code divided by zero and produced NaNs; such a vector is now
 *  left unchanged.
 *
 *  @param new     parameter vector, modified in place
 *  @param length  number of entries
 */
static void discrime_trim_gradient (double *new, int length)
{
#define CUR_PROC "discrime_trim_gradient"
  int i, argmin = 0;
  double min, sum = 0;

  /* locate the smallest entry */
  for (i = 1; i < length; i++)
    if (new[i] < new[argmin])
      argmin = i;
  min = new[argmin];

  /* shift everything up so all entries are positive */
  if (min < 0.0)
    for (i = 0; i < length; i++)
      new[i] -= (1.1 * min);

  for (i = 0; i < length; i++)
    sum += new[i];

  /* guard: an all-zero vector would otherwise yield 0/0 = NaN */
  if (sum == 0.0)
    return;

  for (i = 0; i < length; i++)
    new[i] /= sum;
  return;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- static void discrime_update_pi_gradient (model ** mo, sequence_t ** sqs, int noC,
- int class, double ****expect_pi,
- long double **omega, long double ***omegati)
- {
- #define CUR_PROC "discrime_update_pi_gradient"
- int i, l, m;
- double *pi_old = NULL;
- double *pi_new = NULL;
- double sum;
- sequence_t *sq = NULL;
- ARRAY_CALLOC (pi_old, mo[class]->N);
- ARRAY_CALLOC (pi_new, mo[class]->N);
- /* itarate over alls state of the current model */
- for (i = 0; i < mo[class]->N; i++) {
- /* iterate over all classes */
- sum = 0.0;
- for (m = 0; m < noC; m++) {
- sq = sqs[m];
- /*iterate over all training sequences */
- if (m == class)
- for (l = 0; l < sq->seq_number; l++)
- sum += omega[class][l] * expect_pi[class][l][class][i];
- else
- for (l = 0; l < sq->seq_number; l++)
- sum -= omegati[m][l][class] * expect_pi[m][l][class][i];
- }
- /* check for valid new parameter */
- pi_old[i] = mo[class]->s[i].pi;
- if (fabs (pi_old[i]) == 0.0)
- pi_new[i] = pi_old[i];
- else
- pi_new[i] = pi_old[i] + discrime_lambda * (sum / pi_old[i]);
- }
- /* change paramters to fit into valid parameter range */
- discrime_trim_gradient (pi_new, mo[class]->N);
- /* update parameters */
- for (i = 0; i < mo[class]->N; i++) {
- /* printf("update parameter Pi in state %d of model %d with: %5.3g",
- i, class, pi_new[i]);
- printf(" (%5.3g) old value.\n", pi_old[i]); */
- mo[class]->s[i].pi = pi_new[i];
- }
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- m_free (pi_old);
- m_free (pi_new);
- return;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
/** Gradient-ascent update of the transition probabilities of model
 *  `class`.  For every state i and outgoing transition j the partial
 *  derivative is accumulated from the expectation matrices (own class
 *  raises, competing classes lower), the row is re-normalised with
 *  discrime_trim_gradient(), and each updated out_a is mirrored into the
 *  matching in_a of the target state.
 */
static void discrime_update_a_gradient (model ** mo, sequence_t ** sqs, int noC,
                                        int class, double ****expect_a,
                                        long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_gradient"
  int i, j, g, l, m;
  int j_id;
  double *a_old = NULL;
  double *a_new = NULL;
  double sum;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class */
  /* itarate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          /* NOTE(review): the unary plus in [+class] is a no-op,
             presumably a typo for [class] */
          for (l = 0; l < sq->seq_number; l++)
            sum += omega[class][l]
              * expect_a[class][l][+class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum -= omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (a_old[j]) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = a_old[j] + discrime_lambda * (sum / a_old[j]);
    }
    /* change paramters to fit into valid parameter range */
    discrime_trim_gradient (a_new, mo[class]->s[i].out_states);
    /* update parameter */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      /* printf("update parameter A[%d] in state %d of model %d with: %5.3g",
         j, i, class, a_new[j]);
         printf(" (%5.3g) old value.\n", a_old[j]); */
      mo[class]->s[i].out_a[j] = a_new[j];
      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- static void discrime_update_b_gradient (model ** mo, sequence_t ** sqs, int noC,
- int class, double *****expect_b,
- long double **omega, long double ***omegati)
- {
- #define CUR_PROC "discrime_update_b_gradient"
- int i, h, l, m;
- int hist, size;
- double *b_old = NULL;
- double *b_new = NULL;
- double sum;
- ARRAY_CALLOC (b_old, mo[class]->M);
- ARRAY_CALLOC (b_new, mo[class]->M);
- /* updating current class */
- /* itarate over alls state of the current model */
- for (i = 0; i < mo[class]->N; i++) {
- if (mo[class]->s[i].fix)
- continue;
- size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
- for (hist = 0; hist < size; hist += mo[class]->M) {
- for (h = hist; h < hist + mo[class]->M; h++) {
- /* iterate over all classes and training sequences */
- sum = 0.0;
- for (m = 0; m < noC; m++) {
- if (m == class)
- for (l = 0; l < sqs[m]->seq_number; l++)
- sum += omega[class][l] * expect_b[class][l][class][i][h];
- else
- for (l = 0; l < sqs[m]->seq_number; l++)
- sum -= omegati[m][l][class] * expect_b[m][l][class][i][h];
- }
- /* check for valid new parameters */
- b_old[h] = mo[class]->s[i].b[h];
- if (fabs (b_old[h]) == 0.0)
- b_new[h] = b_old[h];
- else
- b_new[h] = b_old[h] + discrime_lambda * (sum / b_old[h]);
- }
- /* change parameters to fit into valid parameter range */
- discrime_trim_gradient (b_new, mo[0]->M);
- for (h = hist; h < hist + mo[class]->M; h++) {
- /* update parameters */
- /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
- h, i, class, b_new[h]);
- printf(" (%5.3g) old value.\n", b_old[h]); */
- mo[class]->s[i].b[h] = b_new[h];
- }
- }
- }
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- m_free (b_old);
- m_free (b_new);
- return;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
/** Closed-form (Lagrange-multiplier) update of the initial-state
 *  probabilities of model `class`.  The multiplier normalises the
 *  estimates so they sum to 1; competing-class terms are damped by
 *  `lfactor`.  The new value is blended with the old one via the TRIM
 *  macro to avoid overly large steps.
 */
static void discrime_update_pi_closed (model ** mo, sequence_t ** sqs, int noC,
                                       int class, double lfactor,
                                       double ****expect_pi, long double **omega,
                                       long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_closed"
  int i, l, m;
  double *pi_old = NULL;
  double *pi_new = NULL;
  double sum, lagrangian;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* updating current class (all or only specified) */
  /* itarate over alls state of the k-th model to compute lagrangian multiplier */
  lagrangian = 0.0;
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first term */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          lagrangian -= omega[class][l] * expect_pi[class][l][class][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          lagrangian +=
            lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
  }

  /* numerator: same structure as the multiplier, but per single state i */
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /*iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum -= omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum += lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter (keep old value if the multiplier
       vanished, i.e. the estimate is undefined) */
    pi_old[i] = mo[class]->s[i].pi;
    if (fabs (lagrangian) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = sum / lagrangian;
  }

  /* update parameters: blend old and new via TRIM to damp the step */
  for (i = 0; i < mo[class]->N; i++) {
    /* printf("update parameter Pi in state %d of model %d with: %5.3g",
       i, class, pi_new[i]);
       printf(" (%5.3g) old value.\n", pi_old[i]); */
    mo[class]->s[i].pi = TRIM (pi_old[i], pi_new[i]);
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
/** Closed-form (Lagrange-multiplier) update of the transition
 *  probabilities of model `class`.  Per state i a multiplier normalises
 *  the outgoing row; competing-class terms are damped by `lfactor`.
 *  Updated values are blended with the old ones via TRIM and mirrored
 *  from out_a into the matching in_a of the target state.
 */
static void discrime_update_a_closed (model ** mo, sequence_t ** sqs, int noC,
                                      int class, double lfactor, double ****expect_a,
                                      long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_closed"
  int i, j, g, l, m;
  int j_id;
  double *a_old = NULL;
  double *a_new = NULL;
  double sum, lagrangian;
  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class (all or only specified) */
  /* itarate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {
    /* compute lagrangian multiplier */
    lagrangian = 0.0;
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            lagrangian -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            lagrangian += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
    }
    /* numerator per outgoing transition j */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          /* NOTE(review): the unary plus in [+class] is a no-op,
             presumably a typo for [class] */
          for (l = 0; l < sq->seq_number; l++)
            sum -= omega[class][l]
              * expect_a[class][l][+class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter (keep old value if the multiplier
         vanished) */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (lagrangian) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = sum / lagrangian;
    }
    /* update parameter: blend old and new via TRIM to damp the step */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = TRIM (a_old[j], a_new[j]);
      /*printf("update parameter A[%d] in state %d of model %d with: %5.3g",
         j, i, class, mo[class]->s[i].out_a[j]);
         printf(" (%5.3g) old value, %5.3g estimted\n", a_old[j], a_new[j]); */
      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- static void discrime_update_b_closed (model ** mo, sequence_t ** sqs, int noC,
- int class, double lfactor,
- double *****expect_b, long double **omega,
- long double ***omegati)
- {
- #define CUR_PROC "discrime_update_b_closed"
- int i, h, l, m;
- int hist, size;
- double *b_old = NULL;
- double *b_new = NULL;
- double sum, lagrangian;
- ARRAY_CALLOC (b_old, mo[class]->M);
- ARRAY_CALLOC (b_new, mo[class]->M);
- /* itarate over alls state of the k-th model */
- for (i = 0; i < mo[class]->N; i++) {
- if (mo[class]->s[i].fix)
- continue;
- size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
- for (hist = 0; hist < size; hist += mo[class]->M) {
- /* compute lagrangian multiplier */
- lagrangian = 0.0;
- for (h = hist; h < hist + mo[class]->M; h++) {
- /* iterate over all classes and training sequences */
- for (m = 0; m < noC; m++) {
- if (m == class)
- for (l = 0; l < sqs[m]->seq_number; l++)
- lagrangian -= omega[class][l] * expect_b[class][l][class][i][h];
- else
- for (l = 0; l < sqs[m]->seq_number; l++)
- lagrangian +=
- lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
- }
- }
- for (h = hist; h < hist + mo[class]->M; h++) {
- /* iterate over all classes and training sequences */
- sum = 0.0;
- for (m = 0; m < noC; m++) {
- if (m == class)
- for (l = 0; l < sqs[m]->seq_number; l++)
- sum -= omega[class][l] * expect_b[class][l][class][i][h];
- else
- for (l = 0; l < sqs[m]->seq_number; l++)
- sum +=
- lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
- }
- /* check for valid new parameters */
- b_old[h] = mo[class]->s[i].b[h];
- if (fabs (lagrangian) == 0.0)
- b_new[h] = b_old[h];
- else
- b_new[h] = sum / lagrangian;
- }
- /* update parameters */
- for (h = hist; h < hist + mo[class]->M; h++) {
- mo[class]->s[i].b[h] = TRIM (b_old[h], b_new[h]);
- /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
- h, i, class, mo[class]->s[i].b[h]);
- printf(" (%5.3g) old value, estimate %5.3g\n", b_old[h], b_new[h]); */
- }
- }
- }
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- m_free (b_old);
- m_free (b_new);
- return;
- #undef CUR_PROC
- }
- /*----------------------------------------------------------------------------*/
/** Shrinks the damping factor `lfactor` until, for every parameter of
 *  model `mo` (pi, a and b expectations), the own-class contribution
 *  (`self`) dominates the damped competing-class contribution (`other`),
 *  so the closed-form updates stay non-negative.
 *
 *  NOTE(review): `lfactor` is passed BY VALUE, so the reductions computed
 *  here never reach the caller (discrime_onestep) — the function currently
 *  has no observable effect.  Confirm whether a pointer/return value was
 *  intended; fixing it requires a coordinated change in the caller.
 */
static void discrime_find_factor (model * mo, sequence_t ** sqs, int noC, int k,
                                  double lfactor, double ****expect_pi,
                                  double ****expect_a, double *****expect_b,
                                  long double **omega, long double ***omegati)
{
#define CUR_PROC "driscrime_find_factor"
  int h, i, j, l, m;
  int size, hist, j_id;
  double self, other;
  sequence_t *sq;

  /* itarate over all states of the k-th model */
  for (i = 0; i < mo->N; i++) {
    /* PI */
    /* iterate over all classes */
    self = other = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first term */
      if (m == k)
        for (l = 0; l < sq->seq_number; l++)
          self += omega[k][l] * expect_pi[k][l][k][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          other += lfactor * omegati[m][l][k] * expect_pi[m][l][k][i];
    }
    /* shrink lfactor so the damped competitor term no longer dominates */
    if (self < other)
      lfactor *= self / other;
    /*printf("PI[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/

    /* A */
    for (j = 0; j < mo->s[i].out_states; j++) {
      j_id = mo->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      self = other = 0.0;
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == k)
          for (l = 0; l < sq->seq_number; l++)
            self += omega[k][l] * expect_a[k][l][k][i * mo->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            other += lfactor * omegati[m][l][k] * expect_a[m][l][k][i * mo->N +
                                                                    j_id];
      }
      if (self < other)
        lfactor *= self / other;
      /*printf(" A[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
    }

    /* B (skipped for states with fixed emissions) */
    if (mo->s[i].fix)
      continue;
    /* total number of emission entries of this state: M^(order+1) */
    size = (int) pow (mo->M, mo->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo->M) {
      for (h = hist; h < hist + mo->M; h++) {
        self = other = 0.0;
        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == k)
            for (l = 0; l < sqs[m]->seq_number; l++)
              self += omega[k][l] * expect_b[k][l][k][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              other += lfactor * omegati[m][l][k] * expect_b[m][l][k][i][h];
        }
        if (self < other)
          lfactor *= self / other;
        /*printf(" B[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
      }
    }
  }
  return;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
/** Performs one discriminative update step for model `class`.
 *
 *  Allocates the expectation matrices, fills them (forward/backward),
 *  computes the outer derivatives, then applies either the gradient
 *  updates (grad != 0) or the closed-form updates with a damping factor
 *  lfactor (grad == 0).
 *
 *  @return 0 on success, -1 on any allocation/computation error.
 */
static int discrime_onestep (model ** mo, sequence_t ** sqs, int noC, int grad,
                             int class)
{
#define CUR_PROC "driscrime_onestep"
  int j, l, retval = -1;
  /* expected use of parameter (for partial derivatives) */
  double *****expect_b, ****expect_a, ****expect_pi;
  /* matrix of log probabilities */
  double ***log_p;
  /* matrix of outer derivatives */
  long double **omega, ***omegati;
  long double psum = 0, nsum = 0;
  double lfactor;

  if (-1 == discrime_galloc (mo, sqs, noC, &expect_b, &expect_a,
                             &expect_pi, &omega, &omegati, &log_p)) {
    printf ("Allocation Error.\n");
    return -1;
  }
  if (-1 ==
      discrime_precompute (mo, sqs, noC, expect_b, expect_a, expect_pi,
                           log_p)) {
    printf ("precompute Error.\n");
    goto FREE;
  }
  if (-1 == discrime_calculate_omega (mo, sqs, noC, omega, omegati, log_p)) {
    printf ("omega Error.\n");
    goto FREE;
  }

  if (grad) {
    /* updateing parameters */
    discrime_update_pi_gradient (mo, sqs, noC, class, expect_pi, omega,
                                 omegati);
    discrime_update_a_gradient (mo, sqs, noC, class, expect_a, omega,
                                omegati);
    discrime_update_b_gradient (mo, sqs, noC, class, expect_b, omega,
                                omegati);
  }
  else {
    /* calculate a pleaseant lfactor: ratio of the summed own-class to
       competing-class outer derivatives, capped at 1.0 */
    for (j = 0; j < noC; j++) {
      for (l = 0; l < sqs[j]->seq_number; l++) {
        if (j == class)
          psum += omega[class][l];
        else
          nsum += omegati[j][l][class];
      }
    }
    if (fabs (nsum) > 1e-200 && psum < nsum)
      lfactor = (psum / nsum);
    else
      lfactor = 1.0;
    /* determing maximum lfactor.
       NOTE(review): lfactor is passed by value, so any reduction computed
       inside discrime_find_factor is discarded here — confirm intended */
    discrime_find_factor (mo[class], sqs, noC, class, lfactor, expect_pi,
                          expect_a, expect_b, omega, omegati);
    lfactor *= .99;
    printf ("Calculated L: %g\n", lfactor);
    /* updating parameters */
    discrime_update_pi_closed (mo, sqs, noC, class, lfactor, expect_pi, omega,
                               omegati);
    discrime_update_a_closed (mo, sqs, noC, class, lfactor, expect_a, omega,
                              omegati);
    discrime_update_b_closed (mo, sqs, noC, class, lfactor, expect_b, omega,
                              omegati);
  }
  retval = 0;
FREE:
  discrime_gfree (mo, sqs, noC, expect_b, expect_a, expect_pi, omega,
                  omegati, log_p);
  return retval;
#undef CUR_PROC
}
- /*----------------------------------------------------------------------------*/
- int discriminative (model ** mo, sequence_t ** sqs, int noC, int max_steps,
- int gradient)
- {
- #define CUR_PROC "driscriminative"
- double last_perf, cur_perf;
- int retval=-1, last_cer, cur_cer;
- double lambda=0.0;
- double noiselevel = 0.0667;
- int fp, fn, totalseqs = 0, totalsyms = 0;
- int *falseP=NULL, *falseN=NULL;
- double * prior_backup=NULL;
- int i, k, step;
- model * last;
- ARRAY_CALLOC (falseP, noC);
- ARRAY_CALLOC (falseN, noC);
- ARRAY_CALLOC (prior_backup, noC);
- for (i = 0; i < noC; i++) {
- totalseqs += sqs[i]->seq_number;
- for (k = 0; k < sqs[i]->seq_number; k++)
- totalsyms += sqs[i]->seq_len[k];
- }
-
- /* setting priors from number of training sequences and
- remember the old values */
- for (i=0; i<noC; i++) {
- prior_backup[i] = mo[i]->prior;
- mo[i]->prior = (double)sqs[i]->seq_number / totalseqs;
- printf("original prior: %g \t new prior %g\n", prior_backup[i], mo[i]->prior);
- }
- last_perf = discrime_compute_performance (mo, sqs, noC);
- cur_perf = last_perf;
- discrime_print_statistics (mo, sqs, noC, falseP, falseN);
- fp = fn = 0;
- for (i = 0; i < noC; i++) {
- printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
- model_likelihood (mo[i], sqs[i]), falseP[i]);
- fn += falseN[i];
- fp += falseP[i];
- }
- printf
- ("\n%d false positives and %d false negatives after ML-initialisation; Performance: %g.\n",
- fp, fn, last_perf);
- last_cer = cur_cer = fp;
- for (k = 0; k < noC; k++) {
- last = NULL;
- step = 0;
- if (gradient)
- lambda = .3;
- do {
- if (last)
- model_free (&last);
- /* save a copy of the currently trained model */
- last = model_copy (mo[k]);
- last_cer = cur_cer;
- last_perf = cur_perf;
- noiselevel /= 1.8;
- discrime_lambda = lambda / totalsyms;
- printf
- ("==============================================================\n");
- printf
- ("Optimising class %d, current step %d, lambda: %g noise: %g, gradient: %d\n",
- k, step, discrime_lambda, noiselevel, gradient);
- /* if (gradient) */
- /* model_add_noise(mo[k], noiselevel, 0); */
- discrime_onestep (mo, sqs, noC, gradient, k);
- cur_perf = discrime_compute_performance (mo, sqs, noC);
- discrime_print_statistics (mo, sqs, noC, falseP, falseN);
- fp = fn = 0;
- for (i = 0; i < noC; i++) {
- printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
- model_likelihood (mo[i], sqs[i]), falseP[i]);
- fn += falseN[i];
- fp += falseP[i];
- }
- cur_cer = fp;
- printf ("MAP=%12g -> training -> MAP=%12g", last_perf, cur_perf);
- printf (" %d false positives, %d false negatives\n", fp, fn);
- printf
- ("==============================================================\n");
- } while ((last_perf < cur_perf || cur_cer < last_cer) && (step++ < max_steps));
- mo[k] = model_copy (last);
- model_free (&last);
- cur_perf = last_perf;
- cur_cer = last_cer;
- }
- /* resetting priors to old values */
- for (i = 0; i < noC; i++)
- mo[i]->prior = prior_backup[i];
- retval = 0;
- STOP: /* Label STOP from ARRAY_[CM]ALLOC */
- m_free (prior_backup);
- m_free (falseP);
- m_free (falseN);
- return retval;
- #undef CUR_PROC
- }