
/ghmm-0.7.0/ghmm/discrime.c

http://github.com/jcnossen/hmmgenefinder
C | 1283 lines | 883 code | 219 blank | 181 comment
Possible License(s): GPL-2.0
/*******************************************************************************
*
*       This file is part of the General Hidden Markov Model Library,
*       GHMM version __VERSION__, see http://ghmm.org
*
*       Filename: ghmm/ghmm/discrime.c
*       Authors:  Janne Grunau
*
*       Copyright (C) 1998-2004 Alexander Schliep
*       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
*       Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
*                               Berlin
*
*       Contact: schliep@ghmm.org
*
*       This library is free software; you can redistribute it and/or
*       modify it under the terms of the GNU Library General Public
*       License as published by the Free Software Foundation; either
*       version 2 of the License, or (at your option) any later version.
*
*       This library is distributed in the hope that it will be useful,
*       but WITHOUT ANY WARRANTY; without even the implied warranty of
*       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*       Library General Public License for more details.
*
*       You should have received a copy of the GNU Library General Public
*       License along with this library; if not, write to the Free Software
*       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*
*       This file is version $Revision: 1322 $
*                       from $Date: 2005-09-09 12:29:17 +0200 (Fri, 09 Sep 2005) $
*             last change by $Author: grunau $.
*
*******************************************************************************/

#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "ghmm.h"
#include "mes.h"
#include "matrix.h"
#include "model.h"
#include "foba.h"
#include "reestimate.h"
#include "gradescent.h"
#include "discrime.h"
#include <ghmm/internal.h>

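/* TRIM(o, n) blends the old parameter value o with the new estimate n,
   keeping a share of 1-LAMBDA of the old value; the closed-form updates
   below use it to damp each reestimation step. */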
#define LAMBDA 0.14
#define TRIM(o, n) ((1-LAMBDA)*(o) + LAMBDA*(n))

#ifdef __STRICT_ANSI__
#define logl(A) log(A)
#define expl(A) exp(A)
#endif

/* forward declarations */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                     double ******matrix_b, double *****matrix_a,
                     double *****matrix_pi, long double ***omega,
                     long double ****omegati, double ****log_p);

static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
                     double *****matrix_b, double ****matrix_a,
                     double ****matrix_pi, long double **omega,
                     long double ***omegati, double ***log_p);

static void discrime_trim_gradient (double *new, int length);

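/* Module-wide training knobs: discrime_lambda is the effective learning rate
   of the gradient updates (discriminative() sets it to lambda divided by the
   total number of training symbols), and discrime_alpha is the steepness of
   the sigmoid that smooths the classification decision. */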
double discrime_lambda = 0.0;
double discrime_alpha = 1.0;


/*----------------------------------------------------------------------------*/
/** allocates the expectation matrices and derivative vectors for all classes
    and training sequences */
static int discrime_galloc (model ** mo, sequence_t ** sqs, int noC,
                     double ******matrix_b, double *****matrix_a,
                     double *****matrix_pi, long double ***omega,
                     long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"

  int i, k, l, m;

  /* first allocate memory for matrix_b: */
  ARRAY_CALLOC (*matrix_b, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_b)[k][l], noC);
      for (m = 0; m < noC; m++) {
        ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
        for (i = 0; i < mo[m]->N; i++)
          ARRAY_CALLOC ((*matrix_b)[k][l][m][i],
                        model_ipow (mo[m], mo[m]->M, mo[m]->s[i].order + 1));
      }
    }
  }

  /* matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j] */
  ARRAY_CALLOC (*matrix_a, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_a)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
    }
  }

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
    }
  }

  /* allocate memory for matrices of likelihoods
     log_p[k][l][m] =
     log_prob of l-th sequence of k-th class under the m-th model */
  ARRAY_CALLOC (*log_p, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*log_p)[k][l], noC);
  }

  /* allocate memory for outer derivatives */
  ARRAY_CALLOC (*omega, noC);
  for (k = 0; k < noC; k++)
    ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);

  /* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
  ARRAY_CALLOC (*omegati, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*omegati)[k][l], noC);
  }

  return 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
                  *omega, *omegati, *log_p);
  return -1;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
/** frees memory for expectation matrices for all classes & training sequences */
static void discrime_gfree (model ** mo, sequence_t ** sqs, int noC,
                     double *****matrix_b, double ****matrix_a,
                     double ****matrix_pi, long double **omega,
                     long double ***omegati, double ***log_p)
{
#define CUR_PROC "discrime_gfree"

  int i, k, l, m;

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++) {
        for (i = 0; i < mo[m]->N; i++)
          m_free (matrix_b[k][l][m][i]);
        m_free (matrix_b[k][l][m]);
      }
      m_free (matrix_b[k][l]);
    }
    m_free (matrix_b[k]);
  }
  m_free (matrix_b);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++)
        m_free (matrix_a[k][l][m]);
      m_free (matrix_a[k][l]);
    }
    m_free (matrix_a[k]);
  }
  m_free (matrix_a);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++) {
      for (m = 0; m < noC; m++)
        m_free (matrix_pi[k][l][m]);
      m_free (matrix_pi[k][l]);
    }
    m_free (matrix_pi[k]);
  }
  m_free (matrix_pi);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++)
      m_free (log_p[k][l]);
    m_free (log_p[k]);
  }
  m_free (log_p);

  for (k = 0; k < noC; k++)
    m_free (omega[k]);
  m_free (omega);

  for (k = 0; k < noC; k++) {
    for (l = 0; l < sqs[k]->seq_number; l++)
      m_free (omegati[k][l]);
    m_free (omegati[k]);
  }
  m_free (omegati);

  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
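/* discrime_calculate_omega computes the outer derivatives of the smoothed
   classification criterion.  For the l-th sequence O of class k let

     d = log P(O | mo[k]) + log prior[k]
         - log( sum over m != k of P(O | mo[m]) * prior[m] ),

   where the log of the sum is evaluated with the usual max-shift trick.
   With sigmoid(d) = 1 / (1 + exp(-alpha * d)) the weights are

     omega[k][l]      = alpha * sigmoid(d) * (1 - sigmoid(d)),
     omegati[k][l][m] = omega[k][l] * P(O | mo[m]) * prior[m]
                        / (sum over m' != k of P(O | mo[m']) * prior[m'])

   for m != k; they weight the expectations in the parameter updates below. */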
static int discrime_calculate_omega (model ** mo, sequence_t ** sqs, int noC,
                              long double **omega, long double ***omegati,
                              double ***log_p)
{
#define CUR_PROC "discrime_calculate_omega"
  int k, l, m;
  int seq_no, argmax = 0;
  double sum, max;
  double exponent;
  long double sigmoid;

  /* compute outer derivatives */
  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    seq_no = sqs[k]->seq_number;
    /* iterate over all training sequences */
    for (l = 0; l < seq_no; l++) {

      max = 1.0;
      /* compute log(sum of probabilities) from the logarithmic
         probabilities; first determine the maximum term */
      for (m = 0; m < noC; m++) {
        if (m != k
            && (1.0 == max || max < (log_p[k][l][m] + log (mo[m]->prior)))) {
          max = log_p[k][l][m] + log (mo[m]->prior);
          argmax = m;
        }
      }

      /* sum */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (log_p[k][l][m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;

      exponent = log_p[k][l][k] + log (mo[k]->prior) - sum;
      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent was too small (%g), clamping it!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }

      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));

      omega[k][l] = discrime_alpha * sigmoid * (1.0 - sigmoid);
/*       printf("omega[%d][%d] = %Lg\n",k ,l, omega[k][l]); */
      /* omegati is not used for m==k, cf. equation (2.9) */
      for (m = 0; m < noC; m++) {
        if (m == k)
          omegati[k][l][m] = 0;
        else
          omegati[k][l][m] =
            omega[k][l] * expl (log_p[k][l][m] -
                                sum) * (long double) mo[m]->prior;
/* 	printf("%Lg, ", expl(log_p[k][l][m] - sum) * (long double)mo[m]->prior); */
/* 	printf("omegati[%d][%d][%d] = %Lg\n",k ,l, m, omegati[k][l][m]); */
      }
/*       printf("\n"); */
    }
  }
  return 0;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
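/* discrime_precompute runs one forward/backward pass for every combination of
   training sequence and model, storing both the log-likelihoods and the
   expected parameter usage counts; this is the E-step-like computation shared
   by all subsequent updates. */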
static int discrime_precompute (model ** mo, sequence_t ** sqs, int noC,
                         double *****expect_b, double ****expect_a,
                         double ****expect_pi, double ***log_p)
{
#define CUR_PROC "discrime_precompute"

  int k, l, m;
  int seq_len, success = 1;

  /* forward, backward variables and scaler */
  double **alpha, **beta, *scale;

  sequence_t *sq;

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {

    sq = sqs[k];

    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {

      seq_len = sq->seq_len[l];

      /* iterate over all classes */
      for (m = 0; m < noC; m++) {

        if (-1 ==
            reestimate_alloc_matvek (&alpha, &beta, &scale, seq_len,
                                     mo[m]->N))
          return -1;

        /* calculate forward and backward variables without labels: */
        if (-1 == foba_forward (mo[m], sq->seq[l], seq_len, alpha, scale,
                                &(log_p[k][l][m]))) {
          success = 0;
          printf ("forward\n");
          goto FREE;
        }
        if (-1 == foba_backward (mo[m], sq->seq[l], seq_len, beta, scale)) {
          success = 0;
          printf ("backward\n");
          goto FREE;
        }

        /* compute expectation matrices:
           expect_*[k][l][m] holds the expected number of times a particular
           parameter of model m is used by the l-th training sequence of
           class k */
        gradescent_compute_expectations (mo[m], alpha, beta, scale,
                                         sq->seq[l], seq_len,
                                         expect_b[k][l][m], expect_a[k][l][m],
                                         expect_pi[k][l][m]);

      FREE:
        reestimate_free_matvek (alpha, beta, scale, seq_len);
        if (!success)
          return -1;
      }
    }
  }
  return 0;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
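/* The performance measure is the sum of sigmoid(d) over all training
   sequences (with d as in discrime_calculate_omega above); since sigmoid(d)
   is close to 1 for correctly classified and close to 0 for misclassified
   sequences, it is a smoothed count of correct classifications. */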
double discrime_compute_performance (model ** mo, sequence_t ** sqs, int noC)
{
#define CUR_PROC "discrime_compute_performance"

  int k, l, m, temp;
  int argmax = 0;
  double sum, max;
  double *logp;
  double exponent;
  long double sigmoid;
  double performance = 0.0;

  sequence_t *sq;

  ARRAY_CALLOC (logp, noC);

  /* iterate over all classes */
  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    /* iterate over all training sequences */
    for (l = 0; l < sq->seq_number; l++) {

      /* iterate over all classes */
      for (m = 0; m < noC; m++) {
        temp = foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
        if (0 != temp)
          printf ("foba_logp error in sequence[%d][%d] under model %d (%g)\n",
                  k, l, m, logp[m]);
        /*printf("foba_logp sequence[%d][%d] under model %d (%g)\n", k, l, m, logp[m]);*/
      }

      max = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && (1.0 == max || max < (logp[m] + log (mo[m]->prior)))) {
          max = logp[m] + log (mo[m]->prior);
          argmax = m;
        }
      }

      /* sum */
      sum = 1.0;
      for (m = 0; m < noC; m++) {
        if (m != k && m != argmax)
          sum += exp (logp[m] + log (mo[m]->prior) - max);
      }
      sum = log (sum) + max;

      exponent = logp[k] + log (mo[k]->prior) - sum;

      if (exponent < logl (LDBL_MIN)) {
        printf ("exponent was too small (%g), clamping it!\n", exponent);
        exponent = (double) logl (LDBL_MIN);
      }

      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * exponent));

      performance += (double) sigmoid;
    }
  }
  m_free (logp);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return performance;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_print_statistics (model ** mo, sequence_t ** sqs, int noC,
                                int *falseP, int *falseN)
{
#define CUR_PROC "discrime_print_statistics"

  int k, l, m;
  int argmax;
  double *logp, max;

  sequence_t *sq;

  ARRAY_CALLOC (logp, noC);

  for (k = 0; k < noC; k++) {
    falseP[k] = 0;
    falseN[k] = 0;
  }

  for (k = 0; k < noC; k++) {
    sq = sqs[k];
    printf ("Looking at training tokens of Class %d\n", k);
    for (l = 0; l < sq->seq_number; l++) {
      argmax = 0, max = -DBL_MAX;
      for (m = 0; m < noC; m++) {
        foba_logp (mo[m], sq->seq[l], sq->seq_len[l], &(logp[m]));
        if (m == 0 || max < logp[m]) {
          max = logp[m];
          argmax = m;
        }
      }

      if (sq->seq_number < 11 && noC < 8) {
        /* printing fancy statistics */
        printf ("%2d: %8.4g", l, logp[0] - logp[argmax]);
        for (m = 1; m < noC; m++)
          printf (",  %8.4g", logp[m] - logp[argmax]);
        printf ("  \t+(%g)\n", logp[argmax]);
      }

      /* counting false positives and false negatives */
      if (argmax != k) {
        falseP[argmax]++;
        falseN[k]++;
      }
    }
    printf ("%d false negatives in class %d.\n", falseN[k], k);
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (logp);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
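/* discrime_trim_gradient maps an updated parameter vector back onto the
   probability simplex: if the smallest entry is negative, all entries are
   shifted up by 1.1 times its absolute value, then the vector is
   renormalised to sum to one. */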
static void discrime_trim_gradient (double *new, int length)
{
#define CUR_PROC "discrime_trim_gradient"

  int i, argmin = 0;
  double min, sum = 0;

/*   printf("unnormalised: %g", new[0]); */
/*   for (i=1; i<length; i++) */
/*     printf(", %g", new[i]); */
/*   printf("\n"); */

  for (i = 1; i < length; i++)
    if (new[i] < new[argmin])
      argmin = i;

  min = new[argmin];

  if (new[argmin] < 0.0)
    for (i = 0; i < length; i++)
      new[i] -= (1.1 * min);

  for (i = 0; i < length; i++)
    sum += new[i];

  for (i = 0; i < length; i++)
    new[i] /= sum;

/*   printf("  normalised: %g", new[0]); */
/*   for (i=1; i<length; i++) */
/*     printf(", %g", new[i]); */
/*   printf("\n"); */

  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
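/* The three discrime_update_*_gradient functions perform one gradient-ascent
   step on the smoothed criterion.  For each parameter theta the derivative
   has the form (weighted expectation sum) / theta: sequences of the model's
   own class push the parameter up (weight omega), sequences of the other
   classes push it down (weight omegati); the result is projected back onto
   the simplex by discrime_trim_gradient. */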
static void discrime_update_pi_gradient (model ** mo, sequence_t ** sqs, int noC,
                                  int class, double ****expect_pi,
                                  long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_gradient"

  int i, l, m;

  double *pi_old = NULL;
  double *pi_new = NULL;

  double sum;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum += omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum -= omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    if (fabs (pi_old[i]) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = pi_old[i] + discrime_lambda * (sum / pi_old[i]);
  }

  /* change parameters to fit into the valid parameter range */
  discrime_trim_gradient (pi_new, mo[class]->N);

  /* update parameters */
  for (i = 0; i < mo[class]->N; i++) {
    /* printf("update parameter Pi in state %d of model %d with: %5.3g",
       i, class, pi_new[i]);
       printf(" (%5.3g) old value.\n", pi_old[i]); */
    mo[class]->s[i].pi = pi_new[i];
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_a_gradient (model ** mo, sequence_t ** sqs, int noC,
                                 int class, double ****expect_a,
                                 long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_gradient"

  int i, j, g, l, m;

  int j_id;

  double *a_old = NULL;
  double *a_new = NULL;

  double sum;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class */
  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {

    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum += omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum -= omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (a_old[j]) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = a_old[j] + discrime_lambda * (sum / a_old[j]);
    }

    /* change parameters to fit into the valid parameter range */
    discrime_trim_gradient (a_new, mo[class]->s[i].out_states);

    /* update parameter */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      /* printf("update parameter A[%d] in state %d of model %d with: %5.3g",
         j, i, class, a_new[j]);
         printf(" (%5.3g) old value.\n", a_old[j]); */
      mo[class]->s[i].out_a[j] = a_new[j];

      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_b_gradient (model ** mo, sequence_t ** sqs, int noC,
                                 int class, double *****expect_b,
                                 long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_b_gradient"

  int i, h, l, m;

  int hist, size;

  double *b_old = NULL;
  double *b_new = NULL;

  double sum;

  ARRAY_CALLOC (b_old, mo[class]->M);
  ARRAY_CALLOC (b_new, mo[class]->M);

  /* updating current class */

  /* iterate over all states of the current model */
  for (i = 0; i < mo[class]->N; i++) {
    if (mo[class]->s[i].fix)
      continue;

    size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo[class]->M) {

      for (h = hist; h < hist + mo[class]->M; h++) {

        /* iterate over all classes and training sequences */
        sum = 0.0;
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum += omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum -= omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
        /* check for valid new parameters; b_old and b_new hold only the
           current emission block of length M, hence the h - hist offset */
        b_old[h - hist] = mo[class]->s[i].b[h];
        if (fabs (b_old[h - hist]) == 0.0)
          b_new[h - hist] = b_old[h - hist];
        else
          b_new[h - hist] = b_old[h - hist]
            + discrime_lambda * (sum / b_old[h - hist]);
      }

      /* change parameters to fit into the valid parameter range */
      discrime_trim_gradient (b_new, mo[class]->M);

      for (h = hist; h < hist + mo[class]->M; h++) {
        /* update parameters */
        /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
           h, i, class, b_new[h - hist]);
           printf(" (%5.3g) old value.\n", b_old[h - hist]); */
        mo[class]->s[i].b[h] = b_new[h - hist];
      }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (b_old);
  m_free (b_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
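/* The discrime_update_*_closed functions replace the gradient step by a
   closed-form reestimate: setting the derivative of the criterion plus a
   Lagrangian normalisation term to zero yields new_param = sum / lagrangian,
   where numerator and denominator both mix positive (own class) and negative
   (competing classes, scaled by lfactor) expectations.  The result is
   blended with the old value via TRIM(). */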
static void discrime_update_pi_closed (model ** mo, sequence_t ** sqs, int noC,
                                int class, double lfactor,
                                double ****expect_pi, long double **omega,
                                long double ***omegati)
{
#define CUR_PROC "discrime_update_pi_closed"

  int i, l, m;

  double *pi_old = NULL;
  double *pi_new = NULL;

  double sum, lagrangian;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (pi_old, mo[class]->N);
  ARRAY_CALLOC (pi_new, mo[class]->N);

  /* updating current class (all or only specified) */

  /* iterate over all states of the k-th model to compute the Lagrangian
     multiplier */
  lagrangian = 0.0;
  for (i = 0; i < mo[class]->N; i++) {

    /* iterate over all classes */
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first term */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          lagrangian -= omega[class][l] * expect_pi[class][l][class][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          lagrangian +=
            lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
  }

  for (i = 0; i < mo[class]->N; i++) {
    /* iterate over all classes */
    sum = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* iterate over all training sequences */
      if (m == class)
        for (l = 0; l < sq->seq_number; l++)
          sum -= omega[class][l] * expect_pi[class][l][class][i];
      else
        for (l = 0; l < sq->seq_number; l++)
          sum += lfactor * omegati[m][l][class] * expect_pi[m][l][class][i];
    }
    /* check for valid new parameter */
    pi_old[i] = mo[class]->s[i].pi;
    if (fabs (lagrangian) == 0.0)
      pi_new[i] = pi_old[i];
    else
      pi_new[i] = sum / lagrangian;
  }

  /* update parameters */
  for (i = 0; i < mo[class]->N; i++) {
    /* printf("update parameter Pi in state %d of model %d with: %5.3g",
       i, class, pi_new[i]);
       printf(" (%5.3g) old value.\n", pi_old[i]); */
    mo[class]->s[i].pi = TRIM (pi_old[i], pi_new[i]);
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (pi_old);
  m_free (pi_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_a_closed (model ** mo, sequence_t ** sqs, int noC,
                               int class, double lfactor, double ****expect_a,
                               long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_update_a_closed"

  int i, j, g, l, m;

  int j_id;

  double *a_old = NULL;
  double *a_new = NULL;

  double sum, lagrangian;

  sequence_t *sq = NULL;

  ARRAY_CALLOC (a_old, mo[class]->N);
  ARRAY_CALLOC (a_new, mo[class]->N);

  /* updating current class (all or only specified) */

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {

    /* compute the Lagrangian multiplier */
    lagrangian = 0.0;
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];

      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            lagrangian -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            lagrangian += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
    }

    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      j_id = mo[class]->s[i].out_id[j];
      sum = 0.0;
      /* iterate over all classes and training sequences */
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == class)
          for (l = 0; l < sq->seq_number; l++)
            sum -= omega[class][l]
              * expect_a[class][l][class][i * mo[class]->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            sum += lfactor * omegati[m][l][class]
              * expect_a[m][l][class][i * mo[class]->N + j_id];
      }
      /* check for valid new parameter */
      a_old[j] = mo[class]->s[i].out_a[j];
      if (fabs (lagrangian) == 0.0)
        a_new[j] = a_old[j];
      else
        a_new[j] = sum / lagrangian;
    }

    /* update parameter */
    for (j = 0; j < mo[class]->s[i].out_states; j++) {
      mo[class]->s[i].out_a[j] = TRIM (a_old[j], a_new[j]);
      /*printf("update parameter A[%d] in state %d of model %d with: %5.3g",
         j, i, class, mo[class]->s[i].out_a[j]);
         printf(" (%5.3g) old value, %5.3g estimated\n", a_old[j], a_new[j]); */

      /* mirror out_a to corresponding in_a */
      j_id = mo[class]->s[i].out_id[j];
      for (g = 0; g < mo[class]->s[j_id].in_states; g++)
        if (i == mo[class]->s[j_id].in_id[g]) {
          mo[class]->s[j_id].in_a[g] = mo[class]->s[i].out_a[j];
          break;
        }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (a_old);
  m_free (a_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
static void discrime_update_b_closed (model ** mo, sequence_t ** sqs, int noC,
                               int class, double lfactor,
                               double *****expect_b, long double **omega,
                               long double ***omegati)
{
#define CUR_PROC "discrime_update_b_closed"

  int i, h, l, m;

  int hist, size;

  double *b_old = NULL;
  double *b_new = NULL;

  double sum, lagrangian;

  ARRAY_CALLOC (b_old, mo[class]->M);
  ARRAY_CALLOC (b_new, mo[class]->M);

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo[class]->N; i++) {
    if (mo[class]->s[i].fix)
      continue;

    size = (int) pow (mo[class]->M, mo[class]->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo[class]->M) {

      /* compute the Lagrangian multiplier */
      lagrangian = 0.0;
      for (h = hist; h < hist + mo[class]->M; h++) {

        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              lagrangian -= omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              lagrangian +=
                lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
      }

      for (h = hist; h < hist + mo[class]->M; h++) {
        /* iterate over all classes and training sequences */
        sum = 0.0;
        for (m = 0; m < noC; m++) {
          if (m == class)
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum -= omega[class][l] * expect_b[class][l][class][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              sum +=
                lfactor * omegati[m][l][class] * expect_b[m][l][class][i][h];
        }
        /* check for valid new parameters; b_old and b_new hold only the
           current emission block of length M, hence the h - hist offset */
        b_old[h - hist] = mo[class]->s[i].b[h];
        if (fabs (lagrangian) == 0.0)
          b_new[h - hist] = b_old[h - hist];
        else
          b_new[h - hist] = sum / lagrangian;
      }

      /* update parameters */
      for (h = hist; h < hist + mo[class]->M; h++) {
        mo[class]->s[i].b[h] = TRIM (b_old[h - hist], b_new[h - hist]);
        /*printf("update parameter B[%d] in state %d of model %d with: %5.3g",
           h, i, class, mo[class]->s[i].b[h]);
           printf(" (%5.3g) old value, estimate %5.3g\n", b_old[h - hist], b_new[h - hist]); */
      }
    }
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (b_old);
  m_free (b_new);
  return;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
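/* discrime_find_factor shrinks lfactor until, for every parameter, the
   positive contribution of the model's own class is at least as large as the
   (scaled) negative contribution of the competing classes, and returns the
   resulting factor; this keeps the closed-form reestimates nonnegative. */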
static double discrime_find_factor (model * mo, sequence_t ** sqs, int noC,
                             int k, double lfactor, double ****expect_pi,
                             double ****expect_a, double *****expect_b,
                             long double **omega, long double ***omegati)
{
#define CUR_PROC "discrime_find_factor"

  int h, i, j, l, m;
  int size, hist, j_id;
  double self, other;
  sequence_t *sq;

  /* iterate over all states of the k-th model */
  for (i = 0; i < mo->N; i++) {
    /* PI */
    /* iterate over all classes */
    self = other = 0.0;
    for (m = 0; m < noC; m++) {
      sq = sqs[m];
      /* if current class and class of training sequence match, add the first term */
      if (m == k)
        for (l = 0; l < sq->seq_number; l++)
          self += omega[k][l] * expect_pi[k][l][k][i];
      /* otherwise the second */
      else
        for (l = 0; l < sq->seq_number; l++)
          other += lfactor * omegati[m][l][k] * expect_pi[m][l][k][i];
    }
    if (self < other)
      lfactor *= self / other;
    /*printf("PI[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/

    /* A */
    for (j = 0; j < mo->s[i].out_states; j++) {
      j_id = mo->s[i].out_id[j];
      /* iterate over all classes and training sequences */
      self = other = 0.0;
      for (m = 0; m < noC; m++) {
        sq = sqs[m];
        if (m == k)
          for (l = 0; l < sq->seq_number; l++)
            self += omega[k][l] * expect_a[k][l][k][i * mo->N + j_id];
        else
          for (l = 0; l < sq->seq_number; l++)
            other += lfactor * omegati[m][l][k]
              * expect_a[m][l][k][i * mo->N + j_id];
      }
      if (self < other)
        lfactor *= self / other;
      /*printf(" A[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
    }

    /* B */
    if (mo->s[i].fix)
      continue;

    size = (int) pow (mo->M, mo->s[i].order + 1);
    for (hist = 0; hist < size; hist += mo->M) {
      for (h = hist; h < hist + mo->M; h++) {
        self = other = 0.0;
        /* iterate over all classes and training sequences */
        for (m = 0; m < noC; m++) {
          if (m == k)
            for (l = 0; l < sqs[m]->seq_number; l++)
              self += omega[k][l] * expect_b[k][l][k][i][h];
          else
            for (l = 0; l < sqs[m]->seq_number; l++)
              other += lfactor * omegati[m][l][k] * expect_b[m][l][k][i][h];
        }
        if (self < other)
          lfactor *= self / other;
        /*printf(" B[%d]: %g - %g = %g \t %g\n", k, self, other, self-other, lfactor);*/
      }
    }
  }

  return lfactor;

#undef CUR_PROC
}



/*----------------------------------------------------------------------------*/
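/* discrime_onestep performs one full training iteration for the given class:
   allocate the scratch matrices, run the forward/backward precomputation,
   derive omega/omegati, then apply either the gradient or the closed-form
   update and release the scratch memory again. */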
static int discrime_onestep (model ** mo, sequence_t ** sqs, int noC, int grad,
                      int class)
{
#define CUR_PROC "discrime_onestep"

  int j, l, retval = -1;

  /* expected use of parameter (for partial derivatives) */
  double *****expect_b, ****expect_a, ****expect_pi;

  /* matrix of log probabilities */
  double ***log_p;

  /* matrix of outer derivatives */
  long double **omega, ***omegati;

  long double psum = 0, nsum = 0;
  double lfactor;

  if (-1 == discrime_galloc (mo, sqs, noC, &expect_b, &expect_a,
                             &expect_pi, &omega, &omegati, &log_p)) {
    printf ("Allocation Error.\n");
    return -1;
  }

  if (-1 ==
      discrime_precompute (mo, sqs, noC, expect_b, expect_a, expect_pi,
                           log_p)) {
    printf ("precompute Error.\n");
    goto FREE;
  }

  if (-1 == discrime_calculate_omega (mo, sqs, noC, omega, omegati, log_p)) {
    printf ("omega Error.\n");
    goto FREE;
  }

  if (grad) {
    /* updating parameters */
    discrime_update_pi_gradient (mo, sqs, noC, class, expect_pi, omega,
                                 omegati);
    discrime_update_a_gradient (mo, sqs, noC, class, expect_a, omega,
                                omegati);
    discrime_update_b_gradient (mo, sqs, noC, class, expect_b, omega,
                                omegati);
  }
  else {
    /* calculate a reasonable initial lfactor */
    for (j = 0; j < noC; j++) {
      for (l = 0; l < sqs[j]->seq_number; l++) {
        if (j == class)
          psum += omega[class][l];
        else
          nsum += omegati[j][l][class];
      }
    }
    if (fabs (nsum) > 1e-200 && psum < nsum)
      lfactor = (psum / nsum);
    else
      lfactor = 1.0;

    /* determine the maximum feasible lfactor */
    lfactor = discrime_find_factor (mo[class], sqs, noC, class, lfactor,
                                    expect_pi, expect_a, expect_b, omega,
                                    omegati);
    lfactor *= .99;
    printf ("Calculated L: %g\n", lfactor);

    /* updating parameters */
    discrime_update_pi_closed (mo, sqs, noC, class, lfactor, expect_pi, omega,
                               omegati);
    discrime_update_a_closed (mo, sqs, noC, class, lfactor, expect_a, omega,
                              omegati);
    discrime_update_b_closed (mo, sqs, noC, class, lfactor, expect_b, omega,
                              omegati);
  }

  retval = 0;
FREE:
  discrime_gfree (mo, sqs, noC, expect_b, expect_a, expect_pi, omega,
                  omegati, log_p);
  return retval;
#undef CUR_PROC
}


/*----------------------------------------------------------------------------*/
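/* discriminative() is the public entry point: given noC models and the
   matching training sets, it repeatedly improves one class after the other
   (gradient or closed-form updates, selected by the gradient flag) until
   neither the performance measure nor the error counts improve, or max_steps
   is reached.  Returns 0 on success, -1 on failure. */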
int discriminative (model ** mo, sequence_t ** sqs, int noC, int max_steps,
		    int gradient)
{
#define CUR_PROC "discriminative"

  double last_perf, cur_perf;
  int retval=-1, last_cer, cur_cer;

  double lambda=0.0;

  double noiselevel = 0.0667;

  int fp, fn, totalseqs = 0, totalsyms = 0;

  int *falseP=NULL, *falseN=NULL;

  double * prior_backup=NULL;

  int i, k, step;

  model * last;

  ARRAY_CALLOC (falseP, noC);
  ARRAY_CALLOC (falseN, noC);
  ARRAY_CALLOC (prior_backup, noC);

  for (i = 0; i < noC; i++) {
    totalseqs += sqs[i]->seq_number;
    for (k = 0; k < sqs[i]->seq_number; k++)
      totalsyms += sqs[i]->seq_len[k];
  }

  /* set the priors from the number of training sequences and
     remember the old values */
  for (i=0; i<noC; i++) {
    prior_backup[i] = mo[i]->prior;
    mo[i]->prior = (double)sqs[i]->seq_number / totalseqs;
    printf("original prior: %g \t new prior %g\n", prior_backup[i], mo[i]->prior);
  }

  last_perf = discrime_compute_performance (mo, sqs, noC);
  cur_perf = last_perf;

  discrime_print_statistics (mo, sqs, noC, falseP, falseN);
  fp = fn = 0;
  for (i = 0; i < noC; i++) {
    printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
            model_likelihood (mo[i], sqs[i]), falseP[i]);
    fn += falseN[i];
    fp += falseP[i];
  }
  printf
    ("\n%d false positives and %d false negatives after ML-initialisation; Performance: %g.\n",
     fp, fn, last_perf);

  last_cer = cur_cer = fp;

  for (k = 0; k < noC; k++) {
    last = NULL;
    step = 0;
    if (gradient)
      lambda = .3;

    do {
      if (last)
        model_free (&last);

      /* save a copy of the currently trained model */
      last = model_copy (mo[k]);
      last_cer = cur_cer;
      last_perf = cur_perf;

      noiselevel /= 1.8;
      discrime_lambda = lambda / totalsyms;

      printf
        ("==============================================================\n");
      printf
        ("Optimising class %d, current step %d, lambda: %g  noise: %g, gradient: %d\n",
         k, step, discrime_lambda, noiselevel, gradient);

/*       if (gradient)   */
/* 	model_add_noise(mo[k], noiselevel, 0); */

      discrime_onestep (mo, sqs, noC, gradient, k);

      cur_perf = discrime_compute_performance (mo, sqs, noC);
      discrime_print_statistics (mo, sqs, noC, falseP, falseN);

      fp = fn = 0;
      for (i = 0; i < noC; i++) {
        printf ("Model %d likelihood: %g, \t false positives: %d\n", i,
                model_likelihood (mo[i], sqs[i]), falseP[i]);
        fn += falseN[i];
        fp += falseP[i];
      }
      cur_cer = fp;
      printf ("MAP=%12g -> training -> MAP=%12g", last_perf, cur_perf);
      printf ("  %d false positives, %d false negatives\n", fp, fn);
      printf
        ("==============================================================\n");

    } while ((last_perf < cur_perf || cur_cer < last_cer) && (step++ < max_steps));

    /* discard the last step and restore the previously saved model */
    model_free (&(mo[k]));
    mo[k] = model_copy (last);
    model_free (&last);
    cur_perf = last_perf;
    cur_cer = last_cer;
  }

  /* resetting priors to the old values */
  for (i = 0; i < noC; i++)
    mo[i]->prior = prior_backup[i];
  retval = 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (prior_backup);
  m_free (falseP);
  m_free (falseN);

  return retval;

#undef CUR_PROC
}
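

/* A minimal usage sketch (not part of the original library): it assumes two
   already trained models with matching training-sequence sets, obtained
   elsewhere via the GHMM I/O routines; the loading step is only hinted at,
   and DISCRIME_EXAMPLE_MAIN is a hypothetical guard so this sketch never
   collides with a real main(). */
#ifdef DISCRIME_EXAMPLE_MAIN
int main (void)
{
  model *mo[2];                 /* one HMM per class                     */
  sequence_t *sqs[2];           /* training sequences, one set per class */

  /* ... obtain mo[0], mo[1] and sqs[0], sqs[1] here, e.g. from files,
     and train the models with Baum-Welch as an ML initialisation ... */

  /* at most 25 improvement steps per class, closed-form updates
     (pass a nonzero last argument to use the gradient updates instead) */
  if (-1 == discriminative (mo, sqs, 2, 25, 0)) {
    fprintf (stderr, "discriminative training failed\n");
    return 1;
  }

  /* the smoothed count of correctly classified training sequences */
  printf ("final performance: %g\n",
          discrime_compute_performance (mo, sqs, 2));
  return 0;
}
#endif /* DISCRIME_EXAMPLE_MAIN */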