PageRenderTime 308ms CodeModel.GetById 40ms app.highlight 237ms RepoModel.GetById 14ms app.codeStats 1ms

/src/shogun/lib/external/gpdtsolve.cpp

https://github.com/mikelfer/shogun
C++ | 1576 lines | 1232 code | 173 blank | 171 comment | 298 complexity | 88a9d9965d3837311ef9f151d8332442 MD5 | raw file
   1/******************************************************************************
   2 ***        GPDT - Gradient Projection Decomposition Technique              ***
   3 ******************************************************************************
   4 ***                                                                        ***
   5 *** GPDT is a C++ software designed to train large-scale Support Vector    ***
   6 *** Machines for binary classification in both scalar and distributed      ***
   7 *** memory parallel environments. It uses the Joachims' problem            ***
   8 *** decomposition technique to split the whole quadratic programming (QP)  ***
   9 *** problem into a sequence of smaller QP subproblems, each one being      ***
  10 *** solved by a suitable gradient projection method (GPM). The presently   ***
  11 *** implemented GPMs are the Generalized Variable Projection Method        ***
  12 *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection       ***
  13 *** Methods for Quadratic Programs and Applications in Training Support    ***
  14 *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the        ***
  15 *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for  ***
  16 *** Singly Linear Constrained Quadratic Programs Subject to Lower and      ***
  17 *** Upper Bounds"; Math. Prog. to appear).                                 ***
  18 ***                                                                        ***
  19 *** Authors:                                                               ***
  20 ***  Thomas Serafini, Luca Zanni                                           ***
  21 ***   Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY ***
  22 ***   serafini.thomas@unimo.it, zanni.luca@unimo.it                        ***
  23 ***  Gaetano Zanghirati                                                    ***
  24 ***   Dept. of Mathematics, University of Ferrara - ITALY                  ***
  25 ***   g.zanghirati@unife.it                                                ***
  26 ***                                                                        ***
  27 *** Software homepage: http://dm.unife.it/gpdt                             ***
  28 ***                                                                        ***
  29 *** This work is supported by the Italian FIRB Projects                    ***
  30 ***      'Statistical Learning: Theory, Algorithms and Applications'       ***
  31 ***      (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA            ***
  32 *** and                                                                    ***
  33 ***      'Parallel Algorithms and Numerical Nonlinear Optimization'        ***
  34 ***      (grant RBAU01JYPN), http://dm.unife.it/pn2o                       ***
  35 ***                                                                        ***
  36 *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni.       ***
  37 ***                                                                        ***
  38 ***                     COPYRIGHT NOTIFICATION                             ***
  39 ***                                                                        ***
  40 *** Permission to copy and modify this software and its documentation      ***
  41 *** for internal research use is granted, provided that this notice is     ***
  42 *** retained thereon and on all copies or modifications. The authors and   ***
  43 *** their respective Universities makes no representations as to the       ***
  44 *** suitability and operability of this software for any purpose. It is    ***
  45 *** provided "as is" without express or implied warranty.                  ***
  46 *** Use of this software for commercial purposes is expressly prohibited   ***
  47 *** without contacting the authors.                                        ***
  48 ***                                                                        ***
  49 *** This program is free software; you can redistribute it and/or modify   ***
  50 *** it under the terms of the GNU General Public License as published by   ***
  51 *** the Free Software Foundation; either version 3 of the License, or      ***
  52 *** (at your option) any later version.                                    ***
  53 ***                                                                        ***
  54 *** This program is distributed in the hope that it will be useful,        ***
  55 *** but WITHOUT ANY WARRANTY; without even the implied warranty of         ***
  56 *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ***
  57 *** GNU General Public License for more details.                           ***
  58 ***                                                                        ***
  59 *** You should have received a copy of the GNU General Public License      ***
  60 *** along with this program; if not, write to the Free Software            ***
  61 *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              ***
  62 ***                                                                        ***
  63 *** File:     gpdtsolve.cpp                                                ***
  64 *** Type:     scalar                                                       ***
  65 *** Version:  1.0                                                          ***
  66 *** Date:     November, 2006                                                ***
  67 *** Revision: 2                                                            ***
  68 ***                                                                        ***
  69 *** SHOGUN adaptions  Written (W) 2006-2009 Soeren Sonnenburg              ***
  70 ******************************************************************************/
  71#include <math.h>
  72#include <stdio.h>
  73#include <stdlib.h>
  74#include <string.h>
  75#include <time.h>
  76
  77#include <shogun/lib/external/gpm.h>
  78#include <shogun/lib/external/gpdt.h>
  79#include <shogun/lib/external/gpdtsolve.h>
  80#include <shogun/lib/Signal.h>
  81#include <shogun/io/SGIO.h>
  82
  83using namespace shogun;
  84
  85#ifndef DOXYGEN_SHOULD_SKIP_THIS
  86namespace shogun
  87{
  88#define y_in(i)      y[index_in[(i)]]
  89#define y_out(i)     y[index_out[(i)]]
  90#define alpha_in(i)  alpha[index_in[(i)]]
  91#define alpha_out(i) alpha[index_out[(i)]]
  92#define minfty       (-1.0e+30)  // minus infinity
  93
  94uint32_t Randnext = 1;
  95
  96#define ThRand    (Randnext = Randnext * 1103515245L + 12345L)
  97#define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
  98
  99FILE        *fp;
 100
 101/* utility routines prototyping */
 102void quick_si (int32_t a[], int32_t k);
 103void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
 104void quick_s2 (float64_t a[], int32_t k, int32_t ia[]);
 105
 106/******************************************************************************/
 107/*** Class for caching strategy implementation                              ***/
 108/******************************************************************************/
 109class sCache
 110{
 111
 112public:
 113  sCache  (sKernel* sk, int32_t Mbyte, int32_t ell);
 114  ~sCache ();
 115
 116  cachetype *FillRow (int32_t row, int32_t IsC = 0);
 117  cachetype *GetRow  (int32_t row);
 118
 119  int32_t DivideMP (int32_t *out, int32_t *in, int32_t n);
 120
 121  /*** Itarations counter ***/
 122  void Iteration() { nit++; }
 123
 124  /*** Cache size control ***/
 125  int32_t CheckCycle()
 126  {
 127    int32_t us;
 128    cache_entry *pt = first_free->next;
 129
 130    for (us = 0; pt != first_free; us++, pt = pt->next);
 131    if (us != maxmw-1)
 132        return 1;
 133    else
 134        return 0;
 135  }
 136
 137private:
 138
 139  struct cache_entry
 140  {
 141    int32_t row;      // unused row
 142    int32_t last_access_it;
 143    cache_entry *prev, *next;
 144    cachetype   *data;
 145  };
 146
 147  sKernel* KER;
 148  int32_t maxmw, ell;
 149  int32_t nit;
 150
 151  cache_entry *mw;
 152  cache_entry *first_free;
 153  cache_entry **pindmw;    // 0 if unused row
 154  cachetype   *onerow;
 155
 156  cachetype   *FindFree(int32_t row, int32_t IsC);
 157};
 158
 159
 160/******************************************************************************/
 161/*** Cache class constructor                                                ***/
 162/******************************************************************************/
 163sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
 164{
 165  int32_t i;
 166
 167  // size in dwords of one cache row
 168  maxmw = (sizeof(cache_entry) + sizeof(cache_entry *)
 169           + ell*sizeof(cachetype)) / 4;
 170  // number of cache rows
 171  maxmw = Mbyte*1024*(1024/4) / maxmw;
 172
 173  /* arrays allocation */
 174  mw     = SG_MALLOC(cache_entry, maxmw);
 175  pindmw = SG_MALLOC(cache_entry*,  ell);
 176  onerow = SG_MALLOC(cachetype,     ell);
 177
 178  /* arrays initialization */
 179  for (i = 0; i < maxmw; i++)
 180  {
 181      mw[i].prev           = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
 182      mw[i].next           = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
 183      mw[i].data           = SG_MALLOC(cachetype, ell);
 184      mw[i].row            = -1;    // unused row
 185      mw[i].last_access_it = -1;
 186  }
 187  for (i = 0; i < ell; i++)
 188      pindmw[i] = 0;
 189
 190  first_free = &mw[0];
 191  nit        = 0;
 192}
 193
 194/******************************************************************************/
 195/*** Cache class destructor                                                 ***/
 196/******************************************************************************/
 197sCache::~sCache()
 198{
 199  int32_t i;
 200
 201  for (i = maxmw-1; i >= 0; i--)
 202      SG_FREE(mw[i].data);
 203
 204  SG_FREE(onerow);
 205  SG_FREE(pindmw);
 206  SG_FREE(mw);
 207}
 208
 209
 210/******************************************************************************/
 211/*** Retrieve a cached row                                                  ***/
 212/******************************************************************************/
 213cachetype *sCache::GetRow(int32_t row)
 214{
 215  cache_entry *c;
 216
 217  c = pindmw[row];
 218  if (c == NULL)
 219      return NULL;
 220
 221  c->last_access_it = nit;
 222  if (c == first_free)
 223  {
 224      first_free = first_free->next;
 225  }
 226  else
 227  {
 228      // move "row" as the most recently used.
 229      c->prev->next    = c->next;
 230      c->next->prev    = c->prev;
 231      c->next          = first_free;
 232      c->prev          = first_free->prev;
 233      first_free->prev = c;
 234      c->prev->next    = c;
 235  }
 236
 237  return c->data;
 238}
 239
 240/******************************************************************************
 241 *** Look for a free cache row                                              ***
 242 *** IMPORTANT: call this method only if you are sure that "row"            ***
 243 ***            is not already in the cache ( i.e. after calling GetRow() ) ***
 244 ******************************************************************************/
 245cachetype *sCache::FindFree(int32_t row, int32_t IsC)
 246{
 247  cachetype *pt;
 248
 249  if (first_free->row != -1) // cache row already contains data
 250  {
 251      if (first_free->last_access_it == nit || IsC)
 252          return 0;
 253      else
 254          pindmw[first_free->row] = 0;
 255  }
 256  first_free->row            = row;
 257  first_free->last_access_it = nit;
 258  pindmw[row]                = first_free;
 259
 260  pt         = first_free->data;
 261  first_free = first_free->next;
 262
 263  return pt;
 264}
 265
 266
 267/******************************************************************************/
 268/*** Enter data in a cache row                                              ***/
 269/******************************************************************************/
 270cachetype *sCache::FillRow(int32_t row, int32_t IsC)
 271{
 272  int32_t j;
 273  cachetype *pt;
 274
 275  pt = GetRow(row);
 276  if (pt != NULL)
 277      return pt;
 278
 279  pt = FindFree(row, IsC);
 280  if (pt == 0)
 281      pt = onerow;
 282
 283  // Compute all the row elements
 284  for (j = 0; j < ell; j++)
 285      pt[j] = (cachetype)KER->Get(row, j);
 286  return pt;
 287}
 288
 289
 290/******************************************************************************/
 291/*** Expand a sparse row in a full cache row                                ***/
 292/******************************************************************************/
 293int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n)
 294{
 295   /********************************************************************
 296    * Input meaning:                                                   *
 297    *    in  = vector containing row to be extracted in the cache      *
 298    *    n   = size of in                                              *
 299    *    out = the indexes of "in" of the components to be computed    *
 300    *          by this processor (first those in the cache, then the   *
 301    *          ones not yet computed)                                  *
 302    * Returns: the number of components of this processor              *
 303    ********************************************************************/
 304
 305  int32_t *remained, nremained, k;
 306  int32_t i;
 307
 308  remained = SG_MALLOC(int32_t, n);
 309
 310  nremained = 0;
 311  k = 0;
 312  for (i = 0; i < n; i++)
 313  {
 314      if (pindmw[in[i]] != NULL)
 315          out[k++] = i;
 316      else
 317          remained[nremained++] = i;
 318  }
 319  for (i = 0; i < nremained; i++)
 320      out[k++] = remained[i];
 321
 322  SG_FREE(remained);
 323  return n;
 324}
 325
 326/******************************************************************************/
 327/*** Check solution optimality                                              ***/
 328/******************************************************************************/
 329int32_t QPproblem::optimal()
 330{
 331  /***********************************************************************
 332   * Returns 1 if the computed solution is optimal, otherwise returns 0. *
 333   * To verify the optimality it checks the KKT optimality conditions.   *
 334   ***********************************************************************/
 335  register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
 336
 337  float64_t gx_i, aux, s1, s2;
 338
 339  /* sort -y*grad and store in ing the swaps done */
 340  for (j = 0; j < ell; j++)
 341  {
 342      grad[j] = y[j] - st[j];
 343      ing[j]  = j;
 344  }
 345
 346  quick_s2(grad,ell,ing);
 347
 348  /* compute bee */
 349  margin_sv_number = 0;
 350
 351  for (i = chunk_size - 1; i >= 0; i--)
 352      if (is_free(index_in[i]))
 353      {
 354          margin_sv_number++;
 355          bee = y_in(i) - st[index_in[i]];
 356          break;
 357      }
 358
 359  if (margin_sv_number > 0)
 360  {
 361      aux=-1.0;
 362      for (i = nb-1; i >= 0; i--)
 363      {
 364          gx_i = bee + st[index_out[i]];
 365          if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) ||
 366             (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) ||
 367             (is_free(index_out[i])  &&
 368             ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta))))
 369          {
 370              if (fabs(gx_i*y_out(i) - 1.0) > aux)
 371                  aux = fabs(gx_i*y_out(i) - 1.0);
 372          }
 373      }
 374  }
 375  else
 376  {
 377      for (i = ell - 1; i >= 0; i--)
 378          if (is_free(i))
 379          {
 380              margin_sv_number++;
 381              bee = y[i] - st[i];
 382              break;
 383          }
 384      if (margin_sv_number == 0)
 385      {
 386          s1 = -minfty;
 387          s2 = -minfty;
 388          for (j = 0; j < ell; j++)
 389              if ( (alpha[ing[j]] > DELTAsv) &&  (y[ing[j]] >= 0) )
 390              {
 391                  s1 = grad[j];
 392                  break;
 393              }
 394          for (j = 0; j < ell; j++)
 395              if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
 396              {
 397                  s2 = grad[j];
 398                  break;
 399              }
 400          if (s1 < s2)
 401              aux = s1;
 402          else
 403              aux = s2;
 404
 405          s1 = minfty;
 406          s2 = minfty;
 407          for (j = ell-1; j >=0; j--)
 408              if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
 409              {
 410                  s1 = grad[j];
 411                  break;
 412              }
 413          for (j = ell-1; j >=0; j--)
 414              if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
 415              {
 416                  s2 = grad[j];
 417                  break;
 418              }
 419          if (s2 > s1) s1 = s2;
 420
 421          bee = 0.5 * (s1+aux);
 422      }
 423
 424      aux = -1.0;
 425      for (i = ell-1; i >= 0; i--)
 426      {
 427          gx_i = bee + st[i];
 428          if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) ||
 429             (is_bound(i) && (gx_i*y[i] > (1.0+delta))) ||
 430             (is_free(i)  &&
 431             ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta))))
 432          {
 433              if (fabs(gx_i*y[i] - 1.0) > aux)
 434                  aux = fabs(gx_i*y[i] - 1.0);
 435          }
 436      }
 437  }
 438
 439  if (aux < 0.0)
 440      return 1;
 441  else
 442  {
 443      if (verbosity > 1)
 444          SG_SINFO("  Max KKT violation: %lf\n", aux)
 445      else if (verbosity > 0)
 446          SG_SINFO("  %lf\n", aux)
 447
 448      if (fabs(kktold-aux) < delta*0.01 &&  aux < delta*2.0)
 449      {
 450          if (DELTAvpm > InitialDELTAvpm*0.1)
 451          {
 452              DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
 453                                            DELTAvpm*0.5 : InitialDELTAvpm*0.1);
 454              SG_SINFO("Inner tolerance changed to: %lf\n", DELTAvpm)
 455          }
 456      }
 457
 458      kktold = aux;
 459
 460 /*****************************************************************************
 461  *** Update the working set (T. Serafini, L. Zanni, "On the Working Set    ***
 462  *** Selection in Gradient Projection-based Decomposition Techniques for   ***
 463  *** Support Vector Machines"; Optim. Meth. Soft. 20, 2005).               ***
 464  *****************************************************************************/
 465      for (j = 0; j < chunk_size; j++)
 466          inold[j] = index_in[j];
 467
 468      z  = -1;  /* index of the last entered component from the top    */
 469      z1 = ell; /* index of the last entered component from the bottom */
 470      k  = 0;
 471      j  = 0;
 472      while (k < q)
 473      {
 474          i = z + 1; /* index of the candidate components from the top */
 475          while (i < z1)
 476          {
 477              if ( is_free(ing[i]) ||
 478                   (-y[ing[i]]>=0 && is_zero(ing[i])) ||
 479                   (-y[ing[i]]<=0 && is_bound(ing[i]))
 480                 )
 481              {
 482                  znew = i; /* index of the component to select from the top */
 483                  break;
 484              }
 485              i++;
 486          }
 487          if (i == z1) break;
 488
 489          s = z1 - 1;
 490          while (znew < s)
 491          {
 492              if ( is_free(ing[s]) ||
 493                   (y[ing[s]]>=0 && is_zero(ing[s])) ||
 494                   (y[ing[s]]<=0 && is_bound(ing[s]))
 495                 )
 496              {
 497                  z1 = s;
 498                  z  = znew;
 499                  break;
 500              }
 501              s--;
 502          }
 503          if (znew == s) break;
 504
 505          index_in[k++] = ing[z];
 506          index_in[k++] = ing[z1];
 507      }
 508
 509      if (k < q)
 510      {
 511          if (verbosity > 1)
 512              SG_SINFO("  New q: %i\n", k)
 513          q = k;
 514      }
 515
 516      quick_si(index_in, q);
 517
 518      s = 0;
 519      j = 0;
 520      for (k = 0; k < chunk_size; k++)
 521      {
 522          z = inold[k];
 523          for (i = j; i < q; i++)
 524              if (z <= index_in[i])
 525                  break;
 526
 527          if (i == q)
 528          {
 529              for (i = k; i < chunk_size; i++)
 530              {
 531                  ing[s] = inold[i]; /* older components not in the new basis */
 532                  s      = s+1;
 533              }
 534              break;
 535          }
 536
 537          if (z == index_in[i])
 538              j      = i + 1; /* the component is still in the basis  */
 539          else
 540          {
 541              ing[s] = z;     /* older component not in the new basis */
 542              s      = s + 1;
 543              j      = i;
 544          }
 545      }
 546
 547      for (i = 0; i < s; i++)
 548      {
 549          bmemrid[i] = bmem[ing[i]];
 550          pbmr[i]    = i;
 551      }
 552
 553      quick_s3(bmemrid, s, pbmr);
 554
 555      /* check if support vectors not at bound enter the basis */
 556      j = q;
 557      i = 0;
 558      while (j < chunk_size && i < s)
 559      {
 560          if (is_free(ing[pbmr[i]]))
 561          {
 562              index_in[j] = ing[pbmr[i]];
 563              j++;
 564          }
 565          i++;
 566      }
 567
 568      /* choose which bound variables keep in basis (alpha = 0 or alpha = C) */
 569      if (j < chunk_size)
 570      {
 571          i = 0;
 572          while (j < chunk_size && i < s)
 573          {
 574              if (is_zero(ing[pbmr[i]]))
 575              {
 576                  index_in[j] = ing[pbmr[i]];
 577                  j++;
 578              }
 579              i++;
 580          }
 581          if (j < chunk_size)
 582          {
 583              i = 0;
 584              while (j < chunk_size && i < s)
 585              {
 586                  if (is_bound(ing[pbmr[i]]))
 587                  {
 588                      index_in[j] = ing[pbmr[i]];
 589                      j++;
 590                  }
 591                  i++;
 592              }
 593          }
 594      }
 595
 596      quick_si(index_in, chunk_size);
 597
 598      for (i = 0; i < chunk_size; i++)
 599          bmem[index_in[i]]++;
 600
 601      j = 0;
 602      k = 0;
 603      for (i = 0; i < chunk_size; i++)
 604      {
 605          for (z = j; z < index_in[i]; z++)
 606          {
 607              index_out[k] = z;
 608              k++;
 609          }
 610          j = index_in[i]+1;
 611      }
 612      for (z = j; z < ell; z++)
 613      {
 614          index_out[k] = z;
 615          k++;
 616      }
 617
 618      for (i = 0; i < nb; i++)
 619          bmem[index_out[i]] = 0;
 620
 621      kin = 0;
 622      j   = 0;
 623      for (k = 0; k < chunk_size; k++)
 624      {
 625          z = index_in[k];
 626          for (i = j; i < chunk_size; i++)
 627              if (z <= inold[i])
 628                  break;
 629          if (i == chunk_size)
 630          {
 631              for (s = k; s < chunk_size; s++)
 632              {
 633                  incom[s] = -1;
 634                  cec[index_in[s]]++;
 635              }
 636              kin = kin + chunk_size - k ;
 637              break;
 638          }
 639
 640          if (z == inold[i])
 641          {
 642              incom[k] = i;
 643              j        = i+1;
 644          }
 645          else
 646          {
 647              incom[k] = -1;
 648              j        = i;
 649              kin      = kin + 1;
 650              cec[index_in[k]]++;
 651          }
 652      }
 653
 654      nnew = kin & (~1);
 655      if (nnew < 10)
 656          nnew = 10;
 657      if (nnew < chunk_size/10)
 658          nnew = chunk_size/10;
 659      if (nnew < q)
 660      {
 661          q = nnew;
 662          q = q & (~1);
 663      }
 664
 665      if (kin == 0)
 666      {
 667          DELTAkin *= 0.1;
 668          if (DELTAkin < 1.0e-6)
 669          {
 670              SG_SINFO("\n***ERROR***: GPDT stops with tolerance")
 671              SG_SINFO(
 672              " %lf because it is unable to change the working set.\n", kktold);
 673              return 1;
 674          }
 675          else
 676          {
 677              SG_SINFO("Inner tolerance temporary changed to:")
 678              SG_SINFO(" %e\n", DELTAvpm*DELTAkin)
 679          }
 680      }
 681      else
 682          DELTAkin = 1.0;
 683
 684      if (verbosity > 1)
 685      {
 686          SG_SINFO("  Working set: new components: %i", kin)
 687          SG_SINFO(",  new parameter n: %i\n", q)
 688      }
 689
 690      return 0;
 691   }
 692}
 693
 694/******************************************************************************/
 695/*** Optional preprocessing: random distribution                            ***/
 696/******************************************************************************/
 697int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
 698{
 699  int32_t i, j;
 700
 701  Randnext = 1;
 702  memset(sv, 0, ell*sizeof(int32_t));
 703  for (i = 0; i < chunk_size; i++)
 704  {
 705      do
 706      {
 707          j = ThRandPos % ell;
 708      } while (sv[j] != 0);
 709      sv[j] = 1;
 710  }
 711  return(chunk_size);
 712}
 713
 714/******************************************************************************/
 715/*** Optional preprocessing: block parallel distribution                    ***/
 716/******************************************************************************/
 717int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
 718{
 719  int32_t    s;    // elements owned by the processor
 720  int32_t    sl;   // elements of the n-1 subproblems
 721  int32_t    n, i, off, j, k, ll;
 722  int32_t    nsv, nbsv;
 723  int32_t    *sv_loc, *bsv_loc, *sp_y;
 724  float32_t  *sp_D=NULL;
 725  float64_t *sp_alpha, *sp_h;
 726
 727  s  = ell;
 728  /* divide the s elements into n blocks smaller than preprocess_size */
 729  n  = (s + preprocess_size - 1) / preprocess_size;
 730  sl = 1 + s / n;
 731
 732  if (verbosity > 0)
 733  {
 734      SG_SINFO("  Preprocessing: examples = %d", s)
 735      SG_SINFO(", subp. = %d", n)
 736      SG_SINFO(", size = %d\n",sl)
 737  }
 738
 739  sv_loc   = SG_MALLOC(int32_t, s);
 740  bsv_loc  = SG_MALLOC(int32_t, s);
 741  sp_alpha = SG_MALLOC(float64_t, sl);
 742  sp_h     = SG_MALLOC(float64_t, sl);
 743  sp_y     = SG_MALLOC(int32_t, sl);
 744
 745  if (sl < 500)
 746      sp_D = SG_MALLOC(float32_t, sl*sl);
 747
 748  for (i = 0; i < sl; i++)
 749       sp_h[i] = -1.0;
 750  memset(alpha, 0, ell*sizeof(float64_t));
 751
 752  /* randomly reorder the component to select */
 753  for (i = 0; i < ell; i++)
 754      aux[i] = i;
 755  Randnext = 1;
 756  for (i = 0; i < ell; i++)
 757  {
 758      j  = ThRandPos % ell;
 759      k  = ThRandPos % ell;
 760      ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
 761  }
 762
 763  nbsv = nsv = 0;
 764  for (i = 0; i < n; i++)
 765  {
 766      if (verbosity > 0)
 767          SG_SINFO("%d...", i)
 768      SplitParts(s, i, n, &ll, &off);
 769
 770      if (sl < 500)
 771      {
 772          for (j = 0; j < ll; j++)
 773          {
 774              sp_y[j] = y[aux[j+off]];
 775              for (k = j; k < ll; k++)
 776                  sp_D[k*sl + j] = sp_D[j*sl + k]
 777                                 = y[aux[j+off]] * y[aux[k+off]]
 778                                   * (float32_t)kernel->Get(aux[j+off], aux[k+off]);
 779          }
 780
 781          memset(sp_alpha, 0, sl*sizeof(float64_t));
 782
 783          /* call the gradient projection QP solver */
 784          gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
 785                     c_const, 0.0, sp_y, sp_alpha, delta*10, NULL);
 786      }
 787      else
 788      {
 789          QPproblem p2;
 790		  QPproblem::copy_subproblem(&p2, this, ll, aux + off);
 791          p2.chunk_size     = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n));
 792          p2.q              = (int32_t) ((float64_t)q / sqrt((float64_t)n));
 793          p2.maxmw          = ll*ll*4 / (1024 * 1024);
 794          if (p2.maxmw > maxmw / 2)
 795              p2.maxmw = maxmw / 2;
 796          p2.verbosity      = 0;
 797          p2.delta          = delta * 10.0;
 798          p2.PreprocessMode = 0;
 799          kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
 800      }
 801
 802      for (j = 0; j < ll; j++)
 803      {
 804          /* modify bound support vector approximation */
 805          if (sp_alpha[j] < (c_const-DELTAsv))
 806              sp_alpha[j] = 0.0;
 807          else
 808              sp_alpha[j] = c_const;
 809          if (sp_alpha[j] > DELTAsv)
 810          {
 811              if (sp_alpha[j] < (c_const-DELTAsv))
 812                  sv_loc[nsv++]   = aux[j+off];
 813              else
 814                  bsv_loc[nbsv++] = aux[j+off];
 815              alpha[aux[j+off]] = sp_alpha[j];
 816          }
 817      }
 818  }
 819
 820  Randnext = 1;
 821
 822  /* add the known support vectors to the working set */
 823  memset(sv, 0, ell*sizeof(int32_t));
 824  ll = (nsv < chunk_size ? nsv : chunk_size);
 825  for (i = 0; i < ll; i++)
 826  {
 827      do {
 828           j = sv_loc[ThRandPos % nsv];
 829      } while (sv[j] != 0);
 830      sv[j] = 1;
 831  }
 832
 833  /* add the known bound support vectors to the working set */
 834  ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
 835  for (; i < ll; i++)
 836  {
 837      do {
 838           j = bsv_loc[ThRandPos % nbsv];
 839      } while (sv[j] != 0);
 840      sv[j] = 1;
 841  }
 842
 843  /* eventually fill up the working set with other components
 844     randomly chosen                                          */
 845  for (; i < chunk_size; i++)
 846  {
 847      do {
 848           j = ThRandPos % ell;
 849      } while (sv[j] != 0);
 850      sv[j] = 1;
 851  }
 852
 853
 854  /* dealloc temporary arrays */
 855  if (sl < 500) SG_FREE(sp_D);
 856  SG_FREE(sp_y    );
 857  SG_FREE(sp_h    );
 858  SG_FREE(sv_loc  );
 859  SG_FREE(bsv_loc );
 860  SG_FREE(sp_alpha);
 861
 862  if (verbosity > 0)
 863  {
 864      SG_SINFO("\n  Preprocessing: SV = %d", nsv)
 865      SG_SINFO(", BSV = %d\n", nbsv)
 866  }
 867
 868  return(nsv);
 869}
 870
 871/******************************************************************************/
 872/*** Compute the QP problem solution                                        ***/
 873/******************************************************************************/
 874float64_t QPproblem::gpdtsolve(float64_t *solution)
 875{
 876  int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
 877  int32_t tot_vpm_secant, projCount, proximal_count;
 878  int32_t vpmWarningThreshold;
 879  int32_t  nzin, nzout;
 880  int32_t *sp_y;               /* labels vector                             */
 881  int32_t *indnzin, *indnzout; /* nonzero components indices vectors        */
 882  float32_t     *sp_D;               /* quadratic part of the objective function  */
 883  float64_t    *sp_h, *sp_hloc,     /* linear part of the objective function     */
 884            *sp_alpha,*stloc;    /* variables and gradient updating vectors   */
 885  float64_t    sp_e, aux, fval, tau_proximal_this, dfval;
 886  float64_t    *vau;
 887  float64_t    *weight;
 888  float64_t    tot_prep_time, tot_vpm_time, tot_st_time, total_time;
 889  sCache    *Cache;
 890  cachetype *ptmw;
 891  clock_t   t, ti;
 892
 893  Cache = new sCache(KER, maxmw, ell);
 894    if (chunk_size > ell) chunk_size = ell;
 895
 896  if (chunk_size <= 20)
 897      vpmWarningThreshold = 30*chunk_size;
 898  else if (chunk_size <= 200)
 899      vpmWarningThreshold = 20*chunk_size + 200;
 900  else
 901      vpmWarningThreshold = 10*chunk_size + 2200;
 902
 903  kktold = 10000.0;
 904  if (delta <= 5e-3)
 905  {
 906      if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) )
 907          DELTAvpm = delta * 0.1;
 908      else if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
 909          DELTAvpm = delta * 0.5;
 910      else
 911          DELTAvpm = delta;
 912  }
 913  else
 914  {
 915      if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
 916          DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1;
 917      else
 918          DELTAvpm = 5e-3;
 919  }
 920
 921  InitialDELTAvpm = DELTAvpm;
 922  DELTAsv         = EPS_SV * c_const;
 923  DELTAkin        = 1.0;
 924
 925  q               = q & (~1);
 926  nb              = ell - chunk_size;
 927  tot_vpm_iter    = 0;
 928  tot_vpm_secant  = 0;
 929
 930  tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
 931
 932  ti = clock();
 933
 934  /* arrays allocation */
 935  SG_SDEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim)
 936  ing       = SG_MALLOC(int32_t, ell);
 937  inaux     = SG_MALLOC(int32_t, ell);
 938  index_in  = SG_MALLOC(int32_t, chunk_size);
 939  index_out = SG_MALLOC(int32_t, ell);
 940  indnzout  = SG_MALLOC(int32_t, nb);
 941  alpha     = SG_MALLOC(float64_t, ell);
 942
 943  memset(alpha, 0, ell*sizeof(float64_t));
 944  memset(ing,   0, ell*sizeof(int32_t));
 945
 946  if (verbosity > 0 && PreprocessMode != 0)
 947      SG_SINFO("\n*********** Begin setup step...\n")
 948  t = clock();
 949
 950  switch(PreprocessMode)
 951  {
 952    case 1: Preprocess1(KER, inaux, ing); break;
 953    case 0:
 954    default:
 955            Preprocess0(inaux, ing); break;
 956  }
 957
 958  for (j = k = i = 0; i < ell; i++)
 959      if (ing[i] == 0)
 960          index_out[j++] = i;
 961      else
 962          index_in[k++]  = i;
 963
 964  t = clock() - t;
 965  if (verbosity > 0 && PreprocessMode != 0)
 966  {
 967      SG_SINFO("  Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC)
 968      SG_SINFO(
 969              "\n\n*********** Begin decomposition technique...\n");
 970  }
 971
 972  /* arrays allocation */
 973  bmem     = SG_MALLOC(int32_t, ell);
 974  bmemrid  = SG_MALLOC(int32_t, chunk_size);
 975  pbmr     = SG_MALLOC(int32_t, chunk_size);
 976  cec      = SG_MALLOC(int32_t, ell);
 977  indnzin  = SG_MALLOC(int32_t, chunk_size);
 978  inold    = SG_MALLOC(int32_t, chunk_size);
 979  incom    = SG_MALLOC(int32_t, chunk_size);
 980  vau      = SG_MALLOC(float64_t, ell);
 981  grad     = SG_MALLOC(float64_t, ell);
 982  weight   = SG_MALLOC(float64_t, dim);
 983  st       = SG_MALLOC(float64_t, ell);
 984  stloc    = SG_MALLOC(float64_t, ell);
 985
 986  for (i = 0; i < ell; i++)
 987  {
 988      bmem[i] = 0;
 989      cec[i]  = 0;
 990      st[i]   = 0;
 991  }
 992
 993  sp_y     = SG_MALLOC(int32_t, chunk_size);
 994  sp_D     = SG_MALLOC(float32_t, chunk_size*chunk_size);
 995  sp_alpha = SG_MALLOC(float64_t, chunk_size);
 996  sp_h     = SG_MALLOC(float64_t, chunk_size);
 997  sp_hloc  = SG_MALLOC(float64_t, chunk_size);
 998
 999  for (i = 0; i < chunk_size; i++)
1000      cec[index_in[i]] = cec[index_in[i]]+1;
1001
1002  for (i = chunk_size-1; i >= 0; i--)
1003  {
1004      incom[i]          = -1;
1005      sp_alpha[i]       = 0.0;
1006      bmem[index_in[i]] = 1;
1007  }
1008
1009  if (verbosity == 1)
1010  {
1011      SG_SINFO("  IT  | Prep Time | Solver IT | Solver Time |")
1012      SG_SINFO(" Grad Time | KKT violation\n")
1013      SG_SINFO("------+-----------+-----------+-------------+")
1014      SG_SINFO("-----------+--------------\n")
1015  }
1016
1017  /***************************************************************************/
1018  /* Begin the problem resolution loop                                       */
1019  nit = 0;
1020  do
1021  {
1022      t = clock();
1023      if ((nit % 10) == 9)
1024      {
1025          if ((t-ti) > 0)
1026              total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
1027          else
1028              total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
1029          ti = t;
1030      }
1031
1032      if (verbosity > 1)
1033          SG_SINFO("\n*********** ITERATION: %d\n", nit + 1)
1034      else if (verbosity > 0)
1035          SG_SINFO("%5d |", nit + 1)
1036      else
1037          SG_SINFO(".")
1038      fflush(stdout);
1039
1040      nzout = 0;
1041      for (k = 0; k < nb; k++)
1042          if (alpha_out(k)>DELTAsv)
1043          {
1044              indnzout[nzout] = index_out[k];
1045              nzout++;
1046          }
1047
1048      sp_e = 0.;
1049      for (j = 0; j < nzout; j++)
1050      {
1051          vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
1052          sp_e  += vau[j];
1053      }
1054
1055      if (verbosity > 1)
1056          SG_SINFO("  spe: %e ", sp_e)
1057
1058      for (i = 0; i < chunk_size; i++)
1059          sp_y[i] = y_in(i);
1060
1061      /* Construct the objective function Hessian */
1062      for (i = 0; i < chunk_size; i++)
1063      {
1064          iin  = index_in[i];
1065          ptmw = Cache->GetRow(iin);
1066          if (ptmw != 0)
1067          {
1068              for (j = 0; j <= i; j++)
1069                  sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
1070          }
1071          else if (incom[i] == -1)
1072              for (j = 0; j <= i; j++)
1073                  sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
1074                                           * (float32_t)KER->Get(iin, index_in[j]);
1075          else
1076          {
1077              for (j = 0; j < i; j++)
1078                  if (incom[j] == -1)
1079                      sp_D[i*chunk_size + j]
1080                         = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]);
1081                  else
1082                      sp_D[i*chunk_size + j]
1083                         = sp_D[incom[j]*chunk_size + incom[i]];
1084              sp_D[i*chunk_size + i]
1085                  = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]);
1086          }
1087      }
1088      for (i = 0; i < chunk_size; i++)
1089      {
1090          for (j = 0; j < i; j++)
1091              sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
1092      }
1093
1094      if (nit == 0 && PreprocessMode > 0)
1095      {
1096         for (i = 0; i < chunk_size; i++)
1097         {
1098             iin  = index_in[i];
1099             aux  = 0.;
1100             ptmw = Cache->GetRow(iin);
1101             if (ptmw == NULL)
1102                 for (j = 0; j < nzout; j++)
1103                     aux += vau[j] * KER->Get(iin, indnzout[j]);
1104             else
1105                 for (j = 0; j < nzout; j++)
1106                     aux += vau[j] * ptmw[indnzout[j]];
1107             sp_h[i] = y_in(i) * aux - 1.0;
1108         }
1109      }
1110      else
1111      {
1112         for (i = 0; i < chunk_size; i++)
1113             vau[i] = alpha_in(i);
1114         for (i = 0; i < chunk_size; i++)
1115         {
1116             aux = 0.0;
1117             for (j = 0; j < chunk_size; j++)
1118                 aux += sp_D[i*chunk_size + j] * vau[j];
1119             sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
1120         }
1121      }
1122
1123    t = clock() - t;
1124    if (verbosity > 1)
1125        SG_SINFO("  Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC)
1126    else if (verbosity > 0)
1127        SG_SINFO("  %8.2lf |", (float64_t)t/CLOCKS_PER_SEC)
1128    tot_prep_time += (float64_t)t/CLOCKS_PER_SEC;
1129
1130    /*** Proximal point modification: first type ***/
1131
1132    if (tau_proximal < 0.0)
1133      tau_proximal_this = 0.0;
1134    else
1135      tau_proximal_this = tau_proximal;
1136    proximal_count = 0;
1137    do {
1138      t = clock();
1139      for (i = 0; i < chunk_size; i++)
1140      {
1141          vau[i]                  = sp_D[i*chunk_size + i];
1142          sp_h[i]                -= tau_proximal_this * alpha_in(i);
1143          sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this;
1144      }
1145
1146      if (kktold < delta*100)
1147          for (i = 0; i < chunk_size; i++)
1148              sp_alpha[i] = alpha_in(i);
1149      else
1150          for (i = 0; i < chunk_size; i++)
1151              sp_alpha[i] = 0.0;
1152
1153      /*** call the chosen inner gradient projection QP solver ***/
1154      i = gpm_solver(projection_solver, projection_projector, chunk_size,
1155                    sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha,
1156                    DELTAvpm*DELTAkin, &lsCount, &projCount);
1157
1158      if (i > vpmWarningThreshold)
1159      {
1160        if (ker_type == 2)
1161        {
1162            SG_SINFO("\n WARNING: inner subproblem hard to solve;")
1163            SG_SINFO(" setting a smaller -q or")
1164            SG_SINFO(" tuning -c and -g options might help.\n")
1165        }
1166        else
1167        {
1168            SG_SINFO("\n WARNING: inner subproblem hard to solve;")
1169            SG_SINFO(" set a smaller -q or")
1170            SG_SINFO(" try a better data scaling.\n")
1171        }
1172      }
1173
1174      t = clock() - t;
1175      tot_vpm_iter   += i;
1176      tot_vpm_secant += projCount;
1177      tot_vpm_time   += (float64_t)t/CLOCKS_PER_SEC;
1178      if (verbosity > 1)
1179      {
1180          SG_SINFO("  Solver it: %d", i)
1181          SG_SINFO(", ls: %d", lsCount)
1182          SG_SINFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC)
1183      }
1184      else if (verbosity > 0)
1185      {
1186          SG_SINFO("    %6d", i)
1187          SG_SINFO(" |    %8.2lf |", (float64_t)t/CLOCKS_PER_SEC)
1188      }
1189
1190      /*** Proximal point modification: second type ***/
1191
1192      for (i = 0; i < chunk_size; i++)
1193          sp_D[i*chunk_size + i] = (float32_t)vau[i];
1194      tau_proximal_this = 0.0;
1195      if (tau_proximal < 0.0)
1196      {
1197        dfval = 0.0;
1198        for (i = 0; i < chunk_size; i++)
1199        {
1200          aux = 0.0;
1201          for (j = 0; j < chunk_size; j++)
1202            aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
1203          dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
1204        }
1205
1206        aux=0.0;
1207        for (i = 0; i < chunk_size; i++)
1208            aux +=  (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
1209
1210        if ((-dfval/aux) < -0.5*tau_proximal)
1211        {
1212          tau_proximal_this = -tau_proximal;
1213          if (verbosity > 0)
1214            SG_SDEBUG("tau threshold: %lf  ", -dfval/aux)
1215        }
1216      }
1217      proximal_count++;
1218    } while (tau_proximal_this != 0.0 && proximal_count < 2); // Proximal point loop
1219
1220    t = clock();
1221
1222    nzin = 0;
1223    for (j = 0; j < chunk_size; j++)
1224    {
1225        if (nit == 0)
1226            aux = sp_alpha[j];
1227        else
1228            aux = sp_alpha[j] - alpha_in(j);
1229        if (fabs(aux) > DELTAsv)
1230        {
1231            indnzin[nzin] = index_in[j];
1232            grad[nzin]    = aux * y_in(j);
1233            nzin++;
1234        }
1235    }
1236
1237	// in case of LINADD enabled use faster linadd variant
1238	if (KER->get_kernel()->has_property(KP_LINADD) && get_linadd_enabled())
1239	{
1240		KER->get_kernel()->clear_normal() ;
1241
1242		for (j = 0; j < nzin; j++)
1243			KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
1244
1245        if (nit == 0 && PreprocessMode > 0)
1246		{
1247			for (j = 0; j < nzout; j++)
1248			{
1249				jin = indnzout[j];
1250				KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
1251			}
1252		}
1253
1254        for (i = 0; i < ell; i++)
1255            st[i] += KER->get_kernel()->compute_optimized(i);
1256	}
1257	else  // nonlinear kernel
1258    {
1259        k = Cache->DivideMP(ing, indnzin, nzin);
1260        for (j = 0; j < k; j++)
1261        {
1262            ptmw = Cache->FillRow(indnzin[ing[j]]);
1263            for (i = 0; i < ell; i++)
1264                st[i] += grad[ing[j]] * ptmw[i];
1265        }
1266
1267        if (PreprocessMode > 0 && nit == 0)
1268        {
1269            clock_t ti2;
1270
1271            ti2 = clock();
1272            for (j = 0; j < nzout; j++)
1273            {
1274                jin  = indnzout[j];
1275                ptmw = Cache->FillRow(jin);
1276                for (i = 0; i < ell; i++)
1277                    st[i] += alpha[jin] * y[jin] * ptmw[i];
1278            }
1279            if (verbosity > 1)
1280                SG_SINFO(
1281                 "  G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
1282        }
1283    }
1284
1285    /*** sort the vectors for cache managing ***/
1286
1287    t = clock() - t;
1288    if (verbosity > 1)
1289        SG_SINFO("  Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC)
1290    else if (verbosity > 0)
1291        SG_SINFO("  %8.2lf |", (float64_t)t/CLOCKS_PER_SEC)
1292    tot_st_time += (float64_t)t/CLOCKS_PER_SEC;
1293
1294    /*** global updating of the solution vector ***/
1295    for (i = 0; i < chunk_size; i++)
1296        alpha_in(i) = sp_alpha[i];
1297
1298    if (verbosity > 1)
1299    {
1300        j = k = 0;
1301        for (i = 0; i < ell; i++)
1302        {
1303            if (is_free(i))  j++;
1304            if (is_bound(i)) k++;
1305        }
1306        SG_SINFO("  SV: %d", j+k)
1307        SG_SINFO(",  BSV: %d\n", k)
1308    }
1309    Cache->Iteration();
1310    nit = nit+1;
1311  } while (!optimal() && !(CSignal::cancel_computations()));
1312  /* End of the problem resolution loop                                      */
1313  /***************************************************************************/
1314
1315  t = clock();
1316  if ((t-ti) > 0)
1317      total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
1318  else
1319      total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
1320  ti = t;
1321
1322  memcpy(solution, alpha, ell * sizeof(float64_t));
1323
1324  /* objective function evaluation */
1325  fval = 0.0;
1326  for (i = 0; i < ell; i++)
1327      fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
1328
1329  SG_SINFO("\n------+-----------+-----------+-------------+")
1330  SG_SINFO("-----------+--------------\n")
1331  SG_SINFO(
1332      "\n- TOTAL ITERATIONS: %i\n", nit);
1333
1334  if (verbosity > 1)
1335  {
1336      j = 0;
1337      k = 0;
1338      z = 0;
1339      for (i = ell - 1; i >= 0; i--)
1340      {
1341           if (cec[i] > 0) j++;
1342           if (cec[i] > 1) k++;
1343           if (cec[i] > 2) z++;
1344      }
1345      SG_SINFO(
1346        "- Variables entering the working set at least one time:  %i\n", j);
1347      SG_SINFO(
1348        "- Variables entering the working set at least two times:  %i\n", k);
1349      SG_SINFO(
1350        "- Variables entering the working set at least three times:  %i\n", z);
1351  }
1352
1353
1354  SG_FREE(bmem);
1355  SG_FREE(bmemrid);
1356  SG_FREE(pbmr);
1357  SG_FREE(cec);
1358  SG_FREE(ing);
1359  SG_FREE(inaux);
1360  SG_FREE(indnzin);
1361  SG_FREE(index_in);
1362  SG_FREE(inold);
1363  SG_FREE(incom);
1364  SG_FREE(indnzout);
1365  SG_FREE(index_out);
1366  SG_FREE(vau);
1367  SG_FREE(alpha);
1368  SG_FREE(weight);
1369  SG_FREE(grad);
1370  SG_FREE(stloc);
1371  SG_FREE(st);
1372  SG_FREE(sp_h);
1373  SG_FREE(sp_hloc);
1374  SG_FREE(sp_y);
1375  SG_FREE(sp_D);
1376  SG_FREE(sp_alpha);
1377  delete Cache;
1378
1379  aux = KER->KernelEvaluations;
1380  SG_SINFO("- Total CPU time: %lf\n", total_time)
1381  if (verbosity > 0)
1382  {
1383      SG_SINFO(
1384              "- Total kernel evaluations: %.0lf\n", aux);
1385      SG_SINFO(
1386              "- Total inner solver iterations: %i\n", tot_vpm_iter);
1387      if (projection_projector == 1)
1388          SG_SINFO(
1389              "- Total projector iterations: %i\n", tot_vpm_secant);
1390      SG_SINFO(
1391              "- Total preparation time: %lf\n", tot_prep_time);
1392      SG_SINFO(
1393              "- Total inner solver time: %lf\n", tot_vpm_time);
1394      SG_SINFO(
1395              "- Total gradient updating time: %lf\n", tot_st_time);
1396  }
1397  SG_SINFO("- Objective function value: %lf\n", fval)
1398  objective_value=fval;
1399  return aux;
1400}
1401
1402/******************************************************************************/
1403/*** Quick sort for integer vectors                                         ***/
1404/******************************************************************************/
1405void quick_si(int32_t a[], int32_t n)
1406{
1407  int32_t i, j, s, d, l, x, w, ps[20], pd[20];
1408
1409  l     = 0;
1410  ps[0] = 0;
1411  pd[0] = n-1;
1412  do
1413  {
1414      s = ps[l];
1415      d = pd[l];
1416      l--;
1417      do
1418      {
1419          i = s;
1420          j = d;
1421          x = a[(s+d)/2];
1422          do
1423          {
1424              while (a[i] < x) i++;
1425              while (a[j] > x) j--;
1426              if (i <= j)
1427              {
1428                  w    = a[i];
1429                  a[i] = a[j];
1430                  i++;
1431                  a[j] = w;
1432                  j--;
1433              }
1434          } while(i<=j);
1435          if (j-s > d-i)
1436          {
1437              l++;
1438              ps[l] = s;
1439              pd[l] = j;
1440              s     = i;
1441          }
1442          else
1443          {
1444              if (i < d)
1445              {
1446                  l++;
1447                  ps[l] = i;
1448                  pd[l] = d;
1449              }
1450          d = j;
1451          }
1452      } while (s < d);
1453  } while (l >= 0);
1454}
1455
1456/******************************************************************************/
1457/*** Quick sort for real vectors returning also the exchanges               ***/
1458/******************************************************************************/
1459void quick_s2(float64_t a[], int32_t n, int32_t ia[])
1460{
1461  int32_t     i, j, s, d, l, iw, ps[20], pd[20];
1462  float64_t  x, w;
1463
1464  l     = 0;
1465  ps[0] = 0;
1466  pd[0] = n-1;
1467  do
1468  {
1469      s = ps[l];
1470      d = pd[l];
1471      l--;
1472      do
1473      {
1474          i = s;
1475          j = d;
1476          x = a[(s+d)/2];
1477          do
1478          {
1479              while (a[i] < x) i++;
1480              while (a[j] > x) j--;
1481              if (i <= j)
1482              {
1483                  iw    = ia[i];
1484                  w     = a[i];
1485                  ia[i] = ia[j];
1486                  a[i]  = a[j];
1487                  i++;
1488                  ia[j] = iw;
1489                  a[j]  = w;
1490                  j--;
1491              }
1492          } while (i <= j);
1493          if (j-s > d-i)
1494          {
1495              l++;
1496              ps[l] = s;
1497              pd[l] = j;
1498              s     = i;
1499          }
1500          else
1501          {
1502              if (i < d)
1503              {
1504                  l++;
1505                  ps[l] = i;
1506                  pd[l] = d;
1507              }
1508              d = j;
1509          }
1510      } while (s < d);
1511  } while(l>=0);
1512}
1513
1514/******************************************************************************/
1515/*** Quick sort for integer vectors returning also the exchanges            ***/
1516/******************************************************************************/
1517void quick_s3(int32_t a[], int32_t n, int32_t ia[])
1518{
1519  int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
1520
1521  l     = 0;
1522  ps[0] = 0;
1523  pd[0] = n-1;
1524  do
1525  {
1526      s = ps[l];
1527      d = pd[l];
1528      l--;
1529      do
1530      {
1531          i = s;
1532          j = d;
1533          x = a[(s+d)/2];
1534          do
1535          {
1536              while (a[i] < x) i++;
1537              while (a[j] > x) j--;
1538              if (i <= j)
1539              {
1540                 iw    = ia[i];
1541                 w     = a[i];
1542                 ia[i] = ia[j];
1543                 a[i]  = a[j];
1544                 i++;
1545                 ia[j] = iw;
1546                 a[j]  = w;
1547                 j--;
1548              }
1549          } while (i <= j);
1550          if (j-s > d-i)
1551          {
1552              l++;
1553              ps[l] = s;
1554              pd[l] = j;
1555              s     = i;
1556          }
1557          else
1558          {
1559              if (i < d)
1560              {
1561                  l++;
1562                  ps[l] = i;
1563                  pd[l] = d;
1564              }
1565              d = j;
1566          }
1567      } while (s < d);
1568  } while (l >= 0);
1569}
1570}
1571
1572#endif // DOXYGEN_SHOULD_SKIP_THIS
1573
1574/******************************************************************************/
1575/*** End of gpdtsolve.cpp file                                              ***/
1576/******************************************************************************/