
/trunk/octave-forge/extra/NaN/src/linear.cpp

/*
$Id$
Copyright (c) 2007-2009 The LIBLINEAR Project.
Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com>

This function is part of the NaN-toolbox
http://pub.ist.ac.at/~schloegl/matlab/NaN/

This code was extracted from liblinear-1.51 in Jan 2010 and
modified for use with Octave.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"

typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}
void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsnprintf(buf,sizeof(buf),fmt,ap);	// bounded write to avoid overflowing buf
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif
class l2r_lr_fun : public function
{
public:
	l2r_lr_fun(const problem *prob, double Cp, double Cn);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = prob->W[i] * Cp;
		else
			C[i] = prob->W[i] * Cn;
	}
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
}

double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}
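
// Note on the three callbacks above: TRON (tron.cpp) minimizes
//
//	f(w) = 0.5 w^T w + \sum_i C_i log(1 + exp(-y_i w^T x_i))
//
// fun() evaluates f, branching on the sign of y_i w^T x_i for numerical
// stability; grad() reuses z = Xw from the preceding fun() call at the same
// w (the order TRON guarantees) and returns g = w + X^T z with
// z_i = C_i (sigma(y_i w^T x_i) - 1) y_i; Hv() returns the Hessian-vector
// product Hs = s + X^T (C D X s) with D_i = sigma_i (1 - sigma_i).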
class l2r_l2_svc_fun : public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void subXv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = prob->W[i] * Cp;
		else
			C[i] = prob->W[i] * Cn;
	}
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}
// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//	min_{\alpha}	0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//	s.t.		\alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i = 0 \forall i
//
// where	e^m_i = 0 if y_i = m,
//		e^m_i = 1 if y_i != m,
//		C^m_i = C if m = y_i,
//		C^m_i = 0 if m != y_i,
// and		w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w

#define GETI(i) (i)
// To support weights for instances, use GETI(i) (i)
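
// Storage layout used by this solver: alpha is an l x nr_class array
// (alpha[i*nr_class+m] = alpha^m_i) and w is a w_size x nr_class array, so
// the per-class weights of feature j sit contiguously at &w[j*nr_class];
// this is what keeps the sparse updates over d_ind/d_val in Solve() cheap.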
class Solver_MCSVM_CS
{
public:
	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
	~Solver_MCSVM_CS();
	void Solve(double *w);
private:
	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
	double *B, *C, *G;
	int w_size, l;
	int nr_class;
	int max_iter;
	double eps;
	const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = new double[prob->l];
	for(int i = 0; i < prob->l; i++)
		this->C[i] = prob->W[i] * weighted_C[prob->y[i]];
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
	delete[] C;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// initial
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;
	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
										alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("Warning: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//	min_\alpha	0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//	s.t.		0 <= alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
//	upper_bound_i = Cp if y_i = 1
//	upper_bound_i = Cn if y_i = -1
//	D_ii = 0
// In L2-SVM case:
//	upper_bound_i = INF
//	D_ii = 1/(2*Cp)	if y_i = 1
//	D_ii = 1/(2*Cn)	if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (i)
// To support weights for instances, use GETI(i) (i)
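
// Each coordinate step below minimizes the dual in the single variable
// alpha_i with all others fixed: with G = y_i w^T x_i - 1 + D_ii alpha_i
// (clipped at the box bounds to give the projected gradient PG), the exact
// one-variable minimizer is
//
//	alpha_i <- min(max(alpha_i - G/(x_i^T x_i + D_ii), 0), upper_bound_i)
//
// QD[i] caches x_i^T x_i + D_ii, and w = \sum_i y_i alpha_i x_i is kept
// up to date by the sparse update that follows each change of alpha_i.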
static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double *diag = new double[l];
	double *upper_bound = new double[l];
	double *C_ = new double[l];
	for(i=0; i<l; i++)
	{
		if(prob->y[i]>0)
			C_[i] = prob->W[i] * Cp;
		else
			C_[i] = prob->W[i] * Cn;
		diag[i] = 0.5/C_[i];
		upper_bound[i] = INF;
	}
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		for(i=0; i<l; i++)
		{
			diag[i] = 0;
			upper_bound[i] = C_[i];
		}
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0;s<active_size;s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] upper_bound;
	delete [] diag;
	delete [] C_;
	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//	min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (i)
// To support weights for instances, use GETI(i) (i)
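
// For each feature j, the inner step minimizes the one-variable model
//
//	|w_j + d| + G d + 0.5 H d^2
//
// whose closed-form minimizer is the soft-threshold rule used under
// "obtain Newton direction d" below: d = -Gp/H if Gp <= H*w_j,
// d = -Gn/H if Gn >= H*w_j, and d = -w_j otherwise, where Gp = G+1 and
// Gn = G-1 are the right and left derivatives of the |w_j| term. A
// backtracking line search (factor 0.5, sufficient-decrease parameter
// sigma) then accepts or shrinks d.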
static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new;
	double Gmax_init;
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double *C = new double[l];

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
		{
			y[j] = 1;
			C[j] = prob_col->W[j] * Cp;
		}
		else
		{
			y[j] = -1;
			C[j] = prob_col->W[j] * Cn;
		}
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x->value *= y[ind]; // x->value stores yi*xij
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] C;
	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}
// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//	min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (i)
// To support weights for instances, use GETI(i) (i)
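
// Implementation note: exp_wTx[i] caches exp(w^T x_i) and is updated
// multiplicatively inside the line search (via exp_wTx_new), so dense dot
// products are never recomputed; it is rebuilt from scratch only when the
// line search exhausts max_num_linesearch. With instance weights, C has one
// entry per instance (C[j] = W[j]*Cp or W[j]*Cn), hence GETI(i) is (i).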
static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double x_min = 0;
	double sigma = 0.01;
	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new;
	double Gmax_init;
	double sum1, appxcond1;
	double sum2, appxcond2;
	double cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *xj_max = new double[w_size];
	double *C_sum = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xjpos_sum = new double[w_size];
	feature_node *x;

	double *C = new double[l];

	for(j=0; j<l; j++)
	{
		exp_wTx[j] = 1;
		if(prob_col->y[j] > 0)
		{
			y[j] = 1;
			C[j] = prob_col->W[j] * Cp;
		}
		else
		{
			y[j] = -1;
			C[j] = prob_col->W[j] * Cn;
		}
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_max[j] = 0;
		C_sum[j] = 0;
		xjneg_sum[j] = 0;
		xjpos_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x_min = min(x_min, val);
			xj_max[j] = max(xj_max[j], val);
			C_sum[j] += C[GETI(ind)];
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*val;
			else
				xjpos_sum[j] += C[GETI(ind)]*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			sum1 = 0;
			sum2 = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				double exp_wTxind = exp_wTx[ind];
				double tmp1 = x->value/(1+exp_wTxind);
				double tmp2 = C[GETI(ind)]*tmp1;
				double tmp3 = tmp2*exp_wTxind;
				sum2 += tmp2;
				sum1 += tmp3;
				H += tmp1*tmp3;
				x++;
			}

			G = -sum2 + xjneg_sum[j];

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			d = min(max(d,-10.0),10.0);

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				if(x_min >= 0)
				{
					double tmp = exp(d*xj_max[j]);
					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
					if(min(appxcond1,appxcond2) <= 0)
					{
						x = prob_col->x[j];
						while(x->index != -1)
						{
							exp_wTx[x->index-1] *= exp(d*x->value);
							x++;
						}
						break;
					}
				}

				cond += d*xjneg_sum[j];

				int i = 0;
				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					double exp_dx = exp(d*x->value);
					exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
					cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
					x++; i++;
				}

				if(cond <= 0)
				{
					int i = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						exp_wTx[ind] = exp_wTx_new[i];
						x++; i++;
					}
					break;
				}
				else
				{
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute exp_wTx[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					exp_wTx[i] = 0;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						exp_wTx[x->index-1] += w[i]*x->value;
						x++;
					}
				}

				for(int i=0; i<l; i++)
					exp_wTx[i] = exp(exp_wTx[i]);
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] C;
	delete [] index;
	delete [] y;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] xj_max;
	delete [] C_sum;
	delete [] xjneg_sum;
	delete [] xjpos_sum;
}
// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	int nnz = 0;
	int *col_ptr = new int[n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new int[l];
	prob_col->x = new feature_node*[n];
	prob_col->W = new double[l];

	for(i=0; i<l; i++)
	{
		prob_col->y[i] = prob->y[i];
		prob_col->W[i] = prob->W[i];
	}

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}
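
// Example of the conversion (indices 1-based, -1 terminates each list):
// the row-format problem with l=2 instances and n=2 features
//	x[0] = (1:0.5) (2:1.0) (-1)
//	x[1] =         (2:2.0) (-1)
// becomes, in column format,
//	prob_col->x[0] = (1:0.5) (-1)		// feature 1: instance 1
//	prob_col->x[1] = (1:1.0) (2:2.0) (-1)	// feature 2: instances 1,2
// where each stored index is now the (1-based) instance number.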
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}
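
// Example: y = {2,1,2,3} yields nr_class=3, label={2,1,3}, count={2,1,1},
// start={0,2,3} and perm={0,2,1,3}; perm lists the indices of all class-2
// instances first, then class 1, then class 3, each group in input order.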
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	double eps=param->eps;
	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i]==+1)
			pos++;
	neg = prob->l - pos;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] prob_col.W;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] prob_col.W;
			delete [] x_space;
			break;
		}
		default:
			fprintf(stderr, "Error: unknown solver_type\n");
			break;
	}
}
//
// Remove zero-weighted data as libsvm and some liblinear solvers require C > 0.
//
static void remove_zero_weight(problem *newprob, const problem *prob)
{
	int i;
	int l = 0;
	for(i=0;i<prob->l;i++)
		if(prob->W[i] > 0) l++;
	*newprob = *prob;
	newprob->l = l;
	newprob->x = Malloc(feature_node*,l);
	newprob->y = Malloc(int,l);
	newprob->W = Malloc(double,l);

	int j = 0;
	for(i=0;i<prob->l;i++)
		if(prob->W[i] > 0)
		{
			newprob->x[j] = prob->x[i];
			newprob->y[j] = prob->y[i];
			newprob->W[j] = prob->W[i];
			j++;
		}
}
//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
	problem newprob;
	remove_zero_weight(&newprob, prob);
	prob = &newprob;

	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	group_classes(prob,&nr_class,&label,&start,&count,perm);

	model_->nr_class=nr_class;
	model_->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
		model_->label[i] = label[i];

	// calculate weighted C
	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
		weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
		for(j=0;j<nr_class;j++)
			if(param->weight_label[i] == label[j])
				break;
		if(j == nr_class)
			fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
		else
			weighted_C[j] *= param->weight[i];
	}

	// constructing the subproblem
	feature_node **x = Malloc(feature_node *,l);
	double *W = Malloc(double,l);
	for(i=0;i<l;i++)
	{
		x[i] = prob->x[perm[i]];
		W[i] = prob->W[perm[i]];
	}

	int k;
	problem sub_prob;
	sub_prob.l = l;
	sub_prob.n = n;
	sub_prob.x = Malloc(feature_node *,sub_prob.l);
	sub_prob.y = Malloc(int,sub_prob.l);
	sub_prob.W = Malloc(double,sub_prob.l);

	for(k=0; k<sub_prob.l; k++)
	{
		sub_prob.x[k] = x[k];
		sub_prob.W[k] = W[k];
	}

	// multi-class svm by Crammer and Singer
	if(param->solver_type == MCSVM_CS)
	{
		model_->w=Malloc(double, n*nr_class);
		for(i=0;i<nr_class;i++)
			for(j=start[i];j<start[i]+count[i];j++)
				sub_prob.y[j] = i;
		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
		Solver.Solve(model_->w);
	}
	else
	{
		if(nr_class == 2)
		{
			model_->w=Malloc(double, w_size);

			int e0 = start[0]+count[0];
			k=0;
			for(; k<e0; k++)
				sub_prob.y[k] = +1;
			for(; k<sub_prob.l; k++)
				sub_prob.y[k] = -1;

			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
		}
		else
		{
			model_->w=Malloc(double, w_size*nr_class);
			double *w=Malloc(double, w_size);
			for(i=0;i<nr_class;i++)
			{
				int si = start[i];
				int ei = si+count[i];

				k=0;
				for(; k<si; k++)
					sub_prob.y[k] = -1;
				for(; k<ei; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, w, weighted_C[i], param->C);

				for(int j=0;j<w_size;j++)
					model_->w[j*nr_class+i] = w[j];
			}
			free(w);
		}
	}

	free(x);
	free(W);
	free(label);
	free(start);
	free(count);
	free(perm);
	free(sub_prob.x);
	free(sub_prob.y);
	free(sub_prob.W);
	free(weighted_C);
	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	return model_;
}
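
// A minimal usage sketch (illustrative only; the helper name is hypothetical
// and the block is compiled out). It assembles a two-instance weighted
// problem in sparse row format and trains a binary L2R_LR model:
#if 0
static model *example_train_binary(void)
{
	// each row ends with a sentinel node whose index is -1
	static feature_node x0[] = {{1,0.5},{2,1.0},{-1,0.0}};
	static feature_node x1[] = {{2,2.0},{-1,0.0}};
	static feature_node *rows[] = {x0, x1};
	static int y[] = {+1, -1};
	static double W[] = {1.0, 1.0};	// per-instance weights; must be > 0

	problem prob;
	prob.l = 2;			// number of instances
	prob.n = 2;			// number of features
	prob.x = rows;
	prob.y = y;
	prob.W = W;
	prob.bias = -1;			// < 0: no bias feature appended

	parameter param;
	param.solver_type = L2R_LR;
	param.eps = 0.01;		// stopping tolerance
	param.C = 1.0;
	param.nr_weight = 0;		// no per-class weights
	param.weight_label = NULL;
	param.weight = NULL;

	return train(&prob, &param);	// caller frees with destroy_model()
}
#endif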
void destroy_model(struct model *model_)
{
	if(model_->w != NULL)
		free(model_->w);
	if(model_->label != NULL)
		free(model_->label);
	free(model_);
}

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", "L1R_L2LOSS_SVC", "L1R_LR", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);
	fprintf(fp, "label");
	for(i=0; i<model_->nr_class; i++)
		fprintf(fp, " %d", model_->label[i]);
	fprintf(fp, "\n");

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.16g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}
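
// For a binary L2R_LR model with two features, the file written above looks
// like (weight values illustrative):
//
//	solver_type L2R_LR
//	nr_class 2
//	label 1 -1
//	nr_feature 2
//	bias -1
//	w
//	0.123456789
//	-0.987654321
//
// i.e. one line per feature row, with nr_w weight values per line.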
struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				free(model_->label);
				free(model_);
				return NULL;
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			fscanf(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			fscanf(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			fscanf(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				fscanf(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			free(model_->label);	// label may already have been read
			free(model_);
			return NULL;
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
		fscanf(fp, "\n");
	}
	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}
int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}

int predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	int label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}
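
// Classifying one sparse instance then looks like (illustrative):
//
//	feature_node x[] = {{1,0.3},{3,-1.2},{-1,0.0}};
//	int y_hat = predict(model_, x);
//
// If the model was trained with bias >= 0, the caller is expected to append
// the bias term as an extra node (index nr_feature+1, value model_->bias)
// before the terminating -1, as the liblinear front ends do.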
int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(model_->param.solver_type==L2R_LR)
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		int label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR)
		return "unknown solver type";

	return NULL;
}
void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);

	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(int,subprob.l);
		subprob.W = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			subprob.W[k] = prob->W[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		destroy_model(submodel);
		free(subprob.x);
		free(subprob.y);
		free(subprob.W);
	}
	free(fold_start);
	free(perm);
}
int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}