
/testCode/detection/liblinear/linear.cpp

http://car-aot.googlecode.com/
Possible License(s): BSD-3-Clause
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "linear.h"
#include "tron.h"

typedef signed char schar;
template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
	dst = new T[n];
	memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
#define INF HUGE_VAL

static void print_string_stdout(const char *s)
{
	fputs(s,stdout);
	fflush(stdout);
}

static void (*liblinear_print_string) (const char *) = &print_string_stdout;

#if 1
static void info(const char *fmt,...)
{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsprintf(buf,fmt,ap);
	va_end(ap);
	(*liblinear_print_string)(buf);
}
#else
static void info(const char *fmt,...) {}
#endif
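
/*
 * Usage note (added; not part of the upstream file): all progress output
 * below is routed through liblinear_print_string, so a caller can silence
 * or redirect it via set_print_string_function() defined at the end of
 * this file. A minimal sketch:
 *
 *	static void quiet(const char *) {}
 *	...
 *	set_print_string_function(&quiet);	// silence training output
 *	set_print_string_function(NULL);	// restore printing to stdout
 */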
// function object handed to the TRON solver: L2-regularized logistic regression
class l2r_lr_fun : public function
{
public:
	l2r_lr_fun(const problem *prob, double Cp, double Cn);
	~l2r_lr_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void XTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	const problem *prob;
};

l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_lr_fun::~l2r_lr_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
}

double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	f = 2*f;
	for(i=0;i<w_size-1;i++)	// note: this copy leaves the last weight component unregularized
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		D[i] = z[i]*(1-z[i]);
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	for(i=0;i<w_size-1;i++)	// (matches the regularizer in fun())
		g[i] = w[i] + g[i];
}

int l2r_lr_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_lr_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	Xv(s, wa);
	for(i=0;i<l;i++)
		wa[i] = C[i]*D[i]*wa[i];

	XTv(wa, Hs);
	for(i=0;i<w_size-1;i++)	// (matches the regularizer in fun())
		Hs[i] = s[i] + Hs[i];
	delete[] wa;
}

void l2r_lr_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_lr_fun::XTv(double *v, double *XTv)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}
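
/*
 * Note (added): l2r_lr_fun supplies TRON with the pieces of the
 * L2-regularized logistic regression problem,
 *	f(w) = 0.5 w^T w + sum_i C_i log(1 + exp(-y_i w^T x_i))
 *	g(w) = w + X^T u,   u_i = C_i (sigma(y_i w^T x_i) - 1) y_i
 *	H s  = s + X^T diag(C_i D_i) X s,   D_i = sigma_i (1 - sigma_i)
 * where sigma is the logistic function. fun() caches X w in z for grad()
 * to reuse, and grad() caches D for Hv().
 */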
class l2r_l2_svc_fun : public function
{
public:
	l2r_l2_svc_fun(const problem *prob, double Cp, double Cn);
	~l2r_l2_svc_fun();

	double fun(double *w);
	void grad(double *w, double *g);
	void Hv(double *s, double *Hs);

	int get_nr_variable(void);

private:
	void Xv(double *v, double *Xv);
	void subXv(double *v, double *Xv);
	void subXTv(double *v, double *XTv);

	double *C;
	double *z;
	double *D;
	int *I;
	int sizeI;
	const problem *prob;
};

l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn)
{
	int i;
	int l=prob->l;
	int *y=prob->y;

	this->prob = prob;

	z = new double[l];
	D = new double[l];
	C = new double[l];
	I = new int[l];

	for (i=0; i<l; i++)
	{
		if (y[i] == 1)
			C[i] = Cp;
		else
			C[i] = Cn;
	}
}

l2r_l2_svc_fun::~l2r_l2_svc_fun()
{
	delete[] z;
	delete[] D;
	delete[] C;
	delete[] I;
}

double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		if (d > 0)
			f += C[i]*d*d;
	}
	f = 2*f;
	for(i=0;i<w_size;i++)
		f += w[i]*w[i];
	f /= 2.0;

	return(f);
}

void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	int *y=prob->y;
	int l=prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}

int l2r_l2_svc_fun::get_nr_variable(void)
{
	return prob->n;
}

void l2r_l2_svc_fun::Hv(double *s, double *Hs)
{
	int i;
	int l=prob->l;
	int w_size=get_nr_variable();
	double *wa = new double[l];

	subXv(s, wa);
	for(i=0;i<sizeI;i++)
		wa[i] = C[I[i]]*wa[i];

	subXTv(wa, Hs);
	for(i=0;i<w_size;i++)
		Hs[i] = s[i] + 2*Hs[i];
	delete[] wa;
}

void l2r_l2_svc_fun::Xv(double *v, double *Xv)
{
	int i;
	int l=prob->l;
	feature_node **x=prob->x;

	for(i=0;i<l;i++)
	{
		feature_node *s=x[i];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXv(double *v, double *Xv)
{
	int i;
	feature_node **x=prob->x;

	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		Xv[i]=0;
		while(s->index!=-1)
		{
			Xv[i]+=v[s->index-1]*s->value;
			s++;
		}
	}
}

void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
{
	int i;
	int w_size=get_nr_variable();
	feature_node **x=prob->x;

	for(i=0;i<w_size;i++)
		XTv[i]=0;
	for(i=0;i<sizeI;i++)
	{
		feature_node *s=x[I[i]];
		while(s->index!=-1)
		{
			XTv[s->index-1]+=v[i]*s->value;
			s++;
		}
	}
}
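
/*
 * Note (added): for the L2-loss SVC objective
 *	f(w) = 0.5 w^T w + sum_i C_i max(0, 1 - y_i w^T x_i)^2
 * only instances with 1 - y_i w^T x_i > 0 contribute to the gradient and
 * the generalized Hessian, so grad() collects them in I[0..sizeI-1] and
 * subXv()/subXTv() restrict the matrix-vector products to that set.
 */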
// A coordinate descent algorithm for
// multi-class support vector machines by Crammer and Singer
//
//  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
//    s.t.  \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
//
//  where e^m_i = 0 if y_i  = m,
//        e^m_i = 1 if y_i != m,
//  C^m_i = C if m  = y_i,
//  C^m_i = 0 if m != y_i,
//  and w_m(\alpha) = \sum_i \alpha^m_i x_i
//
// Given:
// x, y, C
// eps is the stopping tolerance
//
// solution will be put in w

#define GETI(i) (prob->y[i])
// To support weights for instances, use GETI(i) (i)
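
/*
 * Note (added, an interpretation of solve_sub_problem() below): with all
 * other variables fixed, the update for instance i solves, over the
 * active labels,
 *	min_alpha 0.5 A_i \sum_m alpha_m^2 + \sum_m B_m alpha_m
 *	s.t. alpha_m <= C^m_i, \sum_m alpha_m = 0
 * with A_i = x_i^T x_i. The descending sort over D finds the threshold
 * beta (the multiplier of the equality constraint), giving
 *	alpha_m = min(C^m_i, (beta - B_m)/A_i).
 */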
class Solver_MCSVM_CS
{
public:
	Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000);
	~Solver_MCSVM_CS();
	void Solve(double *w);
private:
	void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new);
	bool be_shrunk(int i, int m, int yi, double alpha_i, double minG);
	double *B, *C, *G;
	int w_size, l;
	int nr_class;
	int max_iter;
	double eps;
	const problem *prob;
};

Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter)
{
	this->w_size = prob->n;
	this->l = prob->l;
	this->nr_class = nr_class;
	this->eps = eps;
	this->max_iter = max_iter;
	this->prob = prob;
	this->B = new double[nr_class];
	this->G = new double[nr_class];
	this->C = weighted_C;
}

Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	delete[] B;
	delete[] G;
}

int compare_double(const void *a, const void *b)
{
	if(*(double *)a > *(double *)b)
		return -1;
	if(*(double *)a < *(double *)b)
		return 1;
	return 0;
}

void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	double *D;

	clone(D, B, active_i);
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	qsort(D, active_i, sizeof(double), compare_double);

	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];
	beta /= r;

	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = min(C_yi, (beta-B[r])/A_i);
		else
			alpha_new[r] = min((double)0, (beta - B[r])/A_i);
	}
	delete[] D;
}

bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
{
	double bound = 0;
	if(m == yi)
		bound = C[GETI(i)];
	if(alpha_i == bound && G[m] < minG)
		return true;
	return false;
}

void Solver_MCSVM_CS::Solve(double *w)
{
	int i, m, s;
	int iter = 0;
	double *alpha = new double[l*nr_class];
	double *alpha_new = new double[nr_class];
	int *index = new int[l];
	double *QD = new double[l];
	int *d_ind = new int[nr_class];
	double *d_val = new double[nr_class];
	int *alpha_index = new int[nr_class*l];
	int *y_index = new int[l];
	int active_size = l;
	int *active_size_i = new int[l];
	double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;

	// initial
	for(i=0;i<l*nr_class;i++)
		alpha[i] = 0;
	for(i=0;i<w_size*nr_class;i++)
		w[i] = 0;
	for(i=0;i<l;i++)
	{
		for(m=0;m<nr_class;m++)
			alpha_index[i*nr_class+m] = m;
		feature_node *xi = prob->x[i];
		QD[i] = 0;
		while(xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		active_size_i[i] = nr_class;
		y_index[i] = prob->y[i];
		index[i] = i;
	}

	while(iter < max_iter)
	{
		double stopping = -INF;
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				feature_node *xi = prob->x[i];
				while(xi->index!= -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<active_size_i[i];m++)
						G[m] += w_i[alpha_index_i[m]]*(xi->value);
					xi++;
				}

				double minG = INF;
				double maxG = -INF;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				if(active_size_i[i] <= 1)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("Warning: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//		upper_bound_i = Cp if y_i = 1
//		upper_bound_i = Cn if y_i = -1
//		D_ii = 0
// In L2-SVM case:
//		upper_bound_i = INF
//		D_ii = 1/(2*Cp)	if y_i = 1
//		D_ii = 1/(2*Cn)	if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
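
/*
 * Note (added): each pass below optimizes one dual variable at a time;
 * the single-variable minimizer is the projected Newton step
 *	alpha_i <- min(max(alpha_i - G/QD_i, 0), C)
 * where G is the gradient of the dual at alpha_i and QD_i = x_i^T x_i + D_ii;
 * PGmax/PGmin track the extreme projected gradients for the stopping test
 * and for shrinking.
 */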
static void solve_l2r_l1l2_svc(
	const problem *prob, double *w, double eps,
	double Cp, double Cn, int solver_type)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		alpha[i] = 0;
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];

		feature_node *xi = prob->x[i];
		while (xi->index != -1)
		{
			QD[i] += (xi->value)*(xi->value);
			xi++;
		}
		index[i] = i;
	}

	while (iter < max_iter)
	{
		PGmax_new = -INF;
		PGmin_new = INF;

		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}

		for (s=0;s<active_size;s++)
		{
			i = index[s];
			G = 0;
			schar yi = y[i];

			feature_node *xi = prob->x[i];
			while(xi->index!= -1)
			{
				G += w[xi->index-1]*(xi->value);
				xi++;
			}
			G = G*yi-1;

			C = upper_bound[GETI(i)];
			G += alpha[i]*diag[GETI(i)];

			PG = 0;
			if (alpha[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alpha[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = max(PGmax_new, PG);
			PGmin_new = min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C);
				d = (alpha[i] - alpha_old)*yi;
				xi = prob->x[i];
				while (xi->index != -1)
				{
					w[xi->index-1] += d*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(PGmax_new - PGmin_new <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				PGmax_old = INF;
				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
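
/*
 * Note (added): the coordinate step below minimizes a second-order
 * approximation of the loss plus the exact |w_j| term. Since |w_j| has
 * the one-sided derivatives Gp = G+1 and Gn = G-1, the minimizer is
 *	d = -Gp/H  if Gp <= H*w_j,
 *	d = -Gn/H  if Gn >= H*w_j,
 *	d = -w_j   otherwise (step to zero),
 * followed by a backtracking line search with ratio sigma.
 */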
static void solve_l1r_l2_svc(
	problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new;
	double Gmax_init;
	double d_old, d_diff;
	double loss_old, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x->value *= y[ind]; // x->value stores yi*xij
			xj_sq[j] += C[GETI(ind)]*val*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			G_loss = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				if(b[ind] > 0)
				{
					double val = x->value;
					double tmp = C[GETI(ind)]*val;
					G_loss -= tmp*b[ind];
					H += tmp*val;
				}
				x++;
			}
			G_loss *= 2;

			G = G_loss;
			H *= 2;
			H = max(H, 1e-12);

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			d_old = 0;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				d_diff = d_old - d;
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				appxcond = xj_sq[j]*d*d + G_loss*d + cond;
				if(appxcond <= 0)
				{
					x = prob_col->x[j];
					while(x->index != -1)
					{
						b[x->index-1] += d_diff*x->value;
						x++;
					}
					break;
				}

				if(num_linesearch == 0)
				{
					loss_old = 0;
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						if(b[ind] > 0)
							loss_old += C[GETI(ind)]*b[ind]*b[ind];
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}
				else
				{
					loss_new = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						double b_new = b[ind] + d_diff*x->value;
						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						b[x->index-1] -= w[i]*x->value;
						x++;
					}
				}
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static void solve_l1r_lr(
	const problem *prob_col, double *w, double eps,
	double Cp, double Cn)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double x_min = 0;
	double sigma = 0.01;
	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new;
	double Gmax_init;
	double sum1, appxcond1;
	double sum2, appxcond2;
	double cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *xj_max = new double[w_size];
	double *C_sum = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xjpos_sum = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	for(j=0; j<l; j++)
	{
		exp_wTx[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		w[j] = 0;
		index[j] = j;
		xj_max[j] = 0;
		C_sum[j] = 0;
		xjneg_sum[j] = 0;
		xjpos_sum[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			double val = x->value;
			x_min = min(x_min, val);
			xj_max[j] = max(xj_max[j], val);
			C_sum[j] += C[GETI(ind)];
			if(y[ind] == -1)
				xjneg_sum[j] += C[GETI(ind)]*val;
			else
				xjpos_sum[j] += C[GETI(ind)]*val;
			x++;
		}
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;

		for(j=0; j<active_size; j++)
		{
			int i = j+rand()%(active_size-j);
			swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			j = index[s];
			sum1 = 0;
			sum2 = 0;
			H = 0;

			x = prob_col->x[j];
			while(x->index != -1)
			{
				int ind = x->index-1;
				double exp_wTxind = exp_wTx[ind];
				double tmp1 = x->value/(1+exp_wTxind);
				double tmp2 = C[GETI(ind)]*tmp1;
				double tmp3 = tmp2*exp_wTxind;
				sum2 += tmp2;
				sum1 += tmp3;
				H += tmp1*tmp3;
				x++;
			}

			G = -sum2 + xjneg_sum[j];

			double Gp = G+1;
			double Gn = G-1;
			double violation = 0;
			if(w[j] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(w[j] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = max(Gmax_new, violation);

			// obtain Newton direction d
			if(Gp <= H*w[j])
				d = -Gp/H;
			else if(Gn >= H*w[j])
				d = -Gn/H;
			else
				d = -w[j];

			if(fabs(d) < 1.0e-12)
				continue;

			d = min(max(d,-10.0),10.0);

			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
			int num_linesearch;
			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
			{
				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

				if(x_min >= 0)
				{
					double tmp = exp(d*xj_max[j]);
					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
					if(min(appxcond1,appxcond2) <= 0)
					{
						x = prob_col->x[j];
						while(x->index != -1)
						{
							exp_wTx[x->index-1] *= exp(d*x->value);
							x++;
						}
						break;
					}
				}

				cond += d*xjneg_sum[j];

				int i = 0;
				x = prob_col->x[j];
				while(x->index != -1)
				{
					int ind = x->index-1;
					double exp_dx = exp(d*x->value);
					exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
					cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
					x++; i++;
				}

				if(cond <= 0)
				{
					int i = 0;
					x = prob_col->x[j];
					while(x->index != -1)
					{
						int ind = x->index-1;
						exp_wTx[ind] = exp_wTx_new[i];
						x++; i++;
					}
					break;
				}
				else
				{
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute exp_wTx[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					exp_wTx[i] = 0;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					while(x->index != -1)
					{
						exp_wTx[x->index-1] += w[i]*x->value;
						x++;
					}
				}

				for(int i=0; i<l; i++)
					exp_wTx[i] = exp(exp_wTx[i]);
			}
		}

		if(iter == 0)
			Gmax_init = Gmax_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax_new <= eps*Gmax_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] xj_max;
	delete [] C_sum;
	delete [] xjneg_sum;
	delete [] xjpos_sum;
}
// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	int nnz = 0;
	int *col_ptr = new int[n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new int[l];
	prob_col->x = new feature_node*[n];

	for(i=0; i<l; i++)
		prob_col->y[i] = prob->y[i];

	for(i=0; i<n+1; i++)
		col_ptr[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			nnz++;
			col_ptr[x->index]++;
			x++;
		}
	}
	for(i=1; i<n+1; i++)
		col_ptr[i] += col_ptr[i-1] + 1;

	x_space = new feature_node[nnz+n];
	for(i=0; i<n; i++)
		prob_col->x[i] = &x_space[col_ptr[i]];

	for(i=0; i<l; i++)
	{
		feature_node *x = prob->x[i];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x_space[col_ptr[ind]].index = i+1; // starts from 1
			x_space[col_ptr[ind]].value = x->value;
			col_ptr[ind]++;
			x++;
		}
	}
	for(i=0; i<n; i++)
		x_space[col_ptr[i]].index = -1;

	*x_space_ret = x_space;

	delete [] col_ptr;
}
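
/*
 * Note (added): the column-format copy lets the two L1-regularized
 * solvers above walk the data one feature at a time: prob_col->x[j]
 * lists the instances in which feature j is nonzero (indices start
 * from 1 and each column is terminated by index = -1, mirroring the
 * row format of prob->x).
 */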
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
static void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	double eps=param->eps;
	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob->l;i++)
		if(prob->y[i]==+1)
			pos++;
	neg = prob->l - pos;

	function *fun_obj=NULL;
	switch(param->solver_type)
	{
		case L2R_LR:
		{
			fun_obj=new l2r_lr_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn);
			TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l);
			tron_obj.set_print_string(liblinear_print_string);
			tron_obj.tron(w);
			delete fun_obj;
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
			break;
		case L2R_L1LOSS_SVC_DUAL:
			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
			break;
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		default:
			fprintf(stderr, "Error: unknown solver_type\n");
			break;
	}
}

//
// Interface functions
//
model* train(const problem *prob, const parameter *param)
{
	int i,j;
	int l = prob->l;
	int n = prob->n;
	int w_size = prob->n;
	model *model_ = Malloc(model,1);

	if(prob->bias>=0)
		model_->nr_feature=n-1;
	else
		model_->nr_feature=n;
	model_->param = *param;
	model_->bias = prob->bias;

	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	group_classes(prob,&nr_class,&label,&start,&count,perm);

	model_->nr_class=nr_class;
	model_->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
		model_->label[i] = label[i];

	// calculate weighted C
	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
		weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
		for(j=0;j<nr_class;j++)
			if(param->weight_label[i] == label[j])
				break;
		if(j == nr_class)
			fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
		else
			weighted_C[j] *= param->weight[i];
	}

	// constructing the subproblem
	feature_node **x = Malloc(feature_node *,l);
	for(i=0;i<l;i++)
		x[i] = prob->x[perm[i]];

	int k;
	problem sub_prob;
	sub_prob.l = l;
	sub_prob.n = n;
	sub_prob.x = Malloc(feature_node *,sub_prob.l);
	sub_prob.y = Malloc(int,sub_prob.l);

	for(k=0; k<sub_prob.l; k++)
		sub_prob.x[k] = x[k];

	// multi-class svm by Crammer and Singer
	if(param->solver_type == MCSVM_CS)
	{
		model_->w=Malloc(double, n*nr_class);
		for(i=0;i<nr_class;i++)
			for(j=start[i];j<start[i]+count[i];j++)
				sub_prob.y[j] = i;
		Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
		Solver.Solve(model_->w);
	}
	else
	{
		if(nr_class == 2)
		{
			model_->w=Malloc(double, w_size);

			int e0 = start[0]+count[0];
			k=0;
			for(; k<e0; k++)
				sub_prob.y[k] = +1;
			for(; k<sub_prob.l; k++)
				sub_prob.y[k] = -1;

			train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
		}
		else
		{
			model_->w=Malloc(double, w_size*nr_class);
			double *w=Malloc(double, w_size);
			for(i=0;i<nr_class;i++)
			{
				int si = start[i];
				int ei = si+count[i];

				k=0;
				for(; k<si; k++)
					sub_prob.y[k] = -1;
				for(; k<ei; k++)
					sub_prob.y[k] = +1;
				for(; k<sub_prob.l; k++)
					sub_prob.y[k] = -1;

				train_one(&sub_prob, param, w, weighted_C[i], param->C);

				for(int j=0;j<w_size;j++)
					model_->w[j*nr_class+i] = w[j];
			}
			free(w);
		}
	}

	free(x);
	free(label);
	free(start);
	free(count);
	free(perm);
	free(sub_prob.x);
	free(sub_prob.y);
	free(weighted_C);
	return model_;
}
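
/*
 * Usage sketch (added, illustrative only; error handling omitted and the
 * feature_node layout {index, value} from linear.h is assumed). A tiny
 * two-instance binary problem in the sparse format (1-based feature
 * indices, each row terminated by index = -1):
 *
 *	feature_node x1[] = {{1,1.0},{2,0.5},{-1,0}};
 *	feature_node x2[] = {{1,-1.0},{3,2.0},{-1,0}};
 *	feature_node *rows[2] = {x1, x2};
 *	int labels[2] = {+1, -1};
 *
 *	problem prob;
 *	prob.l = 2; prob.n = 3; prob.bias = -1;	// no bias feature appended
 *	prob.x = rows; prob.y = labels;
 *
 *	parameter param;
 *	param.solver_type = L2R_L2LOSS_SVC_DUAL;
 *	param.eps = 0.1; param.C = 1;
 *	param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;
 *
 *	if(check_parameter(&prob, &param) == NULL)
 *	{
 *		model *m = train(&prob, &param);
 *		int y_hat = predict(m, x1);
 *		free_and_destroy_model(&m);
 *	}
 */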
void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);

	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
		int j = i+rand()%(l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
		fold_start[i]=i*l/nr_fold;

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct problem subprob;

		subprob.bias = prob->bias;
		subprob.n = prob->n;
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct feature_node*,subprob.l);
		subprob.y = Malloc(int,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct model *submodel = train(&subprob,param);
		for(j=begin;j<end;j++)
			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}
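
/*
 * Usage sketch (added, illustrative): with prob and param filled in as
 * for train(),
 *	int *target = Malloc(int, prob.l);
 *	cross_validation(&prob, &param, 5, target);	// 5-fold CV
 *	// compare target[i] with prob.y[i] to compute CV accuracy
 *	free(target);
 */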
int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
{
	int idx;
	int n;
	if(model_->bias>=0)
		n=model_->nr_feature+1;
	else
		n=model_->nr_feature;
	double *w=model_->w;
	int nr_class=model_->nr_class;
	int i;
	int nr_w;
	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	const feature_node *lx=x;
	for(i=0;i<nr_w;i++)
		dec_values[i] = 0;
	for(; (idx=lx->index)!=-1; lx++)
	{
		// the dimension of testing data may exceed that of training
		if(idx<=n)
			for(i=0;i<nr_w;i++)
				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
	}

	if(nr_class==2)
		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
	else
	{
		int dec_max_idx = 0;
		for(i=1;i<nr_class;i++)
		{
			if(dec_values[i] > dec_values[dec_max_idx])
				dec_max_idx = i;
		}
		return model_->label[dec_max_idx];
	}
}

int predict(const model *model_, const feature_node *x)
{
	double *dec_values = Malloc(double, model_->nr_class);
	int label=predict_values(model_, x, dec_values);
	free(dec_values);
	return label;
}

int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
{
	if(check_probability_model(model_))
	{
		int i;
		int nr_class=model_->nr_class;
		int nr_w;
		if(nr_class==2)
			nr_w = 1;
		else
			nr_w = nr_class;

		int label=predict_values(model_, x, prob_estimates);
		for(i=0;i<nr_w;i++)
			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));

		if(nr_class==2) // for binary classification
			prob_estimates[1]=1.-prob_estimates[0];
		else
		{
			double sum=0;
			for(i=0; i<nr_class; i++)
				sum+=prob_estimates[i];

			for(i=0; i<nr_class; i++)
				prob_estimates[i]=prob_estimates[i]/sum;
		}

		return label;
	}
	else
		return 0;
}

static const char *solver_type_table[]=
{
	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", "L1R_L2LOSS_SVC", "L1R_LR", NULL
};

int save_model(const char *model_file_name, const struct model *model_)
{
	int i;
	int nr_feature=model_->nr_feature;
	int n;
	const parameter& param = model_->param;

	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	int nr_w;
	if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS)
		nr_w=1;
	else
		nr_w=model_->nr_class;

	fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]);
	fprintf(fp, "nr_class %d\n", model_->nr_class);
	fprintf(fp, "label");
	for(i=0; i<model_->nr_class; i++)
		fprintf(fp, " %d", model_->label[i]);
	fprintf(fp, "\n");

	fprintf(fp, "nr_feature %d\n", nr_feature);

	fprintf(fp, "bias %.16g\n", model_->bias);

	fprintf(fp, "w\n");
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fprintf(fp, "%.16g ", model_->w[i*nr_w+j]);
		fprintf(fp, "\n");
	}

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}
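
/*
 * Note (added): the saved model is plain text; for a binary L2R_LR model
 * it looks like (values illustrative):
 *	solver_type L2R_LR
 *	nr_class 2
 *	label 1 -1
 *	nr_feature 13
 *	bias -1
 *	w
 *	0.123	(nr_w values per feature, one feature per line)
 *	...
 */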
struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				free(model_->label);
				free(model_);
				return NULL;
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			fscanf(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			fscanf(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			fscanf(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				fscanf(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			free(model_->label);	// label may already have been read; avoid leaking it
			free(model_);
			return NULL;
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if(nr_class==2 && param.solver_type != MCSVM_CS)
		nr_w = 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			fscanf(fp, "%lf ", &model_->w[i*nr_w+j]);
		fscanf(fp, "\n");
	}
	if (ferror(fp) != 0 || fclose(fp) != 0) return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)	// check for NULL before touching the contents
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR)
		return "unknown solver type";

	return NULL;
}

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR || model_->param.solver_type==L1R_LR);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}