PageRenderTime 52ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/code_comm/Rsvm.c

http://research-code-base-animesh.googlecode.com/
C | 515 lines | 385 code | 77 blank | 53 comment | 61 complexity | b506d31c6cb3be81a373c9cc7ce80b52 MD5 | raw file
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include "svm.h"
  5. #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
  6. /*
  7. * svm_model
  8. */
/*
 * svm_model — local mirror of libsvm's (non-public) model structure.
 * NOTE(review): the field order and types here must match the definition
 * compiled into the libsvm sources exactly; the structs built by hand in
 * svmpredict()/svmwrite() below are passed straight to libsvm routines.
 * Do not reorder or resize fields.
 */
struct svm_model
{
	struct svm_parameter param;	/* training parameters */
	int nr_class;		/* number of classes, = 2 in
				   regression/one class svm */
	int l;			/* total #SV */
	struct svm_node **SV;	/* SVs (SV[l]) */
	double **sv_coef;	/* coefficients for SVs in decision functions
				   (sv_coef[n-1][l]) */
	double *rho;		/* constants in decision functions
				   (rho[n*(n-1)/2]) */
	double *probA;		/* pairwise probability information */
	double *probB;
	/* for classification only */
	int *label;		/* label of each class (label[n]) */
	int *nSV;		/* number of SVs for each class (nSV[n]) */
				/* nSV[0] + nSV[1] + ... + nSV[n-1] = l */
	/* XXX */
	int free_sv;		/* 1 if svm_model is created by
				   svm_load_model */
				/* 0 if svm_model is created by
				   svm_train */
};
  32. /*
  33. * results from cross-validation
  34. */
/*
 * results from cross-validation
 * (see do_cross_validation(): per-fold results plus the two aggregate
 * figures — MSE/accuracy in total1, squared correlation coefficient in
 * total2 for the regression case).  Not referenced elsewhere in this
 * file; kept for the R-side interface.
 */
struct crossresults
{
	double *results;	/* per-fold MSE (SVR) or accuracy in % */
	double total1;		/* overall MSE or overall accuracy     */
	double total2;		/* squared correlation coeff (SVR only) */
};
  41. struct svm_node ** sparsify (double *x, int r, int c)
  42. {
  43. struct svm_node** sparse;
  44. int i, ii, count;
  45. sparse = (struct svm_node **) malloc (r * sizeof(struct svm_node *));
  46. for (i = 0; i < r; i++) {
  47. /* determine nr. of non-zero elements */
  48. for (count = ii = 0; ii < c; ii++)
  49. if (x[i * c + ii] != 0) count++;
  50. /* allocate memory for column elements */
  51. sparse[i] = (struct svm_node *) malloc ((count + 1) * sizeof(struct svm_node));
  52. /* set column elements */
  53. for (count = ii = 0; ii < c; ii++)
  54. if (x[i * c + ii] != 0) {
  55. sparse[i][count].index = ii + 1;
  56. sparse[i][count].value = x[i * c + ii];
  57. count++;
  58. }
  59. /* set termination element */
  60. sparse[i][count].index = -1;
  61. }
  62. return sparse;
  63. }
  64. struct svm_node ** transsparse (double *x, int r, int *rowindex, int *colindex)
  65. {
  66. struct svm_node** sparse;
  67. int i, ii, count = 0, nnz = 0;
  68. sparse = (struct svm_node **) malloc (r * sizeof(struct svm_node*));
  69. for (i = 0; i < r; i++) {
  70. /* allocate memory for column elements */
  71. nnz = rowindex[i+1] - rowindex[i];
  72. sparse[i] = (struct svm_node *) malloc ((nnz + 1) * sizeof(struct svm_node));
  73. /* set column elements */
  74. for (ii = 0; ii < nnz; ii++) {
  75. sparse[i][ii].index = colindex[count];
  76. sparse[i][ii].value = x[count];
  77. count++;
  78. }
  79. /* set termination element */
  80. sparse[i][ii].index = -1;
  81. }
  82. return sparse;
  83. }
  84. /* Cross-Validation-routine from svm-train */
  85. void do_cross_validation(struct svm_problem *prob,
  86. struct svm_parameter *param,
  87. int nr_fold,
  88. double* cresults,
  89. double* ctotal1,
  90. double* ctotal2)
  91. {
  92. int i;
  93. int total_correct = 0;
  94. double total_error = 0;
  95. double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
  96. /* random shuffle */
  97. for(i=0; i<prob->l; i++)
  98. {
  99. int j = rand()%(prob->l-i);
  100. struct svm_node *tx;
  101. double ty;
  102. tx = prob->x[i];
  103. prob->x[i] = prob->x[j];
  104. prob->x[j] = tx;
  105. ty = prob->y[i];
  106. prob->y[i] = prob->y[j];
  107. prob->y[j] = ty;
  108. }
  109. for(i=0; i<nr_fold; i++)
  110. {
  111. int begin = i*prob->l/nr_fold;
  112. int end = (i+1)*prob->l/nr_fold;
  113. int j,k;
  114. struct svm_problem subprob;
  115. subprob.l = prob->l-(end-begin);
  116. subprob.x = Malloc(struct svm_node*,subprob.l);
  117. subprob.y = Malloc(double,subprob.l);
  118. k=0;
  119. for(j = 0; j < begin; j++)
  120. {
  121. subprob.x[k] = prob->x[j];
  122. subprob.y[k] = prob->y[j];
  123. ++k;
  124. }
  125. for(j = end; j<prob->l; j++)
  126. {
  127. subprob.x[k] = prob->x[j];
  128. subprob.y[k] = prob->y[j];
  129. ++k;
  130. }
  131. if(param->svm_type == EPSILON_SVR ||
  132. param->svm_type == NU_SVR)
  133. {
  134. struct svm_model *submodel = svm_train(&subprob,param);
  135. double error = 0;
  136. for(j=begin;j<end;j++)
  137. {
  138. double v = svm_predict(submodel,prob->x[j]);
  139. double y = prob->y[j];
  140. error += (v-y)*(v-y);
  141. sumv += v;
  142. sumy += y;
  143. sumvv += v*v;
  144. sumyy += y*y;
  145. sumvy += v*y;
  146. }
  147. svm_destroy_model(submodel);
  148. /* printf("Mean squared error = %g\n",
  149. error/(end-begin)); */
  150. cresults[i] = error/(end-begin);
  151. total_error += error;
  152. }
  153. else
  154. {
  155. struct svm_model *submodel = svm_train(&subprob,param);
  156. int correct = 0;
  157. for(j=begin;j<end;j++)
  158. {
  159. double v = svm_predict(submodel,prob->x[j]);
  160. if(v == prob->y[j])
  161. ++correct;
  162. }
  163. svm_destroy_model(submodel);
  164. /* printf("Accuracy = %g%% (%d/%d)\n", */
  165. /* 100.0*correct/(end-begin),correct,(end-begin)); */
  166. cresults[i] = 100.0*correct/(end-begin);
  167. total_correct += correct;
  168. }
  169. free(subprob.x);
  170. free(subprob.y);
  171. }
  172. if(param->svm_type == EPSILON_SVR || param->svm_type == NU_SVR)
  173. {
  174. /* printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
  175. printf("Cross Validation Squared correlation coefficient = %g\n",
  176. ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
  177. ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
  178. ); */
  179. *ctotal1 = total_error/prob->l;
  180. *ctotal2 = ((prob->l * sumvy - sumv * sumy) *
  181. (prob->l * sumvy - sumv*sumy)) /
  182. ((prob->l * sumvv - sumv * sumv) *
  183. (prob->l * sumyy - sumy * sumy));
  184. }
  185. else
  186. /* printf("Cross Validation Accuracy =
  187. %g%%\n",100.0*total_correct/prob.l); */
  188. *ctotal1 = 100.0 * total_correct / prob->l;
  189. }
  190. void svmtrain (double *x, int *r, int *c,
  191. double *y,
  192. int *rowindex, int *colindex,
  193. int *svm_type,
  194. int *kernel_type,
  195. int *degree,
  196. double *gamma,
  197. double *coef0,
  198. double *cost,
  199. double *nu,
  200. int *weightlabels,
  201. double *weights,
  202. int *nweights,
  203. double *cache,
  204. double *tolerance,
  205. double *epsilon,
  206. int *shrinking,
  207. int *cross,
  208. int *sparse,
  209. int *probability,
  210. int *nclasses,
  211. int *nr,
  212. int *index,
  213. int *labels,
  214. int *nSV,
  215. double *rho,
  216. double *coefs,
  217. double *sigma,
  218. double *probA,
  219. double *probB,
  220. double *cresults,
  221. double *ctotal1,
  222. double *ctotal2,
  223. char **error)
  224. {
  225. struct svm_parameter par;
  226. struct svm_problem prob;
  227. struct svm_model *model = NULL;
  228. int i, ii;
  229. const char* s;
  230. /* set parameters */
  231. par.svm_type = *svm_type;
  232. par.kernel_type = *kernel_type;
  233. par.degree = *degree;
  234. par.gamma = *gamma;
  235. par.coef0 = *coef0;
  236. par.cache_size = *cache;
  237. par.eps = *tolerance;
  238. par.C = *cost;
  239. par.nu = *nu;
  240. par.nr_weight = *nweights;
  241. if (par.nr_weight > 0) {
  242. par.weight = (double *) malloc (sizeof(double) * par.nr_weight);
  243. memcpy(par.weight, weights, par.nr_weight * sizeof(double));
  244. par.weight_label = (int *) malloc (sizeof(int) * par.nr_weight);
  245. memcpy(par.weight_label, weightlabels, par.nr_weight * sizeof(int));
  246. }
  247. par.p = *epsilon;
  248. par.shrinking = *shrinking;
  249. par.probability = *probability;
  250. /* set problem */
  251. prob.l = *r;
  252. prob.y = y;
  253. if (*sparse > 0)
  254. prob.x = transsparse(x, *r, rowindex, colindex);
  255. else
  256. prob.x = sparsify(x, *r, *c);
  257. /* check parameters & copy error message */
  258. s = svm_check_parameter(&prob, &par);
  259. if (s) {
  260. strcpy(*error, s);
  261. } else {
  262. /* call svm_train */
  263. model = svm_train(&prob, &par);
  264. /* set up return values */
  265. for (ii = 0; ii < model->l; ii++)
  266. for (i = 0; i < *r; i++)
  267. if (prob.x[i] == model->SV[ii]) index[ii] = i+1;
  268. *nr = model->l;
  269. *nclasses = model->nr_class;
  270. memcpy (rho, model->rho, *nclasses * (*nclasses - 1)/2 * sizeof(double));
  271. if (*probability && par.svm_type != ONE_CLASS) {
  272. if (par.svm_type == EPSILON_SVR || par.svm_type == NU_SVR)
  273. *sigma = svm_get_svr_probability(model);
  274. else {
  275. memcpy(probA, model->probA,
  276. *nclasses * (*nclasses - 1)/2 * sizeof(double));
  277. memcpy(probB, model->probB,
  278. *nclasses * (*nclasses - 1)/2 * sizeof(double));
  279. }
  280. }
  281. for (i = 0; i < *nclasses-1; i++)
  282. memcpy (coefs + i * *nr, model->sv_coef[i], *nr * sizeof (double));
  283. if (*svm_type < 2) {
  284. memcpy (labels, model->label, *nclasses * sizeof(int));
  285. memcpy (nSV, model->nSV, *nclasses * sizeof(int));
  286. }
  287. /* Perform cross-validation, if requested */
  288. if (*cross > 0)
  289. do_cross_validation (&prob, &par, *cross, cresults,
  290. ctotal1, ctotal2);
  291. /* clean up memory */
  292. svm_destroy_model(model);
  293. }
  294. /* clean up memory */
  295. if (par.nr_weight > 0) {
  296. free(par.weight);
  297. free(par.weight_label);
  298. }
  299. for (i = 0; i < *r; i++) free (prob.x[i]);
  300. free (prob.x);
  301. }
  302. void svmpredict (int *decisionvalues,
  303. int *probability,
  304. double *v, int *r, int *c,
  305. int *rowindex,
  306. int *colindex,
  307. double *coefs,
  308. double *rho,
  309. int *compprob,
  310. double *probA,
  311. double *probB,
  312. int *nclasses,
  313. int *totnSV,
  314. int *labels,
  315. int *nSV,
  316. int *sparsemodel,
  317. int *svm_type,
  318. int *kernel_type,
  319. int *degree,
  320. double *gamma,
  321. double *coef0,
  322. double *x, int *xr,
  323. int *xrowindex,
  324. int *xcolindex,
  325. int *sparsex,
  326. double *ret,
  327. double *dec,
  328. double *prob)
  329. {
  330. struct svm_model m;
  331. struct svm_node ** train;
  332. int i;
  333. /* set up model */
  334. m.l = *totnSV;
  335. m.nr_class = *nclasses;
  336. m.sv_coef = (double **) malloc (m.nr_class * sizeof(double));
  337. for (i = 0; i < m.nr_class - 1; i++) {
  338. m.sv_coef[i] = (double *) malloc (m.l * sizeof (double));
  339. memcpy (m.sv_coef[i], coefs + i*m.l, m.l * sizeof (double));
  340. }
  341. if (*sparsemodel > 0)
  342. m.SV = transsparse(v, *r, rowindex, colindex);
  343. else
  344. m.SV = sparsify(v, *r, *c);
  345. m.rho = rho;
  346. m.probA = probA;
  347. m.probB = probB;
  348. m.label = labels;
  349. m.nSV = nSV;
  350. /* set up parameter */
  351. m.param.svm_type = *svm_type;
  352. m.param.kernel_type = *kernel_type;
  353. m.param.degree = *degree;
  354. m.param.gamma = *gamma;
  355. m.param.coef0 = *coef0;
  356. m.param.probability = *compprob;
  357. m.free_sv = 1;
  358. /* create sparse training matrix */
  359. if (*sparsex > 0)
  360. train = transsparse(x, *xr, xrowindex, xcolindex);
  361. else
  362. train = sparsify(x, *xr, *c);
  363. /* call svm-predict-function for each x-row, possibly using probability
  364. estimator, if requested */
  365. if (*probability && svm_check_probability_model(&m)) {
  366. for (i = 0; i < *xr; i++)
  367. ret[i] = svm_predict_probability(&m, train[i], prob + i * *nclasses);
  368. } else {
  369. for (i = 0; i < *xr; i++)
  370. ret[i] = svm_predict(&m, train[i]);
  371. }
  372. /* optionally, compute decision values */
  373. if (*decisionvalues)
  374. for (i = 0; i < *xr; i++)
  375. svm_predict_values(&m, train[i], dec + i * *nclasses * (*nclasses - 1) / 2);
  376. /* clean up memory */
  377. for (i = 0; i < *xr; i++)
  378. free (train[i]);
  379. free (train);
  380. for (i = 0; i < *r; i++)
  381. free (m.SV[i]);
  382. free (m.SV);
  383. for (i = 0; i < m.nr_class - 1; i++)
  384. free(m.sv_coef[i]);
  385. free(m.sv_coef);
  386. }
  387. void svmwrite (double *v, int *r, int *c,
  388. int *rowindex,
  389. int *colindex,
  390. double *coefs,
  391. double *rho,
  392. double *probA,
  393. double *probB,
  394. int *nclasses,
  395. int *totnSV,
  396. int *labels,
  397. int *nSV,
  398. int *sparsemodel,
  399. int *svm_type,
  400. int *kernel_type,
  401. int *degree,
  402. double *gamma,
  403. double *coef0,
  404. char **filename)
  405. {
  406. struct svm_model m;
  407. int i;
  408. char *fname = *filename;
  409. /* set up model */
  410. m.l = *totnSV;
  411. m.nr_class = *nclasses;
  412. m.sv_coef = (double **) malloc (m.nr_class * sizeof(double));
  413. for (i = 0; i < m.nr_class - 1; i++) {
  414. m.sv_coef[i] = (double *) malloc (m.l * sizeof (double));
  415. memcpy (m.sv_coef[i], coefs + i*m.l, m.l * sizeof (double));
  416. }
  417. if (*sparsemodel > 0)
  418. m.SV = transsparse(v, *r, rowindex, colindex);
  419. else
  420. m.SV = sparsify(v, *r, *c);
  421. m.rho = rho;
  422. m.label = labels;
  423. m.nSV = nSV;
  424. m.probA = probA;
  425. m.probB = probB;
  426. /* set up parameter */
  427. m.param.svm_type = *svm_type;
  428. m.param.kernel_type = *kernel_type;
  429. m.param.degree = *degree;
  430. m.param.gamma = *gamma;
  431. m.param.coef0 = *coef0;
  432. m.free_sv = 1;
  433. /* write svm model */
  434. svm_save_model(fname, &m);
  435. for (i = 0; i < m.nr_class - 1; i++)
  436. free(m.sv_coef[i]);
  437. free(m.sv_coef);
  438. }