PageRenderTime 84ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/svm_learn_main.c

https://bitbucket.org/madhuraraju/pysvmlight
C | 397 lines | 344 code | 25 blank | 28 comment | 52 complexity | 91aa953aec8b93b9f9b3aab41ac0a814 MD5 | raw file
  1. /***********************************************************************/
  2. /* */
  3. /* svm_learn_main.c */
  4. /* */
  5. /* Command line interface to the learning module of the */
  6. /* Support Vector Machine. */
  7. /* */
  8. /* Author: Thorsten Joachims */
  9. /* Date: 02.07.02 */
  10. /* */
  11. /* Copyright (c) 2000 Thorsten Joachims - All rights reserved */
  12. /* */
  13. /* This software is available for non-commercial use only. It must */
  14. /* not be modified and distributed without prior permission of the */
  15. /* author. The author is not responsible for implications from the */
  16. /* use of this software. */
  17. /* */
  18. /***********************************************************************/
  19. /* if svm-learn is used out of C++, define it as extern "C" */
  20. #ifdef __cplusplus
  21. extern "C" {
  22. #endif
  23. # include "svm_common.h"
  24. # include "svm_learn.h"
  25. #ifdef __cplusplus
  26. }
  27. #endif
  28. char docfile[200]; /* file with training examples */
  29. char modelfile[200]; /* file for resulting classifier */
  30. char restartfile[200]; /* file with initial alphas */
  31. void read_input_parameters(int, char **, char *, char *, char *, long *,
  32. LEARN_PARM *, KERNEL_PARM *);
  33. void wait_any_key();
  34. void print_help();
  35. int main (int argc, char* argv[])
  36. {
  37. DOC **docs; /* training examples */
  38. long totwords,totdoc,i;
  39. double *target;
  40. double *alpha_in=NULL;
  41. KERNEL_CACHE *kernel_cache;
  42. LEARN_PARM learn_parm;
  43. KERNEL_PARM kernel_parm;
  44. MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
  45. read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
  46. &learn_parm,&kernel_parm);
  47. read_documents(docfile,&docs,&target,&totwords,&totdoc);
  48. if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);
  49. if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
  50. kernel_cache=NULL;
  51. }
  52. else {
  53. /* Always get a new kernel cache. It is not possible to use the
  54. same cache for two different training runs */
  55. kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  56. }
  57. if(learn_parm.type == CLASSIFICATION) {
  58. svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
  59. &kernel_parm,kernel_cache,model,alpha_in);
  60. }
  61. else if(learn_parm.type == REGRESSION) {
  62. svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
  63. &kernel_parm,&kernel_cache,model);
  64. }
  65. else if(learn_parm.type == RANKING) {
  66. svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
  67. &kernel_parm,&kernel_cache,model);
  68. }
  69. else if(learn_parm.type == OPTIMIZATION) {
  70. svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
  71. &kernel_parm,kernel_cache,model,alpha_in);
  72. }
  73. if(kernel_cache) {
  74. /* Free the memory used for the cache. */
  75. kernel_cache_cleanup(kernel_cache);
  76. }
  77. /* Warning: The model contains references to the original data 'docs'.
  78. If you want to free the original data, and only keep the model, you
  79. have to make a deep copy of 'model'. */
  80. /* deep_copy_of_model=copy_model(model); */
  81. write_model(modelfile,model);
  82. free(alpha_in);
  83. free_model(model,0);
  84. for(i=0;i<totdoc;i++)
  85. free_example(docs[i],1);
  86. free(docs);
  87. free(target);
  88. return(0);
  89. }
  90. /*---------------------------------------------------------------------------*/
  91. void read_input_parameters(int argc,char *argv[],char *docfile,char *modelfile,
  92. char *restartfile,long *verbosity,
  93. LEARN_PARM *learn_parm,KERNEL_PARM *kernel_parm)
  94. {
  95. long i;
  96. char type[100];
  97. /* set default */
  98. strcpy (modelfile, "svm_model");
  99. strcpy (learn_parm->predfile, "trans_predictions");
  100. strcpy (learn_parm->alphafile, "");
  101. strcpy (restartfile, "");
  102. (*verbosity)=1;
  103. learn_parm->biased_hyperplane=1;
  104. learn_parm->sharedslack=0;
  105. learn_parm->remove_inconsistent=0;
  106. learn_parm->skip_final_opt_check=0;
  107. learn_parm->svm_maxqpsize=10;
  108. learn_parm->svm_newvarsinqp=0;
  109. learn_parm->svm_iter_to_shrink=-9999;
  110. learn_parm->maxiter=100000;
  111. learn_parm->kernel_cache_size=40;
  112. learn_parm->svm_c=0.0;
  113. learn_parm->eps=0.1;
  114. learn_parm->transduction_posratio=-1.0;
  115. learn_parm->svm_costratio=1.0;
  116. learn_parm->svm_costratio_unlab=1.0;
  117. learn_parm->svm_unlabbound=1E-5;
  118. learn_parm->epsilon_crit=0.001;
  119. learn_parm->epsilon_a=1E-15;
  120. learn_parm->compute_loo=0;
  121. learn_parm->rho=1.0;
  122. learn_parm->xa_depth=0;
  123. kernel_parm->kernel_type=0;
  124. kernel_parm->poly_degree=3;
  125. kernel_parm->rbf_gamma=1.0;
  126. kernel_parm->coef_lin=1;
  127. kernel_parm->coef_const=1;
  128. strcpy(kernel_parm->custom,"empty");
  129. strcpy(type,"c");
  130. for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
  131. switch ((argv[i])[1])
  132. {
  133. case '?': print_help(); exit(0);
  134. case 'z': i++; strcpy(type,argv[i]); break;
  135. case 'v': i++; (*verbosity)=atol(argv[i]); break;
  136. case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break;
  137. case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break;
  138. case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break;
  139. case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break;
  140. case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break;
  141. case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
  142. case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
  143. case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
  144. case 'c': i++; learn_parm->svm_c=atof(argv[i]); break;
  145. case 'w': i++; learn_parm->eps=atof(argv[i]); break;
  146. case 'p': i++; learn_parm->transduction_posratio=atof(argv[i]); break;
  147. case 'j': i++; learn_parm->svm_costratio=atof(argv[i]); break;
  148. case 'e': i++; learn_parm->epsilon_crit=atof(argv[i]); break;
  149. case 'o': i++; learn_parm->rho=atof(argv[i]); break;
  150. case 'k': i++; learn_parm->xa_depth=atol(argv[i]); break;
  151. case 'x': i++; learn_parm->compute_loo=atol(argv[i]); break;
  152. case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break;
  153. case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break;
  154. case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break;
  155. case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break;
  156. case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break;
  157. case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break;
  158. case 'l': i++; strcpy(learn_parm->predfile,argv[i]); break;
  159. case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break;
  160. case 'y': i++; strcpy(restartfile,argv[i]); break;
  161. default: printf("\nUnrecognized option %s!\n\n",argv[i]);
  162. print_help();
  163. exit(0);
  164. }
  165. }
  166. if(i>=argc) {
  167. printf("\nNot enough input parameters!\n\n");
  168. wait_any_key();
  169. print_help();
  170. exit(0);
  171. }
  172. strcpy (docfile, argv[i]);
  173. if((i+1)<argc) {
  174. strcpy (modelfile, argv[i+1]);
  175. }
  176. if(learn_parm->svm_iter_to_shrink == -9999) {
  177. if(kernel_parm->kernel_type == LINEAR)
  178. learn_parm->svm_iter_to_shrink=2;
  179. else
  180. learn_parm->svm_iter_to_shrink=100;
  181. }
  182. if(strcmp(type,"c")==0) {
  183. learn_parm->type=CLASSIFICATION;
  184. }
  185. else if(strcmp(type,"r")==0) {
  186. learn_parm->type=REGRESSION;
  187. }
  188. else if(strcmp(type,"p")==0) {
  189. learn_parm->type=RANKING;
  190. }
  191. else if(strcmp(type,"o")==0) {
  192. learn_parm->type=OPTIMIZATION;
  193. }
  194. else if(strcmp(type,"s")==0) {
  195. learn_parm->type=OPTIMIZATION;
  196. learn_parm->sharedslack=1;
  197. }
  198. else {
  199. printf("\nUnknown type '%s': Valid types are 'c' (classification), 'r' regession, and 'p' preference ranking.\n",type);
  200. wait_any_key();
  201. print_help();
  202. exit(0);
  203. }
  204. if((learn_parm->skip_final_opt_check)
  205. && (kernel_parm->kernel_type == LINEAR)) {
  206. printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
  207. learn_parm->skip_final_opt_check=0;
  208. }
  209. if((learn_parm->skip_final_opt_check)
  210. && (learn_parm->remove_inconsistent)) {
  211. printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
  212. wait_any_key();
  213. print_help();
  214. exit(0);
  215. }
  216. if((learn_parm->svm_maxqpsize<2)) {
  217. printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize);
  218. wait_any_key();
  219. print_help();
  220. exit(0);
  221. }
  222. if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) {
  223. printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize);
  224. printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp);
  225. wait_any_key();
  226. print_help();
  227. exit(0);
  228. }
  229. if(learn_parm->svm_iter_to_shrink<1) {
  230. printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink);
  231. wait_any_key();
  232. print_help();
  233. exit(0);
  234. }
  235. if(learn_parm->svm_c<0) {
  236. printf("\nThe C parameter must be greater than zero!\n\n");
  237. wait_any_key();
  238. print_help();
  239. exit(0);
  240. }
  241. if(learn_parm->transduction_posratio>1) {
  242. printf("\nThe fraction of unlabeled examples to classify as positives must\n");
  243. printf("be less than 1.0 !!!\n\n");
  244. wait_any_key();
  245. print_help();
  246. exit(0);
  247. }
  248. if(learn_parm->svm_costratio<=0) {
  249. printf("\nThe COSTRATIO parameter must be greater than zero!\n\n");
  250. wait_any_key();
  251. print_help();
  252. exit(0);
  253. }
  254. if(learn_parm->epsilon_crit<=0) {
  255. printf("\nThe epsilon parameter must be greater than zero!\n\n");
  256. wait_any_key();
  257. print_help();
  258. exit(0);
  259. }
  260. if(learn_parm->rho<0) {
  261. printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
  262. printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
  263. printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
  264. wait_any_key();
  265. print_help();
  266. exit(0);
  267. }
  268. if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
  269. printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
  270. printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
  271. printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
  272. wait_any_key();
  273. print_help();
  274. exit(0);
  275. }
  276. }
  277. void wait_any_key()
  278. {
  279. printf("\n(more)\n");
  280. (void)getc(stdin);
  281. }
  282. void print_help()
  283. {
  284. printf("\nSVM-light %s: Support Vector Machine, learning module %s\n",VERSION,VERSION_DATE);
  285. copyright_notice();
  286. printf(" usage: svm_learn [options] example_file model_file\n\n");
  287. printf("Arguments:\n");
  288. printf(" example_file-> file with training data\n");
  289. printf(" model_file -> file to store learned decision rule in\n");
  290. printf("General options:\n");
  291. printf(" -? -> this help\n");
  292. printf(" -v [0..3] -> verbosity level (default 1)\n");
  293. printf("Learning options:\n");
  294. printf(" -z {c,r,p} -> select between classification (c), regression (r),\n");
  295. printf(" and preference ranking (p) (default classification)\n");
  296. printf(" -c float -> C: trade-off between training error\n");
  297. printf(" and margin (default [avg. x*x]^-1)\n");
  298. printf(" -w [0..] -> epsilon width of tube for regression\n");
  299. printf(" (default 0.1)\n");
  300. printf(" -j float -> Cost: cost-factor, by which training errors on\n");
  301. printf(" positive examples outweight errors on negative\n");
  302. printf(" examples (default 1) (see [4])\n");
  303. printf(" -b [0,1] -> use biased hyperplane (i.e. x*w+b>0) instead\n");
  304. printf(" of unbiased hyperplane (i.e. x*w>0) (default 1)\n");
  305. printf(" -i [0,1] -> remove inconsistent training examples\n");
  306. printf(" and retrain (default 0)\n");
  307. printf("Performance estimation options:\n");
  308. printf(" -x [0,1] -> compute leave-one-out estimates (default 0)\n");
  309. printf(" (see [5])\n");
  310. printf(" -o ]0..2] -> value of rho for XiAlpha-estimator and for pruning\n");
  311. printf(" leave-one-out computation (default 1.0) (see [2])\n");
  312. printf(" -k [0..100] -> search depth for extended XiAlpha-estimator \n");
  313. printf(" (default 0)\n");
  314. printf("Transduction options (see [3]):\n");
  315. printf(" -p [0..1] -> fraction of unlabeled examples to be classified\n");
  316. printf(" into the positive class (default is the ratio of\n");
  317. printf(" positive and negative examples in the training data)\n");
  318. printf("Kernel options:\n");
  319. printf(" -t int -> type of kernel function:\n");
  320. printf(" 0: linear (default)\n");
  321. printf(" 1: polynomial (s a*b+c)^d\n");
  322. printf(" 2: radial basis function exp(-gamma ||a-b||^2)\n");
  323. printf(" 3: sigmoid tanh(s a*b + c)\n");
  324. printf(" 4: user defined kernel from kernel.h\n");
  325. printf(" -d int -> parameter d in polynomial kernel\n");
  326. printf(" -g float -> parameter gamma in rbf kernel\n");
  327. printf(" -s float -> parameter s in sigmoid/poly kernel\n");
  328. printf(" -r float -> parameter c in sigmoid/poly kernel\n");
  329. printf(" -u string -> parameter of user defined kernel\n");
  330. printf("Optimization options (see [1]):\n");
  331. printf(" -q [2..] -> maximum size of QP-subproblems (default 10)\n");
  332. printf(" -n [2..q] -> number of new variables entering the working set\n");
  333. printf(" in each iteration (default n = q). Set n<q to prevent\n");
  334. printf(" zig-zagging.\n");
  335. printf(" -m [5..] -> size of cache for kernel evaluations in MB (default 40)\n");
  336. printf(" The larger the faster...\n");
  337. printf(" -e float -> eps: Allow that error for termination criterion\n");
  338. printf(" [y [w*x+b] - 1] >= eps (default 0.001)\n");
  339. printf(" -y [0,1] -> restart the optimization from alpha values in file\n");
  340. printf(" specified by -a option. (default 0)\n");
  341. printf(" -h [5..] -> number of iterations a variable needs to be\n");
  342. printf(" optimal before considered for shrinking (default 100)\n");
  343. printf(" -f [0,1] -> do final optimality check for variables removed\n");
  344. printf(" by shrinking. Although this test is usually \n");
  345. printf(" positive, there is no guarantee that the optimum\n");
  346. printf(" was found if the test is omitted. (default 1)\n");
  347. printf(" -y string -> if option is given, reads alphas from file with given\n");
  348. printf(" and uses them as starting point. (default 'disabled')\n");
  349. printf(" -# int -> terminate optimization, if no progress after this\n");
  350. printf(" number of iterations. (default 100000)\n");
  351. printf("Output options:\n");
  352. printf(" -l string -> file to write predicted labels of unlabeled\n");
  353. printf(" examples into after transductive learning\n");
  354. printf(" -a string -> write all alphas to this file after learning\n");
  355. printf(" (in the same order as in the training set)\n");
  356. wait_any_key();
  357. printf("\nMore details in:\n");
  358. printf("[1] T. Joachims, Making Large-Scale SVM Learning Practical. Advances in\n");
  359. printf(" Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n");
  360. printf(" A. Smola (ed.), MIT Press, 1999.\n");
  361. printf("[2] T. Joachims, Estimating the Generalization performance of an SVM\n");
  362. printf(" Efficiently. International Conference on Machine Learning (ICML), 2000.\n");
  363. printf("[3] T. Joachims, Transductive Inference for Text Classification using Support\n");
  364. printf(" Vector Machines. International Conference on Machine Learning (ICML),\n");
  365. printf(" 1999.\n");
  366. printf("[4] K. Morik, P. Brockhausen, and T. Joachims, Combining statistical learning\n");
  367. printf(" with a knowledge-based approach - A case study in intensive care \n");
  368. printf(" monitoring. International Conference on Machine Learning (ICML), 1999.\n");
  369. printf("[5] T. Joachims, Learning to Classify Text Using Support Vector\n");
  370. printf(" Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n");
  371. printf(" 2002.\n\n");
  372. }