
/lib/JNI_SVM-light-6.01/src/svmlight-6.01/svm_jni.c

https://bitbucket.org/nickheiner/weather-prediction-machine-learning
C | 1169 lines | 850 code | 195 blank | 124 comment
  1. // vim:fdm=marker:foldmarker={%{,}%}:
  2. # include <jni.h>
  3. # include "svm_jni.h"
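/* Cache the JNI handles needed later on: the jclass of jnisvmlight/SVMLightModel,
 * the field and constructor IDs of that class and of the training-document class
 * (taken from the first element of tdata), and the length of the tdata array. */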
  4. JavaParamIDs* GetJParamIDs(JNIEnv * env, jobjectArray* tdata) {
  5. JavaParamIDs *ids = my_malloc(sizeof(struct javaparamids));
6. // Find and resolve the class type of SVMLightModel
  7. ids->SVMLightModelCls = (*env)->FindClass(env,"jnisvmlight/SVMLightModel");
  8. if (ids->SVMLightModelCls == 0) {
  9. perror("Class 'SVMLightModel' can't be found!: perror()");
  10. exit(1);
  11. }
12. // Look up the field IDs of the member variables of class 'SVMLightModel'
  13. ids->ID_string_format = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_format", "Ljava/lang/String;");
  14. ids->ID_long_kType = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_kType", "J");
  15. ids->ID_long_dParam = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_dParam", "J");
  16. ids->ID_double_gParam = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_gParam", "D");
  17. ids->ID_double_sParam = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_sParam", "D");
  18. ids->ID_double_rParam = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_rParam", "D");
  19. ids->ID_string_uParam = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_uParam", "Ljava/lang/String;");
  20. ids->ID_long_highFeatIdx = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_highFeatIdx", "J");
  21. ids->ID_long_trainDocs = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_trainDocs", "J");
  22. ids->ID_long_numSupVecs = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_numSupVecs", "J");
  23. ids->ID_double_threshold = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_threshold", "D");
  24. ids->ID_doubleArray_linWeights = (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_linWeights", "[D");
  25. ids->ID_labeledFeatureVectorArray_docs =
  26. (*env)->GetFieldID(env, ids->SVMLightModelCls, "m_docs", "[Ljnisvmlight/LabeledFeatureVector;");
27. if ((ids->ID_string_format && ids->ID_long_kType && ids->ID_long_dParam
28. && ids->ID_double_gParam && ids->ID_double_sParam && ids->ID_double_rParam && ids->ID_string_uParam &&
29. ids->ID_long_highFeatIdx && ids->ID_long_trainDocs && ids->ID_long_numSupVecs
30. && ids->ID_double_threshold && ids->ID_doubleArray_linWeights && ids->ID_labeledFeatureVectorArray_docs) == 0) {
  31. perror("Can't access JFieldIDs: perror()");
  32. exit(1);
  33. }
34. // Look up the ID of the SVMLightModel constructor
  35. ids->ConstructorID_SVMLightModelCls =
  36. (*env)->GetMethodID( env, ids->SVMLightModelCls,
  37. "<init>",
  38. "(Ljava/lang/String;JJDDDLjava/lang/String;JJJD[Ljnisvmlight/LabeledFeatureVector;)V"
  39. );
  40. if ( ids->ConstructorID_SVMLightModelCls == 0) {
  41. perror("Can't determine the constructor-method of SVMLightModel: perror()");
  42. exit(1);
  43. }
44. // Determine the size of the passed-in array tdata (containing the training documents)
  45. ids->tDataSize = (*env)->GetArrayLength(env, *tdata);
  46. if (ids->tDataSize<1) {
47. perror("\nArray contains no training documents!\n");
  48. }
49. // Fetch the first training document
  50. jobject traindoc = (*env)->GetObjectArrayElement(env, *tdata, 0);
  51. if (traindoc == NULL) {
  52. perror("\ntraining document is null!\n");
  53. }
54. // Determine the class type of the training documents
  55. ids->tDataCls = (*env)->GetObjectClass(env, traindoc);
  56. if (ids->tDataCls == 0) {
  57. perror("Can't determine the class of training documents: perror()");
  58. exit(1);
  59. }
60. // Look up the field IDs of the member variables of the training document class
  61. ids->ID_double_label = (*env)->GetFieldID(env, ids->tDataCls, "m_label", "D");
  62. ids->ID_double_factor = (*env)->GetFieldID(env, ids->tDataCls, "m_factor", "D");
  63. ids->ID_intArray_dimensions = (*env)->GetFieldID(env, ids->tDataCls, "m_dims", "[I");
  64. ids->ID_doubleArray_values = (*env)->GetFieldID(env, ids->tDataCls, "m_vals", "[D");
  65. // ids->MemVarID_size = (*env)->GetFieldID(env, ids->tDataCls, "m_size", "I");
  66. if (((ids->ID_double_label) && (ids->ID_intArray_dimensions) && (ids->ID_doubleArray_values)) == 0) {
  67. perror("Can't determine jfieldIDs (training documents): perror()");
  68. exit(1);
  69. }
70. // Look up the constructor ID of the training document class
  71. ids->ConstructorID_tDataCls = (*env)->GetMethodID(env, ids->tDataCls, "<init>", "()V");
  72. if ( ids->ConstructorID_tDataCls == 0) {
  73. perror("Can't determine the constructor-method of a training document: perror()");
  74. exit(1);
  75. }
  76. return ids;
  77. }
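/* Resolve the field IDs of jnisvmlight/TrainingParameters and of the LearnParam and
 * KernelParam objects it contains, keep references to those two objects, and copy the
 * optional argc/argv option strings from the LearnParam object into C strings. */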
  78. JTrainParams* GetJTrainParamIDs(JNIEnv * env, jobject* tparam) {
  79. JTrainParams *tids = my_malloc(sizeof(struct jtrainparams));
  80. tids->env=env;
  81. jclass tparamCls = (*env)->FindClass(env,"jnisvmlight/TrainingParameters");
  82. if (tparamCls == 0) {
  83. perror("Can't determine the class of 'TrainingParameters': perror()");
  84. exit(1);
  85. }
  86. tids->ID_LearnParam_lp = (*env)->GetFieldID(env, tparamCls, "m_lp", "Ljnisvmlight/LearnParam;");
  87. tids->ID_KernelParam_kp = (*env)->GetFieldID(env, tparamCls, "m_kp", "Ljnisvmlight/KernelParam;");
  88. if ((tids->ID_LearnParam_lp && tids->ID_KernelParam_kp) == 0) {
  89. perror("Can't find member variable 'm_lp' or 'm_kp': perror()");
  90. exit(1);
  91. }
  92. tids->lp = (*env)->GetObjectField(env, *tparam, tids->ID_LearnParam_lp);
  93. tids->kp = (*env)->GetObjectField(env, *tparam, tids->ID_KernelParam_kp);
  94. if (tids->lp == NULL || tids->kp == NULL) {
  95. perror("Can't access 'm_lp' or 'm_kp': perror()");
  96. exit(1);
  97. }
  98. jclass lpCls = (*env)->GetObjectClass(env,tids->lp);
  99. jclass kpCls = (*env)->GetObjectClass(env,tids->kp);
  100. if ((lpCls && kpCls) == 0) {
  101. perror("Can't determine the class of 'm_lp' or 'm_kp': perror()");
  102. exit(1);
  103. }
  104. tids->ID_int_verbosity = (*env)->GetFieldID(env, lpCls, "verbosity", "I");
  105. tids->ID_long_type = (*env)->GetFieldID(env, lpCls, "type", "J");
  106. tids->ID_double_svm_c = (*env)->GetFieldID(env, lpCls, "svm_c", "D");
  107. tids->ID_double_eps = (*env)->GetFieldID(env, lpCls, "eps", "D");
  108. tids->ID_double_svm_costratio = (*env)->GetFieldID(env, lpCls, "svm_costratio", "D");
  109. tids->ID_double_transduction_posratio = (*env)->GetFieldID(env, lpCls, "transduction_posratio", "D");
  110. tids->ID_long_biased_hyperplane = (*env)->GetFieldID(env, lpCls, "biased_hyperplane", "J");
  111. tids->ID_long_sharedslack = (*env)->GetFieldID(env, lpCls, "sharedslack", "J");
  112. tids->ID_long_svm_maxqpsize = (*env)->GetFieldID(env, lpCls, "svm_maxqpsize", "J");
  113. tids->ID_long_svm_newvarsinqp = (*env)->GetFieldID(env, lpCls, "svm_newvarsinqp", "J");
  114. tids->ID_long_kernel_cache_size = (*env)->GetFieldID(env, lpCls, "kernel_cache_size", "J");
  115. tids->ID_double_epsilon_crit = (*env)->GetFieldID(env, lpCls, "epsilon_crit", "D");
  116. tids->ID_double_epsilon_shrink = (*env)->GetFieldID(env, lpCls, "epsilon_shrink", "D");
  117. tids->ID_long_svm_iter_to_shrink = (*env)->GetFieldID(env, lpCls, "svm_iter_to_shrink", "J");
  118. tids->ID_long_maxiter = (*env)->GetFieldID(env, lpCls, "maxiter", "J");
  119. tids->ID_long_remove_inconsistent = (*env)->GetFieldID(env, lpCls, "remove_inconsistent", "J");
  120. tids->ID_long_skip_final_opt_check = (*env)->GetFieldID(env, lpCls, "skip_final_opt_check", "J");
  121. tids->ID_long_compute_loo = (*env)->GetFieldID(env, lpCls, "compute_loo", "J");
  122. tids->ID_double_rho = (*env)->GetFieldID(env, lpCls, "rho", "D");
  123. tids->ID_long_xa_depth = (*env)->GetFieldID(env, lpCls, "xa_depth", "J");
  124. tids->ID_string_predfile = (*env)->GetFieldID(env, lpCls, "predfile", "Ljava/lang/String;");
  125. tids->ID_string_alphafile = (*env)->GetFieldID(env, lpCls, "alphafile", "Ljava/lang/String;");
  126. tids->ID_double_epsilon_const = (*env)->GetFieldID(env, lpCls, "epsilon_const", "D");
  127. tids->ID_double_epsilon_a = (*env)->GetFieldID(env, lpCls, "epsilon_a", "D");
  128. tids->ID_double_opt_precision = (*env)->GetFieldID(env, lpCls, "opt_precision", "D");
  129. tids->ID_long_svm_c_steps = (*env)->GetFieldID(env, lpCls, "svm_c_steps", "J");
  130. tids->ID_double_svm_c_factor = (*env)->GetFieldID(env, lpCls, "svm_c_factor", "D");
  131. tids->ID_double_svm_costratio_unlab = (*env)->GetFieldID(env, lpCls, "svm_costratio_unlab", "D");
  132. tids->ID_double_svm_unlabbound = (*env)->GetFieldID(env, lpCls, "svm_unlabbound", "D");
  133. tids->ID_double_svm_cost = (*env)->GetFieldID(env, lpCls, "svm_cost", "D");
  134. tids->ID_long_totwords = (*env)->GetFieldID(env, lpCls, "totwords", "J");
  135. if ((tids->ID_int_verbosity && tids->ID_long_type && tids->ID_double_svm_c && tids->ID_double_eps && tids->ID_double_svm_costratio &&
  136. tids->ID_double_transduction_posratio && tids->ID_long_biased_hyperplane && tids->ID_long_sharedslack && tids->ID_long_svm_maxqpsize &&
  137. tids->ID_long_svm_newvarsinqp && tids->ID_long_kernel_cache_size && tids->ID_double_epsilon_crit && tids->ID_double_epsilon_shrink &&
  138. tids->ID_long_svm_iter_to_shrink && tids->ID_long_maxiter && tids->ID_long_remove_inconsistent && tids->ID_long_skip_final_opt_check &&
  139. tids->ID_long_compute_loo && tids->ID_double_rho && tids->ID_long_xa_depth && tids->ID_string_predfile && tids->ID_string_alphafile &&
  140. tids->ID_double_epsilon_const && tids->ID_double_epsilon_a && tids->ID_double_opt_precision && tids->ID_long_svm_c_steps &&
  141. tids->ID_double_svm_c_factor && tids->ID_double_svm_costratio_unlab && tids->ID_double_svm_unlabbound && tids->ID_double_svm_cost &&
  142. tids->ID_long_totwords) == 0) {
  143. perror("Can't determine the jfieldIDs of class 'LearnParam': perror()");
  144. exit(1);
  145. }
  146. tids->ID_long_kernel_type = (*env)->GetFieldID(env, kpCls, "kernel_type", "J");
  147. tids->ID_long_poly_degree = (*env)->GetFieldID(env, kpCls, "poly_degree", "J");
  148. tids->ID_double_rbf_gamma = (*env)->GetFieldID(env, kpCls, "rbf_gamma", "D");
  149. tids->ID_double_coef_lin = (*env)->GetFieldID(env, kpCls, "coef_lin", "D");
  150. tids->ID_double_coef_const = (*env)->GetFieldID(env, kpCls, "coef_const", "D");
  151. tids->ID_string_custom = (*env)->GetFieldID(env, kpCls, "custom", "Ljava/lang/String;");
  152. if ((tids->ID_long_kernel_type && tids->ID_long_poly_degree && tids->ID_double_rbf_gamma && tids->ID_double_coef_lin &&
  153. tids->ID_double_coef_const && tids->ID_string_custom) == 0) {
  154. perror("Can't determine the jfieldIDs of class 'KernelParam': perror()");
  155. exit(1);
  156. }
  157. jfieldID argcID = (*env)->GetFieldID(env, lpCls, "argc", "I");
  158. jfieldID argvID = (*env)->GetFieldID(env, lpCls, "argv", "[Ljava/lang/String;");
  159. if ((argcID && argvID) == 0) {
  160. perror("Can't find jfieldIDs of 'argc/argv'");
  161. exit(1);
  162. }
  163. tids->argc = (*env)->GetIntField(env,tids->lp,argcID);
  164. jobjectArray sfield = (*env)->GetObjectField(env,tids->lp,argvID);
  165. if (tids->argc > 0) {
  166. tids->argv = (char**) my_malloc(sizeof(char*) * tids->argc);
  167. int j;
  168. for (j=0;j<tids->argc;j++) {
  169. jstring jstr = (*env)->GetObjectArrayElement(env, sfield, j);
  170. const char* str = (*env)->GetStringUTFChars(env, jstr, 0 );
171. (tids->argv)[j] = (char*) my_malloc(sizeof(char) * (strlen(str) + 1));
  172. strcpy((tids->argv)[j],str);
  173. (*env)->ReleaseStringUTFChars(env,jstr,str);
  174. }
  175. }
  176. return tids;
  177. }
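/* Copy the values of the Java LearnParam/KernelParam objects into SVM-light's
 * LEARN_PARM and KERNEL_PARM structs, then apply any command-line style options
 * passed via argv and run SVM-light's usual plausibility checks on the result. */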
  178. void SVMparmInit(KERNEL_CACHE* kernel_cache,LEARN_PARM* learn_parm,KERNEL_PARM* kernel_parm, MODEL* model, JTrainParams* tparm) {
  179. char type[100] = " ";
  180. jstring test;
  181. const char *str;
  182. JNIEnv* env = tparm->env;
  183. int argc = tparm->argc;
  184. char **argv = tparm->argv;
  185. verbosity = (*env)->GetIntField(env,tparm->lp,tparm->ID_int_verbosity);
  186. learn_parm->type = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_type);
  187. /* learn_parm->svm_c=0.0; */
  188. learn_parm->svm_c = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_c);
  189. /* learn_parm->eps=0.1; */
  190. learn_parm->eps = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_eps);
  191. /* learn_parm->svm_costratio=1.0; */
  192. learn_parm->svm_costratio = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_costratio);
  193. /* learn_parm->transduction_posratio=-1.0; */
  194. learn_parm->transduction_posratio = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_transduction_posratio);
  195. /* learn_parm->biased_hyperplane=1; */
  196. learn_parm->biased_hyperplane = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_biased_hyperplane);
  197. /* learn_parm->sharedslack=0; */
  198. learn_parm->sharedslack = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_sharedslack);
  199. /* learn_parm->svm_maxqpsize=10; */
  200. learn_parm->svm_maxqpsize = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_svm_maxqpsize);
  201. /* learn_parm->svm_newvarsinqp=0; */
  202. learn_parm->svm_newvarsinqp = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_svm_newvarsinqp);
  203. /* learn_parm->kernel_cache_size=40; */
  204. learn_parm->kernel_cache_size = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_kernel_cache_size);
  205. /* learn_parm->epsilon_crit=0.001; */
  206. learn_parm->epsilon_crit = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_epsilon_crit);
  207. learn_parm->epsilon_shrink = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_epsilon_shrink);
  208. /* learn_parm->svm_iter_to_shrink=-9999; */
  209. learn_parm->svm_iter_to_shrink = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_svm_iter_to_shrink);
  210. /* learn_parm->maxiter=100000; */
  211. learn_parm->maxiter = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_maxiter);
  212. /* learn_parm->remove_inconsistent=0; */
  213. learn_parm->remove_inconsistent = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_remove_inconsistent);
  214. /* learn_parm->skip_final_opt_check=0; */
  215. learn_parm->skip_final_opt_check = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_skip_final_opt_check);
  216. /* learn_parm->compute_loo=0; */
  217. learn_parm->compute_loo = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_compute_loo);
  218. /* learn_parm->rho=1.0; */
  219. learn_parm->rho = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_rho);
  220. /* learn_parm->xa_depth=0; */
  221. learn_parm->xa_depth = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_xa_depth);
  222. /* strcpy (learn_parm->predfile, "trans_predictions"); */
  223. test = (*env)->GetObjectField(env, tparm->lp, tparm->ID_string_predfile);
  224. str = (*env)->GetStringUTFChars(env, test, 0 );
  225. strcpy (learn_parm->predfile, str);
  226. (*env)->ReleaseStringUTFChars(env,test,str);
  227. /* strcpy (learn_parm->alphafile, ""); */
  228. test = (*env)->GetObjectField(env, tparm->lp, tparm->ID_string_alphafile);
  229. str = (*env)->GetStringUTFChars(env, test, 0 );
  230. strcpy (learn_parm->alphafile, str);
  231. (*env)->ReleaseStringUTFChars(env,test,str);
  232. learn_parm->epsilon_const = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_epsilon_const);
  233. /* learn_parm->epsilon_a=1E-15; */
  234. learn_parm->epsilon_a = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_epsilon_a);
  235. learn_parm->opt_precision = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_opt_precision);
  236. learn_parm->svm_c_steps = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_svm_c_steps);
  237. learn_parm->svm_c_factor = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_c_factor);
  238. /* learn_parm->svm_costratio_unlab=1.0; */
  239. learn_parm->svm_costratio_unlab = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_costratio_unlab);
  240. /* learn_parm->svm_unlabbound=1E-5; */
  241. learn_parm->svm_unlabbound = (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_unlabbound);
  242. learn_parm->svm_cost = (double *) my_malloc(sizeof(double));
  243. *(learn_parm->svm_cost) = (double) (*env)->GetDoubleField(env,tparm->lp,tparm->ID_double_svm_cost);
244. learn_parm->totwords = (*env)->GetLongField(env,tparm->lp,tparm->ID_long_totwords);
  245. /* kernel_parm->kernel_type=0; */
  246. kernel_parm->kernel_type = (*env)->GetLongField(env,tparm->kp,tparm->ID_long_kernel_type);
  247. /* kernel_parm->poly_degree=3; */
  248. kernel_parm->poly_degree = (*env)->GetLongField(env,tparm->kp,tparm->ID_long_poly_degree);
  249. /* kernel_parm->rbf_gamma=1.0; */
  250. kernel_parm->rbf_gamma = (*env)->GetDoubleField(env,tparm->kp,tparm->ID_double_rbf_gamma);
  251. /* kernel_parm->coef_lin=1; */
  252. kernel_parm->coef_lin = (*env)->GetDoubleField(env,tparm->kp,tparm->ID_double_coef_lin);
  253. /* kernel_parm->coef_const=1; */
  254. kernel_parm->coef_const = (*env)->GetDoubleField(env,tparm->kp,tparm->ID_double_coef_const);
  255. /* strcpy(kernel_parm->custom,"empty"); */
  256. test = (*env)->GetObjectField(env, tparm->kp, tparm->ID_string_custom);
  257. str = (*env)->GetStringUTFChars(env, test, 0 );
  258. strcpy(kernel_parm->custom, str);
  259. (*env)->ReleaseStringUTFChars(env,test,str);
  260. if (argc>0) {
  261. int i=0;
  262. for(i=0;(i<argc) && ((argv[i])[0] == '-');i++) {
  263. switch ((argv[i])[1])
  264. {
  265. case '?': print_help(); exit(0);
  266. case 'z': i++; strcpy(type,argv[i]); break;
  267. case 'v': i++; verbosity=atol(argv[i]); break;
  268. case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break;
  269. case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break;
  270. case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break;
  271. case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break;
  272. case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break;
  273. case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
  274. case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
  275. case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
  276. case 'c': i++; learn_parm->svm_c=atof(argv[i]); break;
  277. case 'w': i++; learn_parm->eps=atof(argv[i]); break;
  278. case 'p': i++; learn_parm->transduction_posratio=atof(argv[i]); break;
  279. case 'j': i++; learn_parm->svm_costratio=atof(argv[i]); break;
  280. case 'e': i++; learn_parm->epsilon_crit=atof(argv[i]); break;
  281. case 'o': i++; learn_parm->rho=atof(argv[i]); break;
  282. case 'k': i++; learn_parm->xa_depth=atol(argv[i]); break;
  283. case 'x': i++; learn_parm->compute_loo=atol(argv[i]); break;
  284. case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break;
  285. case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break;
  286. case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break;
  287. case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break;
  288. case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break;
  289. case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break;
  290. case 'l': i++; strcpy(learn_parm->predfile,argv[i]); break;
  291. case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break;
  292. case 'y': i++; printf("Option \"-y\" is not supported in this Version of the JNI-SVMLight-interface!\n"); fflush(stdout); break;
  293. default: printf("\nUnrecognized option %s!\n\n",argv[i]);
  294. print_help();
  295. exit(0);
  296. }
  297. }
  298. if(strcmp(type,"c")==0) {
  299. learn_parm->type=CLASSIFICATION;
  300. }
  301. else if(strcmp(type,"r")==0) {
  302. learn_parm->type=REGRESSION;
  303. }
  304. else if(strcmp(type,"p")==0) {
  305. learn_parm->type=RANKING;
  306. }
  307. else if(strcmp(type,"o")==0) {
  308. learn_parm->type=OPTIMIZATION;
  309. }
  310. else if(strcmp(type,"s")==0) {
  311. learn_parm->type=OPTIMIZATION;
  312. learn_parm->sharedslack=1;
  313. }
  314. else if (strcmp(type," ") != 0 || ((learn_parm->type & (CLASSIFICATION | REGRESSION | RANKING | OPTIMIZATION))==0)) {
315. printf("\n\nUnknown type '%s': Valid types are 'c' (classification), 'r' regression, and 'p' preference ranking.\n",type);
  316. fflush(stdout);
  317. printf("\n\nPress Return for help\n\n");
  318. fflush(stdout);
  319. wait_any_key();
  320. print_help();
  321. exit(0);
  322. }
  323. }
  324. if(learn_parm->svm_iter_to_shrink == -9999) {
  325. if(kernel_parm->kernel_type == LINEAR)
  326. learn_parm->svm_iter_to_shrink=2;
  327. else
  328. learn_parm->svm_iter_to_shrink=100;
  329. }
  330. if((learn_parm->skip_final_opt_check)
  331. && (kernel_parm->kernel_type == LINEAR)) {
  332. printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
  333. learn_parm->skip_final_opt_check=0;
  334. }
  335. if((learn_parm->skip_final_opt_check)
  336. && (learn_parm->remove_inconsistent)) {
  337. printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
  338. fflush(stdout);
  339. printf("\n\nPress Return for help\n\n");
  340. fflush(stdout);
  341. wait_any_key();
  342. print_help();
  343. exit(0);
  344. }
  345. if((learn_parm->svm_maxqpsize<2)) {
  346. printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize);
  347. fflush(stdout);
  348. printf("\n\nPress Return for help\n\n");
  349. fflush(stdout);
  350. wait_any_key();
  351. print_help();
  352. exit(0);
  353. }
  354. if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) {
  355. printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize);
  356. printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp);
  357. fflush(stdout);
  358. printf("\n\nPress Return for help\n\n");
  359. fflush(stdout);
  360. wait_any_key();
  361. print_help();
  362. exit(0);
  363. }
  364. if(learn_parm->svm_iter_to_shrink<1) {
  365. printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink);
  366. fflush(stdout);
  367. printf("\n\nPress Return for help\n\n");
  368. fflush(stdout);
  369. wait_any_key();
  370. print_help();
  371. exit(0);
  372. }
  373. if(learn_parm->svm_c<0) {
  374. printf("\nThe C parameter must be greater than zero!\n\n");
  375. fflush(stdout);
  376. printf("\n\nPress Return for help\n\n");
  377. fflush(stdout);
  378. wait_any_key();
  379. print_help();
  380. exit(0);
  381. }
  382. if(learn_parm->transduction_posratio>1) {
  383. printf("\nThe fraction of unlabeled examples to classify as positives must\n");
  384. printf("be less than 1.0 !!!\n\n");
  385. fflush(stdout);
  386. printf("\n\nPress Return for help\n\n");
  387. fflush(stdout);
  388. wait_any_key();
  389. print_help();
  390. exit(0);
  391. }
  392. if(learn_parm->svm_costratio<=0) {
  393. printf("\nThe COSTRATIO parameter must be greater than zero!\n\n");
  394. fflush(stdout);
  395. printf("\n\nPress Return for help\n\n");
  396. fflush(stdout);
  397. wait_any_key();
  398. print_help();
  399. exit(0);
  400. }
  401. if(learn_parm->epsilon_crit<=0) {
  402. printf("\nThe epsilon parameter must be greater than zero!\n\n");
  403. fflush(stdout);
  404. printf("\n\nPress Return for help\n\n");
  405. fflush(stdout);
  406. wait_any_key();
  407. print_help();
  408. exit(0);
  409. }
  410. if(learn_parm->rho<0) {
  411. printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
  412. printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
  413. printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
  414. fflush(stdout);
  415. printf("\n\nPress Return for help\n\n");
  416. fflush(stdout);
  417. wait_any_key();
  418. print_help();
  419. exit(0);
  420. }
  421. if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
  422. printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
  423. printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
  424. printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
  425. fflush(stdout);
  426. printf("\n\nPress Return for help\n\n");
  427. fflush(stdout);
  428. wait_any_key();
  429. print_help();
  430. exit(0);
  431. }
  432. }
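/* JNI entry point for classification: convert the feature vector object passed from
 * Java into a WORD array and a DOC, then classify it against the global _model,
 * using the precomputed weight vector for linear kernels and the kernel expansion
 * otherwise. */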
  433. JNIEXPORT jdouble JNICALL Java_jnisvmlight_SVMLightInterface_classifyNative(JNIEnv * env, jobject jo1, jobject testdoc) {
  434. DOC *doc; /* test example */
  435. WORD *words;
  436. SVECTOR *sv;
  437. long j,wpos,queryid,slackid,max_words_doc;
  438. double dist,costfactor;
  439. jclass classid = (*env)->GetObjectClass(env, testdoc);
  440. jintArray dim = (*env)->GetObjectField(env, testdoc, (*env)->GetFieldID(env, classid, "m_dims", "[I"));
  441. jdoubleArray val = (*env)->GetObjectField(env, testdoc, (*env)->GetFieldID(env, classid, "m_vals", "[D"));
  442. jsize dimLen = (*env)->GetArrayLength(env, dim);
  443. jsize valLen = (*env)->GetArrayLength(env, val);
  444. jint *dimEl = (*env)->GetIntArrayElements(env, dim, 0);
  445. jdouble *valEl = (*env)->GetDoubleArrayElements(env, val, 0);
  446. int* ds;
  447. double *vs;
  448. if (sizeof(int) == sizeof(jint)) {
  449. ds = (int*) dimEl;
  450. } else {
  451. int fi=0;
  452. printf("!!!!!!!!!!!!!!! Warning: java datatype \"jint\" isn't of the same size as C datatype \"int\"\n");
  453. ds = (int*) my_malloc(sizeof(int)*dimLen);
  454. for(fi=0;fi<dimLen;fi++) {
  455. ds[fi] = (int) dimEl[fi];
  456. }
  457. }
  458. if (sizeof(double) == sizeof(jdouble)) {
  459. vs = (double*) valEl;
  460. } else {
  461. int fi=0;
462. printf("!!!!!!!!!!!!!!! Warning: java datatype \"jdouble\" isn't of the same size as C datatype \"double\"\n");
  463. vs = (double*) my_malloc(sizeof(double)*valLen);
  464. for(fi=0;fi<valLen;fi++) {
  465. vs[fi] = (double) valEl[fi];
  466. }
  467. }
  468. //fprintf("totalwords: %ld \n",(int)dimLen);
  469. max_words_doc=dimLen;
  470. words = (WORD *)my_malloc(sizeof(WORD)*(dimLen+10));
  471. jparse_document(words,&queryid,&slackid,&costfactor,&wpos,max_words_doc,ds,vs);
  472. (*env)->ReleaseIntArrayElements(env,dim,dimEl,0);
  473. (*env)->ReleaseDoubleArrayElements(env,val,valEl,0);
  474. if(_model->kernel_parm.kernel_type == 0) { /* linear kernel */
  475. for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */
  476. if((words[j]).wnum>_model->totwords) /* are not larger than in */
  477. (words[j]).wnum=0; /* model. Remove feature if */
  478. } /* necessary. */
  479. sv = create_svector2(words,1.0);
  480. doc = create_example(-1,0,0,0.0,sv);
  481. dist=classify_example_linear(_model,doc);
  482. } else { /* non-linear kernel */
  483. sv = create_svector2(words,1.0);
  484. doc = create_example(-1,0,0,0.0,sv);
  485. dist=classify_example(_model,doc);
  486. }
  487. free(words);
488. free_example(doc,1); /* also frees the copied feature vector */
  489. return (jdouble)dist;
  490. }
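/* Variant of SVM-light's create_svector that copies only the WORD array and the
 * factor; no user-defined comment string is attached. */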
  491. SVECTOR *create_svector2(WORD *words, double factor)
  492. {
  493. SVECTOR *vec;
  494. long fnum,i;
  495. fnum=0;
  496. while(words[fnum].wnum) {
  497. fnum++;
  498. }
  499. fnum++;
  500. vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
  501. vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
  502. for(i=0;i<fnum;i++) {
  503. vec->words[i]=words[i];
  504. }
  505. vec->twonorm_sq=sprod_ss(vec,vec);
  506. vec->kernel_id=0;
  507. vec->next=NULL;
508. vec->factor=factor;
vec->userdefined=NULL; /* no user-defined comment string */
  509. return(vec);
  510. }
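/* JNI entry point for training: resolve the Java IDs, fill the SVM-light parameter
 * structs, build DOC structures from the Java training data, run the requested
 * svm_learn_* routine, and convert the resulting MODEL into a Java SVMLightModel. */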
  511. JNIEXPORT jobject JNICALL Java_jnisvmlight_SVMLightInterface_trainmodel
  512. (JNIEnv * env, jobject obj, jobjectArray tdata, jobject tparm)
  513. {
  514. DOC **docs; /* training examples */
  515. long i;
  516. long* totdoc = (long*) my_malloc(sizeof(long));
  517. long* totwords = (long*) my_malloc(sizeof(long));
  518. long* ndocuments = (long*) my_malloc(sizeof(long));
  519. double *target=NULL;
  520. double *alpha_in=NULL;
  521. KERNEL_CACHE *kernel_cache;
  522. LEARN_PARM learn_parm;
  523. KERNEL_PARM kernel_parm;
  524. _model=(MODEL *)my_malloc(sizeof(MODEL));
  525. // --------------------- init stuff ----------------------------
  526. JavaParamIDs *JIDs = GetJParamIDs(env, &tdata);
  527. JTrainParams* targs = GetJTrainParamIDs(env,&tparm);
  528. SVMparmInit(kernel_cache,&learn_parm,&kernel_parm,_model,targs);
  529. if(verbosity>=1) {
  530. printf("\n --- Native C function: scanning examples, now .. (JNI Interface)\n"); fflush(stdout);
  531. }
  532. // --------------------- create DOCs ---------------------------
  533. // allocate memory for all training documents
  534. createDOCs(env,JIDs,&tdata,&docs,&target,totwords,totdoc,ndocuments);
  535. if(verbosity>=1)
536. printf(" --- Native C function: documents allocated successfully.\n"); fflush(stdout);
  537. learn_parm.totwords = *totwords;
  538. // --------------------- create kernel -------------------------
  539. FILE * dump = NULL;
  540. long int z = 0;
  541. long int y = 0;
  542. if (verbosity>10) {
  543. if ((dump = fopen("jni-traindump.dat","w")) == NULL) {
544. perror("Writing to \"jni-traindump.dat\" doesn't work!\n");
  545. exit(1);
  546. }
  547. printf("\n|||||||||||||||||||||||||||||||||| dumping ..\n");
  548. fprintf(dump,"totaldocuments: %ld \n",*totdoc);
  549. while(z<(*totdoc)) {
  550. fprintf(dump,"(%ld) (QID: %ld) (CF: %.16g) (SID: %ld) ",docs[z]->docnum,docs[z]->queryid,docs[z]->costfactor,docs[z]->slackid);
  551. SVECTOR *v = docs[z]->fvec;
  552. fprintf(dump,"(NORM:%.32g) (UD:%s) (KID:%ld) (VL:%p) (F:%.32g) %.32g ",v->twonorm_sq,(v->userdefined == NULL ? "" : v->userdefined),v->kernel_id,v->next,v->factor,target[z]);
  553. if (v != NULL && v->words != NULL) {
  554. while ((v->words[y]).wnum) {
  555. fprintf(dump,"%ld:%.32g ",(v->words[y]).wnum, (v->words[y]).weight);
  556. y++;
  557. }
  558. } else
559. fprintf(dump, "NULL WORDS\n");
  560. fprintf(dump,"\n");
  561. y=0;
  562. z++;
  563. }
  564. fprintf(dump,"---------------------------------------------------\n");
  565. fprintf(dump,"kernel_type: %ld\n",kernel_parm.kernel_type);
  566. fprintf(dump,"poly_degree: %ld\n",kernel_parm.poly_degree);
  567. fprintf(dump,"rbf_gamma: %.32g\n",kernel_parm.rbf_gamma);
  568. fprintf(dump,"coef_lin: %.32g\n",kernel_parm.coef_lin);
  569. fprintf(dump,"coef_const: %.32g\n",kernel_parm.coef_const);
  570. fprintf(dump,"custom: %s\n",kernel_parm.custom);
  571. fprintf(dump,"type: %ld\n",learn_parm.type);
  572. fprintf(dump,"svm_c: %.32g\n",learn_parm.svm_c);
  573. fprintf(dump,"eps: %.32g\n",learn_parm.eps);
  574. fprintf(dump,"svm_costratio: %.32g\n",learn_parm.svm_costratio);
  575. fprintf(dump,"transduction_posratio: %.32g\n",learn_parm.transduction_posratio);
  576. fprintf(dump,"biased_hyperplane: %ld\n",learn_parm.biased_hyperplane);
  577. fprintf(dump,"svm_maxqpsize: %ld\n",learn_parm.svm_maxqpsize);
  578. fprintf(dump,"svm_newvarsinqp: %ld\n",learn_parm.svm_newvarsinqp);
  579. fprintf(dump,"epsilon_crit: %.32g\n",learn_parm.epsilon_crit);
  580. fprintf(dump,"epsilon_shrink: %.32g\n",learn_parm.epsilon_shrink);
  581. fprintf(dump,"svm_iter_to_shrink: %ld\n",learn_parm.svm_iter_to_shrink);
  582. fprintf(dump,"remove_inconsistent: %ld\n",learn_parm.remove_inconsistent);
  583. fprintf(dump,"skip_final_opt_check: %ld\n",learn_parm.skip_final_opt_check);
  584. fprintf(dump,"compute_loo: %ld\n",learn_parm.compute_loo);
  585. fprintf(dump,"rho: %.32g\n",learn_parm.rho);
  586. fprintf(dump,"xa_depth: %ld\n",learn_parm.xa_depth);
  587. fprintf(dump,"predfile: %s\n",learn_parm.predfile);
  588. fprintf(dump,"alphafile: %s\n",learn_parm.alphafile);
  589. fprintf(dump,"epsilon_const: %.32g\n",learn_parm.epsilon_const);
  590. fprintf(dump,"epsilon_a: %.32g\n",learn_parm.epsilon_a);
  591. fprintf(dump,"opt_precision: %.32g\n",learn_parm.opt_precision);
  592. fprintf(dump,"svm_c_steps: %ld\n",learn_parm.svm_c_steps);
  593. fprintf(dump,"svm_c_factor: %.32g\n",learn_parm.svm_c_factor);
  594. fprintf(dump,"svm_costratio_unlab: %.32g\n",learn_parm.svm_costratio_unlab);
  595. fprintf(dump,"svm_unlabbound: %.32g\n",learn_parm.svm_unlabbound);
  596. }
  597. if (*ndocuments > 0) {
  598. if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
  599. kernel_cache=NULL;
  600. }
  601. else {
  602. /* Always get a new kernel cache. It is not possible to use the
  603. same cache for two different training runs */
  604. kernel_cache=kernel_cache_init(*totdoc,learn_parm.kernel_cache_size);
  605. }
  606. if(verbosity>=1)
  607. printf(" --- Native C function: engaging the training process.\n"); fflush(stdout);
  608. if(learn_parm.type == CLASSIFICATION) {
  609. svm_learn_classification(docs,target,*totdoc,*totwords,&learn_parm,&kernel_parm,kernel_cache,_model,alpha_in);
  610. }
  611. else if(learn_parm.type == REGRESSION) {
  612. svm_learn_regression(docs,target,*totdoc,*totwords,&learn_parm,&kernel_parm,&kernel_cache,_model);
  613. }
  614. else if(learn_parm.type == RANKING) {
  615. svm_learn_ranking(docs,target,*totdoc,*totwords,&learn_parm,&kernel_parm,&kernel_cache,_model);
  616. }
617. else if(learn_parm.type == OPTIMIZATION) {
618. svm_learn_optimization(docs,target,*totdoc,*totwords,&learn_parm,&kernel_parm,kernel_cache,_model,alpha_in);
  619. }
  620. if(verbosity>=1)
  621. printf(" --- Native C function: training has been done.\n"); fflush(stdout);
  622. if(_model->kernel_parm.kernel_type == 0) { /* linear kernel */
  623. /* compute weight vector */
  624. add_weight_vector_to_linear_model(_model);
  625. }
  626. } else {
  627. _model->supvec = (DOC **)my_malloc(sizeof(DOC *)*2);
  628. _model->alpha = (double *)my_malloc(sizeof(double)*2);
  629. _model->index = (long *)my_malloc(sizeof(long)*2);
  630. _model->at_upper_bound=0;
  631. _model->b=0;
  632. _model->supvec[0]=0; /* element 0 reserved and empty for now */
  633. _model->alpha[0]=0;
  634. _model->lin_weights=NULL;
  635. _model->totwords=0;
  636. _model->totdoc=0;
  637. _model->kernel_parm=(kernel_parm);
  638. _model->sv_num=1;
  639. _model->loo_error=-1;
  640. _model->loo_recall=-1;
  641. _model->loo_precision=-1;
  642. _model->xa_error=-1;
  643. _model->xa_recall=-1;
  644. _model->xa_precision=-1;
  645. }
  646. if (verbosity>10) {
  647. fprintf(dump,"totwords: %ld\n",learn_parm.totwords);
  648. printf("|||||||||||||||||||||||||||||||||| z: %ld, totdoc: %ld\n",z,*totdoc);
  649. }
  650. // ---------------------- build the model -----------------------
  651. if (verbosity>10)
  652. write_model("model-jnisvmlib.dat",_model);
653. // Convert the C structure of the SVM-light model into a Java object.
  654. if(verbosity>=1)
  655. printf(" --- Native C function: creating Java return type.\n"); fflush(stdout);
  656. jobject ret = buildModelData(env,obj,_model,JIDs);
  657. if(verbosity>=1)
658. printf(" --- Native C function: Java return object created.\n"); fflush(stdout);
  659. // Uncomment the following when using Java-side classification only!
  660. // For native classification we need to remember all model-related parameters.
  661. //free(alpha_in);
  662. //free_model(_model,0);
  663. //for(i=0;i<*(totdoc);i++)
  664. //free_example(docs[i],1);
  665. //free(docs);
  666. //free(target);
  667. //free(totdoc);
  668. //free(totwords);
  669. //free(ndocuments);
  670. return ret;
  671. }
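/* Convert the trained MODEL into a Java SVMLightModel: the support vectors are copied
 * into LabeledFeatureVector objects (with the alpha value stored in the label field),
 * the kernel parameters and counts are passed to the SVMLightModel constructor, and
 * the linear weight vector is attached for linear kernels. */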
  672. jobject buildModelData(JNIEnv *env, jobject obj, MODEL* model,JavaParamIDs* ids) {
  673. SVECTOR *v;
  674. long NUM_DOCS, NUM_FEAT, j;
  675. NUM_DOCS = model->sv_num;
  676. jobjectArray doks = NULL;
  677. if (NUM_DOCS > 1) {
678. // Create a Java array of type jnisvmlight/LabeledFeatureVector
  679. doks = (*env)->NewObjectArray(env,(jsize) NUM_DOCS-1, ids->tDataCls, NULL);
  680. if (doks == 0) {
  681. perror("perror: Can't create Java array of type jnisvmlight/LabeledFeatureVector!");
  682. (*env)->ExceptionDescribe(env);
  683. exit(1);
  684. }
  685. long u;
  686. for (u=1; u<NUM_DOCS; u++) {
687. // For each support vector, create a new object of type jnisvmlight/LabeledFeatureVector
  688. jobject Data =(*env)->NewObject(env, ids->tDataCls, ids->ConstructorID_tDataCls);
  689. if (Data == NULL) {
  690. perror("perror: Can't create object of type LabeledFeatureVector!");
  691. (*env)->ExceptionDescribe(env);
  692. }
693. // Fill the label member variable with the corresponding value (the support vector's alpha)
694. jdouble label = (jdouble) model->alpha[u];
695. (*env)->SetDoubleField(env, Data, ids->ID_double_label, label);
696. // Fill the factor member variable with the corresponding value
697. jdouble factor = (jdouble) ((model->supvec[u])->fvec)->factor;
698. (*env)->SetDoubleField(env, Data, ids->ID_double_factor, factor);
  699. v = (model->supvec[u])->fvec;
  700. for (j=0; (v->words[j]).wnum; j++);
  701. NUM_FEAT = j;
  702. //XXX: goes wrong, if NUM_FEAT is too big!!!!
  703. //jint size = (jint) NUM_FEAT;
  704. //(*env)->SetIntField(env, Data, ids->MemVarID_size, size);
705. // Allocate int/double arrays to hold the document features
706. int* intar = (int*) my_malloc(NUM_FEAT*sizeof(int));
707. double* doublear = (double*) my_malloc(NUM_FEAT*sizeof(double));
708. // Create the corresponding Java arrays
  709. jintArray dim = (*env)->NewIntArray(env,(jsize) NUM_FEAT);
  710. jdoubleArray val = (*env)->NewDoubleArray(env,(jsize) NUM_FEAT);
711. if (dim == NULL || val == NULL) {
  712. perror("perror: Can't create jint- or jdoubleArrray! :");
  713. (*env)->ExceptionDescribe(env);
  714. exit(1);
  715. }
  716. for (j=0;j<NUM_FEAT;j++) {
717. // Fill the int/double arrays with the corresponding values
718. intar[j] = (int)((v->words[j]).wnum);
719. doublear[j] = (double) ((v->words[j]).weight);
720. // Copy the int/double values into the Java arrays
  721. (*env)->SetIntArrayRegion(env, dim, (jsize) j, (jsize) 1, (jint*) &intar[j]);
  722. (*env)->SetDoubleArrayRegion(env, val, (jsize) j, (jsize) 1, (jdouble*) &doublear[j]);
  723. }
724. // Store the Java arrays in the jnisvmlight/LabeledFeatureVector object
725. (*env)->SetObjectField(env, Data, ids->ID_intArray_dimensions, dim);
726. (*env)->SetObjectField(env, Data, ids->ID_doubleArray_values, val);
727. // Place the jnisvmlight/LabeledFeatureVector object into the array (at position u-1)
  728. (*env)->SetObjectArrayElement(env, doks, (jsize) u-1, Data);
  729. // ------------------------------------------------------------
  730. // XXX: TODO: check if i am allowed to free these arrays!!!
  731. free(intar);
  732. free(doublear);
  733. // ------------------------------------------------------------
  734. }
735. // Store the jnisvmlight/LabeledFeatureVector array in the corresponding member variable of the SVMLightModel object
  736. // (*env)->SetObjectField(env, SVMLightModel, ids->ID_labeledFeatureVectorArray_docs, doks);
  737. } else {
738. fprintf(stderr,"-------------------------------------------------------------------------------\nThe number of support vectors (model->sv_num: %ld) is less than 2!\nThere must be at least 2 support vectors. Model can't be built.\n",model->sv_num);
  739. // (*env)->SetObjectField(env, SVMLightModel, ids->ID_labeledFeatureVectorArray_docs, NULL);
  740. }
  741. char* text = "SVM-light Version ";
  742. char* dummy = (char*) my_malloc(((int)strlen(VERSION)+strlen(text)+1)*sizeof(char));
  743. sprintf(dummy,"%s%s",text,VERSION);
744. // Create a new object of type SVMLightModel
  745. jobject SVMLightModel =(*env)->NewObject(env, ids->SVMLightModelCls, ids->ConstructorID_SVMLightModelCls,
  746. (*env)->NewStringUTF(env, dummy),
  747. (jlong) (model->kernel_parm.kernel_type),
  748. (jlong) (model->kernel_parm.poly_degree),
  749. (jdouble) (model->kernel_parm.rbf_gamma),
  750. (jdouble) (model->kernel_parm.coef_lin),
  751. (jdouble) (model->kernel_parm.coef_const),
  752. (*env)->NewStringUTF(env, (model->kernel_parm.custom)),
  753. (jlong) (model->totwords),
  754. (jlong) (model->totdoc),
  755. (jlong) (model->sv_num),
  756. (jdouble) (model->b),
  757. ((model->sv_num > 1) ? doks : (jobjectArray) NULL)
  758. );
  759. if ( SVMLightModel == 0) {
  760. perror("perror: Can't create a new SVMLightModel-Object :");
  761. (*env)->ExceptionDescribe(env);
  762. exit(1);
  763. }
  764. if ( (model->kernel_parm.kernel_type) == 0 ) {
  765. jdoubleArray val = (*env)->NewDoubleArray(env,(jsize) model->totwords +1);
  766. for (j=0;j<model->totwords +1;j++) {
  767. (*env)->SetDoubleArrayRegion(env, val, (jsize) j, (jsize) 1, (jdouble*) &(model->lin_weights)[j]);
  768. }
  769. (*env)->SetObjectField(env, SVMLightModel, ids->ID_doubleArray_linWeights, val);
  770. } else {
771. // a non-linear kernel has no linear weight vector
772. (*env)->SetObjectField(env, SVMLightModel, ids->ID_doubleArray_linWeights, NULL);
  773. }
774. // Set the remaining member variables of the SVMLightModel object to the corresponding values
  775. //(*env)->SetObjectField(env, SVMLightModel, ids->ID_string_format, ((*env)->NewStringUTF(env, dummy)));
  776. free(dummy);
  777. //(*env)->SetLongField(env, SVMLightModel, ids->ID_long_kType, (jlong) (model->kernel_parm.kernel_type));
  778. //(*env)->SetLongField(env, SVMLightModel, ids->ID_long_dParam, (jlong) (model->kernel_parm.poly_degree));
  779. //(*env)->SetDoubleField(env, SVMLightModel, ids->ID_double_gParam, (jdouble) (model->kernel_parm.rbf_gamma));
  780. //(*env)->SetDoubleField(env, SVMLightModel, ids->ID_double_sParam, (jdouble) (model->kernel_parm.coef_lin));
  781. //(*env)->SetDoubleField(env, SVMLightModel, ids->ID_double_rParam, (jdouble) (model->kernel_parm.coef_const) );
  782. //(*env)->SetObjectField(env, SVMLightModel, ids->ID_string_uParam, (*env)->NewStringUTF(env, (model->kernel_parm.custom)));
  783. //(*env)->SetLongField(env, SVMLightModel, ids->ID_long_highFeatIdx, (jlong) (model->totwords) );
  784. //(*env)->SetLongField(env, SVMLightModel, ids->ID_long_trainDocs, (jlong) (model->totdoc));
  785. //(*env)->SetLongField(env, SVMLightModel, ids->ID_long_numSupVecs, (jlong) (model->sv_num));
  786. //(*env)->SetDoubleField(env, SVMLightModel, ids->ID_double_threshold, (jdouble) (model->b));
  787. if(verbosity>=1)
  788. printf(" --- Native C function: classifier model created successfully.\n"); fflush(stdout);
  789. return SVMLightModel;
  790. }
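/* Walk over the Java array of training documents, read the label and the
 * dimension/value arrays from each element, and fill the docs[] and target[] arrays
 * used by the SVM-light learners; totwords, totdoc and ndocuments are updated as a
 * side effect. */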
  791. void createDOCs(JNIEnv * env,JavaParamIDs *JIDs,jobjectArray* tdata,DOC*** docs, double** target, long* totwords, long* totdoc, long* ndocuments) {
  792. jboolean ex = 0;
  793. *ndocuments = 0;
  794. *totwords = 0;
  795. *totdoc = (long) JIDs->tDataSize;
  796. long max_docs=(long) ((JIDs->tDataSize)+3);
  797. (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs); /* feature vectors */
  798. (*target) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
  799. long max_words_doc = 10;
  800. WORD *words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  801. int k=0;
  802. FILE *test = NULL;
  803. int doTest=0;
  804. if(verbosity>10) {
  805. doTest=1;
  806. if ((test = fopen ("jni-train.dat", "w")) == NULL) { perror ("Writing to \"jni-train.dat\" doesn't work.\n"); exit (1); }
  807. }
  808. for (k=0; k<JIDs->tDataSize; k++) {
809. // Get a reference to the k-th training document
  810. jobject traindoc = (*env)->GetObjectArrayElement(env, *tdata, k);
  811. if (traindoc == NULL) {
  812. if (verbosity>2) {
  813. // nexus: debugging ..
  814. printf("\n\n Debugging: ----------------------------------------------- empty document %d! \n\n",k);
  815. }
  816. (*totdoc)--;
  817. ex = (*env)->ExceptionCheck(env);
  818. if (ex) {
  819. (*env)->ExceptionDescribe(env);
  820. (*env)->ExceptionClear(env);
  821. }
  822. continue;
  823. }
824. // Read the label of the k-th training document
  825. jdouble label = (*env)->GetDoubleField(env, traindoc, JIDs->ID_double_label);
  826. //if ((label != 1.0) && (label != -1.0)) {
  827. // perror("\n\nTraining data with wrong label!\n\n");
  828. // exit(1);
  829. //}
  830. //if (doTest)
  831. // fprintf(test,"%s%lf ",label); fflush(stdout);
832. // Get the dimension/value arrays from the Java class
833. // of the k-th training document
  834. jintArray dim = (*env)->GetObjectField(env, traindoc, JIDs->ID_intArray_dimensions);
  835. jdoubleArray val = (*env)->GetObjectField(env, traindoc, JIDs->ID_doubleArray_values);
  836. if ( (dim && val)== 0) {
  837. perror("---------------------------------------------------------- Can't access Dim/Val-Arrays \n");
  838. exit(1);
  839. }
840. // Determine the size of the dimension/value arrays from the
841. // Java class of the k-th training document
  842. jsize dimLen = (*env)->GetArrayLength(env, dim);
  843. jsize valLen = (*env)->GetArrayLength(env, val);
  844. if ((dimLen != valLen) || (dimLen == 0)) {
  845. perror("---------------------------------------------------------- array length is zero or arrays are of different size!\n");
  846. exit(1);
  847. }
848. // Get pointers to the elements of the Java arrays
  849. jint *dimEl = (*env)->GetIntArrayElements(env, dim, 0);
  850. jdouble *valEl = (*env)->GetDoubleArrayElements(env, val, 0);
  851. int* ds;
  852. double *vs;
  853. if (sizeof(int) == sizeof(jint)) {
  854. ds = (int*) dimEl;
  855. } else {
  856. int fi=0;
  857. printf("!!!!!!!!!!!!!!! Warning: java datatype \"jint\" isn't of the same size as C datatype \"int\"\n");
  858. ds = (int*) my_malloc(sizeof(int)*dimLen);
  859. for(fi=0;fi<dimLen;fi++) {
  860. ds[fi] = (int) dimEl[fi];
  861. }
  862. }
  863. if (sizeof(double) == sizeof(jdouble)) {
  864. vs = (double*) valEl;
  865. } else {
  866. int fi=0;
867. printf("!!!!!!!!!!!!!!! Warning: java datatype \"jdouble\" isn't of the same size as C datatype \"double\"\n");
  868. vs = (double*) my_malloc(sizeof(double)*valLen);
  869. for(fi=0;fi<valLen;fi++) {
  870. vs[fi] = (double) valEl[fi];
  871. }
  872. }
  873. /* int g=0;
  874. printf("%lf",label);
  875. for (g=0;g<dimLen;g++){
  876. printf(" %ld:%.16lf ",ds[g],vs[g]);
  877. }
  878. printf("\n%lf",label);
  879. for (g=0;g<dimLen;g++){
  880. printf(" %ld:%.16lf ",dimEl[g],valEl[g]);
  881. }
  882. exit(0); */
  883. // ------------------------------- fill DOCs --------------------------------------------
  884. if (dimLen>max_words_doc) {
  885. free(words);
  886. max_words_doc=dimLen;
  887. words = (WORD *)my_malloc(sizeof(WORD)*(dimLen+10));
  888. }
889. // Build a DOC structure from the label and the dimension/value arrays
  890. jinit_traindoc((double)label,docs,target,dimLen,totwords,totdoc, ds, vs, ndocuments, words, test);
  891. //jinit_traindoc((double)label,docs,target,dimLen,totwords,totdoc, ds, vs, ndocuments, words,NULL);
  892. (*ndocuments)++;
  893. // --------------------------------------------------------------------------------------
894. // Release the Java array elements
  895. (*env)->ReleaseIntArrayElements(env,dim,dimEl,0);
  896. (*env)->ReleaseDoubleArrayElements(env,val,valEl,0);
  897. }
  898. free(words);
  899. if(verbosity>=1) {
  900. fprintf(stdout, "OK. (%ld examples read)\n", *ndocuments); fflush(stdout);
  901. }
  902. if (doTest)
  903. fclose(test);
  904. }
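/* Build one DOC from a single training document: parse the dimension/value pairs into
 * the WORD buffer, track the highest feature number in *totwords, and create the DOC
 * with create_example()/create_svector(). */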
  905. void jinit_traindoc(double doc_label, DOC ***docs, double **label,
  906. long max_words_doc, long int *totwords, long int *totdoc, int* dims,
  907. double *vals, long* ndocuments, WORD* words, FILE* test)
  908. {
  909. char comment[1] = {'\0'};
  910. long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid;
  911. double costfactor;
  912. dnum = *ndocuments;
  913. if(!jparse_document(words,&queryid,&slackid,&costfactor,
  914. &wpos,max_words_doc,dims,vals)) {
915. perror("\nParsing error in training document!\n");
  916. exit(1);
  917. }
  918. int iw=0;
  919. while(iw<max_words_doc) {
  920. if (test != NULL) fprintf(test,"%ld:%.32g ",words[iw].wnum,words[iw].weight); fflush(stdout);
  921. iw++;
  922. }
  923. if (test != NULL) fprintf(test,"\n"); fflush(stdout);
  924. (*label)[dnum]=doc_label;
  925. /* printf("docnum=%ld: Class=%f ",dnum,doc_label); fflush(stdout); */
  926. if(doc_label > 0) dpos++;
  927. if (doc_label < 0) dneg++;
  928. if (doc_label == 0) dunlab++;
  929. if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))
  930. (*totwords)=(words[wpos-2]).wnum;
  931. if((*totwords) > MAXFEATNUM) {
  932. printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM! (%ld>MAXFEATNUM:%ld)\n",*totwords,(long int)MAXFEATNUM);
  933. exit(1);
  934. }
  935. (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
  936. create_svector(words,comment,1.0));
  937. /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); fflush(stdout); */
  938. if(verbosity>=1) {
  939. if((dnum % 20) == 0) {
  940. printf("%ld..",dnum); fflush(stdout);
  941. }
  942. if (dnum == (*totdoc)-1) {
  943. printf("%ld\n",dnum); fflush(stdout);
  944. }
  945. }
  946. }
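/* Copy the (dims, vals) pairs of one document into the WORD array, checking that
 * feature numbers are positive and strictly increasing; the array is terminated with
 * wnum 0 and *numwords is set to the number of entries written, including the
 * terminator. */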
  947. int jparse_document(WORD* words, long *queryid, long *slackid, double *costfactor, long int *numwords, long int max_words_doc, int *dims, double *vals) {
  948. register long wpos;
  949. long wnum;
  950. double weight;
  951. (*queryid)=0;
  952. (*slackid)=0;
  953. (*costfactor)=1;
  954. wpos=0;
  955. while(wpos<max_words_doc) {
  956. wnum = dims[wpos];
  957. weight = vals[wpos]; //(FVAL) vals[wpos];
  958. if(wnum<=0) {
  959. perror ("Feature numbers must be larger or equal to 1!!!\n");
  960. exit (1);
  961. }
  962. if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) {
  963. perror ("Features must be in increasing order!!!\n");
  964. exit(1);
  965. }
  966. (words[wpos]).wnum=wnum;
  967. (words[wpos]).weight=(FVAL)weight;
  968. wpos++;
  969. }
  970. (words[wpos]).wnum=0;
  971. (*numwords)=wpos+1;
  972. return(1);
  973. }