/python_modules/plearn/learners/modulelearners/plugNetwork2SVM.py

https://github.com/lisa-lab/PLearn · Python · 121 lines · 75 code · 23 blank · 23 comment · 13 complexity · c9e1fd5ab01ab4cfeecd93d0990e48bb MD5 · raw file

import os
import os.path
import subprocess
import sys

from plearn.learners.modulelearners import *
from plearn.learners.SVM import *
  4. if __name__ == '__main__':
  5. if len(sys.argv) <= 5:
  6. print "Usage:\n\tpython "+sys.argv[0]+" learner train_data valid_data test_data ports_list\n"
  7. print "Example:\n\tpython "+sys.argv[0]+" 3layer_DBN.psave trainData.dmat validData.dmat testData.dmat rbm3.hidden.state [rbm2.hidden.state ...]\n"
  8. sys.exit(0)
  9. learner_filename = sys.argv[1]
  10. dataTrain_filename = sys.argv[2]
  11. dataValid_filename = sys.argv[3]
  12. dataTest_filename = sys.argv[4]
  13. ports_list = sys.argv[5:]
  14. if os.path.isfile(learner_filename) == False and os.path.isdir(learner_filename) == False:
  15. raise EOFError, "ERROR : Learner file cannot be find\n\tCould not find file "+learner_filename
  16. learner = loadModuleLearner(learner_filename)
  17. learner_nickname = os.path.basename(learner_filename)+"_".join(ports_list).replace(".","")
  18. result_dir = os.path.dirname(learner_filename)
  19. output_filename = result_dir+'/SVM_results_'+"_"+learner_nickname+"-"+os.path.basename(dataTrain_filename).replace(".vmat","").replace(".amat","")
  20. print
  21. print "Results will be written in "+output_filename
  22. # #
  23. # MAIN PART #
  24. # #
  25. new_learner = plug2output( learner, ports_list)
  26. for typeDataSet in ['Train','Valid','Test']:
  27. data_filename = globals()['data'+typeDataSet+'_filename']
  28. if os.path.isfile(data_filename) == False and os.path.isdir(data_filename) == False:
  29. raise EOFError, "Could not find "+data_filename
  30. sys.exit(0)
  31. dataSet = pl.AutoVMatrix( filename = data_filename )
  32. print "CONVERSION "+data_filename
  33. globals()[typeDataSet+'_outputs'], globals()[typeDataSet+'_targets'] = computeOutputsTargets( new_learner, dataSet)
  34. #
  35. # Normalizing the data (/!\ compute statistics on the training data and assumes it comes first)
  36. #
  37. if typeDataSet == 'Train':
  38. mean, std = normalize(globals()[typeDataSet+'_outputs'],None,None)
  39. else:
  40. normalize(globals()[typeDataSet+'_outputs'],mean,std)
  41. my_SVM = SVM()
  42. print "Writing results in "+output_filename
  43. if os.path.isfile(output_filename):
  44. print "WARNING : output "+output_filename+" already exists"
  45. FID = open(output_filename, 'a')
  46. abspath = os.path.realpath(learner_filename)
  47. FID.write('LEARNER.: '+abspath+'\n')
  48. for i in range(3):
  49. abspath = os.path.dirname(abspath)
  50. global_results = abspath+'/global_stats.pmat'
  51. if os.path.isfile(global_results):
  52. os.system("echo baseline test error rate : `plearn vmat cat "+global_results+" | tail -1 | awk '{print $NF}'` \% >> "+output_filename )
  53. else:
  54. print "WARNING : could not find global_stats.pmat\n\t( "+abspath+"/global_stats.pmat )"
  55. FID.write('Train...: '+os.path.realpath(dataTrain_filename)+'\n')
  56. FID.write('Valid...: '+os.path.realpath(dataValid_filename)+'\n')
  57. FID.write('Test....: '+os.path.realpath(dataTest_filename)+'\n')
  58. FID.close()
  59. # A log file where all the intermediate results will be stored
  60. my_SVM.save_filename = output_filename
  61. # Trying the linear kernel
  62. # with several values for C (i.e. bias-variance trade-off in SVM)
  63. #
  64. my_SVM.train_and_tune( 'LINEAR' , [[Train_outputs,Train_targets], [Valid_outputs,Valid_targets] ])
  65. best_valid_error_rate = my_SVM.valid_error_rate
  66. print
  67. print "Tried parameters : "+str(my_SVM.tried_parameters)
  68. print 'BEST ERROR RATE: '+str(best_valid_error_rate)+' (valid) for '+str(my_SVM.best_parameters)
  69. # Trying the RBF (Gaussian) kernel
  70. # with several values for C and 'gamma' (kernel width)
  71. #
  72. # my_SVM.train_and_tune( 'RBF' , [[Train_outputs,Train_targets], [Valid_outputs,Valid_targets] ])
  73. best_valid_error_rate = my_SVM.valid_error_rate
  74. print
  75. print "Tried parameters : "+str(my_SVM.tried_parameters)
  76. print 'BEST ERROR RATE: '+str(best_valid_error_rate)+' (valid) for '+str(my_SVM.best_parameters)
  77. # Trying the RBF kernel once more
  78. # i.e. more precise tuning
  79. # with more values for C and 'gamma' (kernel width)
  80. #
  81. # my_SVM.train_and_tune( 'RBF' , [[Train_outputs,Train_targets], [Valid_outputs,Valid_targets] ])
  82. best_valid_error_rate = my_SVM.valid_error_rate
  83. print
  84. print "Tried parameters : "+str(my_SVM.tried_parameters)
  85. print 'BEST ERROR RATE: '+str(best_valid_error_rate)+' (valid) for '+str(my_SVM.best_parameters)
  86. # Trying the polynomial kernel
  87. # with several values for C and the degree
  88. #
  89. # my_SVM.train_and_tune( 'POLY' , [[Train_outputs,Train_targets], [Valid_outputs,Valid_targets] ])
  90. best_valid_error_rate = my_SVM.valid_error_rate
  91. print
  92. print "Tried parameters : "+str(my_SVM.tried_parameters)
  93. print 'BEST ERROR RATE: '+str(best_valid_error_rate)+' (valid) for '+str(my_SVM.best_parameters)
  94. my_SVM.test( [Test_outputs,Test_targets] )
  95. test_error_rate = my_SVM.error_rate
  96. print "Test ERROR RATE with best model : "+str(test_error_rate)
  97. print
  98. print "Results written in "+output_filename