PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/nlp-project-code/joel/condor_scripts/classifier/gencon.test.py

https://gitlab.com/joeliven/domain-adaptation-sentiment-analysis
Python | 278 lines | 225 code | 16 blank | 37 comment | 3 complexity | d324c5eac62d4ec2573afafcfb5e8983 MD5 | raw file
  #############################################
  # Python script that generates Condor submit files and the bash run
  # scripts they execute, one pair per (source, target) test job.
  #############################################
  4. import sys, os
  # Filesystem roots used when building paths.  Both end with "/" because the
  # code below concatenates relative paths directly onto them.
  SCRATCH = '/scratch/cluster/joeliven/datasets/'
  HOME = '/u/joeliven/Documents/'

  # Fixed lines of the Condor submit description.  Each string is written
  # verbatim as one line of every generated submit file.
  universe = 'universe=vanilla'
  Initialdir = 'Initialdir=/u/joeliven/Documents'
  Group = '+Group="GRAD"'
  Project = '+Project="INSTRUCTIONAL"'
  ProjectDescription = '+ProjectDescription="CS388 Final Project"'
  GPUJob = '+GPUJob=true'
  # Stricter alternative kept for reference:
  #   requirements=(TARGET.GPUSlot && TitanBlack == True)
  requirements = 'requirements=(TARGET.GPUSlot)'
  Notification = 'Notification=complete'
  Notify_user = 'Notify_user=joeliven@gmail.com'
  Queue = 'Queue 1'
  def write_condor_script(program, S, T, inv, vac, cpu_gpu, auto_type, c_type):
      """Write the Condor submit file for one test job.

      The file is created in the current working directory and is named
      ``condor.test.<vac>.<S><T>.<c_type>.sh``.  It contains the module-level
      submit settings plus the per-job ``Executable``, ``Log``, ``Output`` and
      ``Error`` lines, one setting per line.

      Args:
          program: unused here.
          S: source-domain code; part of the job name.
          T: target-domain code; part of the job name.
          inv: unused here.
          vac: job tag; part of the job name and of the executable's directory.
          cpu_gpu: unused here.
          auto_type: unused here.
          c_type: classifier tag; part of the job name.

      The unused parameters keep the signature identical to
      ``write_bash_script`` so both can be called with the same argument list.
      """
      job_name = "%s.%s%s.%s" % (vac, S, T, c_type)
      run_script = "ml/condor_scripts/" + vac + "/" + "run_test." + job_name + ".sh"
      # Log, Output and Error share everything up to the extension.
      log_stem = "%samazon/logs/test.%s" % (SCRATCH, job_name)

      submit_lines = [
          universe,
          Initialdir,
          "Executable = %s%s" % (HOME, run_script),
          Group,
          Project,
          ProjectDescription,
          GPUJob,
          requirements,
          Notification,
          Notify_user,
          "Log=%s.log.$(Cluster)" % log_stem,
          "Output=%s.out.$(Cluster)" % log_stem,
          "Error=%s.err.$(Cluster)" % log_stem,
          Queue,
      ]

      with open("condor.test." + job_name + ".sh", 'w') as f:
          f.writelines(line + "\n" for line in submit_lines)
  ################################################################
  def write_bash_script(program, S, T, inv, vac, cpu_gpu, auto_type, c_type):
      """Write the bash wrapper that runs ``program`` in test mode for one job.

      The script is created in the current working directory and is named
      ``run_test.<vac>.<S><T>.<c_type>.sh``.  It consists of, in order: the
      shebang, shell variable definitions (paths and hyper-parameters), the
      environment set-up, one multi-line command invoking ``program``, and
      ``source deactivate``.

      Args:
          program: path of the Python program; stored in ``$PROGRAM``.
          S: source-domain code.
          T: target-domain code.
          inv: when true, the train/val, autoencoder and vocab paths are built
              with T before S and the percentage mixes are mirrored, so the
              share attached to S is 100, 50, 0 in either ordering.
              Classifier and test-set paths always use the S, T order.
          vac: job tag; used in the script name, the classifier file names and
              ``$EXPID``.
          cpu_gpu: value substituted into ``THEANO_FLAGS=device=...``.
          auto_type: sub-directory of ``amazon/autoencoders``.
          c_type: sub-directory of ``amazon/classifiers``; also part of the
              script name.

      ``TRAINING_DATA``, ``VAL_DATA`` and the ``SAVE_*`` variables are defined
      in the generated script but are not passed on the command line.
      """
      # Pick the on-disk ordering of the domain pair and the matching mixes.
      if inv:
          first, second = T, S
          mixes = (("0", "100"), ("50", "50"), ("100", "0"))
      else:
          first, second = S, T
          mixes = (("100", "0"), ("50", "50"), ("0", "100"))

      split_stem = "%samazon/train/%s_%s/%s%s_%s%s" % (
          SCRATCH, first, second, first, mixes[0][0], second, mixes[0][1])
      clf_stem = "%samazon/classifiers/%s/%s%s/%s%s.%s" % (
          SCRATCH, c_type, S, T, S, T, vac)
      vocab_stem = "%samazon/vocab/%s_%s/%s_%s_" % (
          SCRATCH, first, second, first, second)

      # (name, value) pairs, emitted in this order as NAME="value" lines.
      shell_vars = [
          ("P", "python"),
          ("PROGRAM", program),
          ("TRAINING_DATA", split_stem + ".train"),
          ("VAL_DATA", split_stem + ".val"),
          ("TEST_DATA_SOURCE", "%samazon/test/%s.test" % (SCRATCH, S)),
          ("TEST_DATA_TARGET", "%samazon/test/%s.test" % (SCRATCH, T)),
          ("LOAD_MODEL_ARCH", clf_stem + ".arch.json"),
          ("LOAD_MODEL_WEIGHTS", clf_stem + ".weights.hdf5"),
          ("SAVE_MODEL_ARCH", clf_stem + ".arch.json"),
          ("SAVE_MODEL_WEIGHTS", clf_stem + ".weights.final.hdf5"),
          ("SAVE_CHECKPOINTS_WEIGHTS", clf_stem + ".weights.hdf5"),
          ("NUM_ENCODERS", "3"),
      ]
      for idx, (pct_first, pct_second) in enumerate(mixes, 1):
          auto_stem = "%samazon/autoencoders/%s/%s%s/%s%s_%s%s.%s" % (
              SCRATCH, auto_type, first, second,
              first, pct_first, second, pct_second, "auto")
          shell_vars.append(
              ("AUTOENCODER_ARCH_FILE%d" % idx, auto_stem + ".arch.json"))
          shell_vars.append(
              ("AUTOENCODER_WEIGHTS_FILE%d" % idx, auto_stem + ".weights.hdf5"))
      shell_vars.extend([
          ("WORD2VEC_D", "300"),
          ("SENT2VEC_D", "128"),
          ("REV2VEC_D", "64"),
          ("VOCAB_SIZE", "20000"),
          ("BATCH_SIZE", "32"),
          ("N_EPOCHS", "50"),
          ("EPOCH_SIZE", "32000"),
          ("MAXLEN_SENT", "20"),
          ("MAXLEN_REV", "5"),
          ("OPTIMIZER", "adam"),
          ("LR_START", "0.001"),
          ("LR_END", "0.001"),
          ("LR_HYP", "0.001"),
          ("DROPOUT_LSTM_U", "0.1"),
          ("DROPOUT_LSTM_W", "0.1"),
          ("DENSE_L2_W", "0.0005"),
          ("DENSE_L2_B", "0.0005"),
          ("VAL_BATCHES", "10"),
          ("TEST_BATCHES", "20"),
          ("GTOR_LIM", "100000"),
          ("GTOR_LIM_TEST", "10000"),
          ("IDX2V", vocab_stem + "idx2v.npy"),
          ("IDX2W", vocab_stem + "idx2w.list.pickle"),
          ("W2V", vocab_stem + "w2v.dict.pickle"),
          ("W2IDX", vocab_stem + "w2idx.dict.pickle"),
          ("VOCAB_LIST", vocab_stem + "vocab.list.pickle"),
          ("EXPID", "%s.%s%s" % (vac, S, T)),
      ])
      definitions = "".join('%s="%s"\n' % pair for pair in shell_vars)

      # Environment set-up, copied into the script verbatim (including the
      # commented-out cuDNN lines).
      environment = "".join([
          "#############################\n# SET UP ENVIRONMENT\n#############################\n",
          'export PATH="/u/joeliven/anaconda3/bin:$PATH"\n',
          'PYTHONPATH="${PYTHONPATH}:/u/joeliven/Documents/ml/"\n',
          'export PYTHONPATH\n',
          'echo "path: "\n',
          'echo $PATH\n',
          'echo "pythonpath: "\n',
          'echo $PYTHONPATH\n',
          'source activate ml34\n',
          'cuda=/opt/cuda-7.5\n',
          '# cuDNN=/u/ebanner/builds/cudnn-7.0-linux-x64-v3.0-prod\n',
          '# export LD_LIBRARY_PATH=$cuDNN/lib64:$cuda/lib64:$LD_LIBRARY_PATH\n',
          '# cuDNN=/u/ebanner/builds/cudnn-7.0-linux-x64-v3.0-prod\n',
          '# export CPATH=$cuDNN/include:$CPATH\n',
          '# export LIBRARY_PATH=$cuDNN/lib64:$LD_LIBRARY_PATH\n',
          '# export CUDNN_PATH=$cuDNN\n',
          'export LD_LIBRARY_PATH=$cuda/lib64:$LD_LIBRARY_PATH\n',
          'export LIBRARY_PATH=$LD_LIBRARY_PATH\n',
          '# export CUDNN_PATH=$cuDNN\n',
          'export CUDA_HOME=$cuda\n',
          'echo "pwd: "\n',
          'pwd\n',
          "#############################\n# EXECUTION\n#############################\n",
      ])

      # Command-line options in emission order; a None variable marks a bare
      # flag.  NOTE(review): --gpu is emitted regardless of cpu_gpu -- confirm
      # that is intended when cpu_gpu is not "gpu".
      cli = [
          ("--verbose", None),
          ("--gpu", None),
          ("--uses-embeddings", None),
          ("--test", None),
          ("--test-data-source", "TEST_DATA_SOURCE"),
          ("--test-data-target", "TEST_DATA_TARGET"),
          ("--load-model", None),
          ("--load-model-arch", "LOAD_MODEL_ARCH"),
          ("--load-model-weights", "LOAD_MODEL_WEIGHTS"),
          ("--num-encoders", "NUM_ENCODERS"),
          ("--autoencoder-arch", "AUTOENCODER_ARCH_FILE1"),
          ("--autoencoder-weights", "AUTOENCODER_WEIGHTS_FILE1"),
          ("--autoencoder-arch", "AUTOENCODER_ARCH_FILE2"),
          ("--autoencoder-weights", "AUTOENCODER_WEIGHTS_FILE2"),
          ("--autoencoder-arch", "AUTOENCODER_ARCH_FILE3"),
          ("--autoencoder-weights", "AUTOENCODER_WEIGHTS_FILE3"),
          ("--word2v-d", "WORD2VEC_D"),
          ("--sent2v-d", "SENT2VEC_D"),
          ("--rev2v-d", "REV2VEC_D"),
          ("--vocab-size", "VOCAB_SIZE"),
          ("--batch-size", "BATCH_SIZE"),
          ("--n-epochs", "N_EPOCHS"),
          ("--epoch-size", "EPOCH_SIZE"),
          ("--maxlen-sent", "MAXLEN_SENT"),
          ("--maxlen-rev", "MAXLEN_REV"),
          ("--optimizer", "OPTIMIZER"),
          ("--lr-start", "LR_START"),
          ("--lr-end", "LR_END"),
          ("--lr-hyp", "LR_HYP"),
          ("--dropout-lstm-w", "DROPOUT_LSTM_W"),
          ("--dropout-lstm-u", "DROPOUT_LSTM_U"),
          ("--dense-l2-w", "DENSE_L2_W"),
          ("--dense-l2-b", "DENSE_L2_B"),
          ("--val-batches", "VAL_BATCHES"),
          ("--test-batches", "TEST_BATCHES"),
          ("--gtor-lim", "GTOR_LIM"),
          ("--gtor-lim-test", "GTOR_LIM_TEST"),
          ("--idx2v", "IDX2V"),
          ("--idx2w", "IDX2W"),
          ("--w2idx", "W2IDX"),
          ("--w2v", "W2V"),
          ("--vocab-list", "VOCAB_LIST"),
          ("--expID", "EXPID"),
      ]
      fragments = [
          'THEANO_FLAGS=device=%s,floatX=float32 time "$P" "$PROGRAM"' % cpu_gpu
      ]
      for flag, var in cli:
          if var is None:
              fragments.append(flag)
          else:
              fragments.append('%s "$%s"' % (flag, var))
      # Every fragment but the last ends with a space and a backslash so the
      # shell treats the whole thing as a single command.
      command = " \\\n".join(fragments) + "\n"

      savefile = "run_test.%s.%s%s.%s.sh" % (vac, S, T, c_type)
      with open(savefile, 'w') as f:
          f.write('#!/usr/local/bin/bash\n')
          f.write(definitions)
          f.write(environment)
          f.write(command)
          f.write('source deactivate\n')
  ##################################################################
  # Job matrix and driver
  ##################################################################
  # All six ordered (source, target) pairs over the domain codes B, E and M
  # (presumably Amazon review categories -- confirm against the dataset).
  # The third field is the `inv` flag passed to the script writers: it is True
  # for EB, MB and ME, whose data files are looked up under the opposite
  # ordering (BE, BM, EM).
  S_T = [
      ("B", "E", False),
      ("E", "B", True),
      ("B", "M", False),
      ("M", "B", True),
      ("E", "M", False),
      ("M", "E", True),
  ]

  # Settings shared by every job.
  program = "ml/nlp_proj/lstm_classifier_mse.py"
  vac = "classifier"
  cpu_gpu = "gpu"
  auto_type = "mse128"
  c_type = "mse128_64"

  # One positional-argument list per job, in the order both writers expect.
  shared_tail = [vac, cpu_gpu, auto_type, c_type]
  params = [[program, s, t, inv] + shared_tail for (s, t, inv) in S_T]

  # Emit every Condor submit file first, then every bash run script.
  for emit in (write_condor_script, write_bash_script):
      for args in params:
          emit(*args)