/nlp-project-code/joel/condor_scripts/classifier/gencon.test.py
Python | 278 lines | 225 code | 16 blank | 37 comment | 3 complexity | d324c5eac62d4ec2573afafcfb5e8983 MD5 | raw file
#############################################
# python script to call numerous condor job scripts #
#############################################
import sys, os
# Cluster filesystem roots: SCRATCH holds datasets, trained models, and job
# logs; HOME holds the code tree the generated condor jobs execute from.
SCRATCH = "/scratch/cluster/joeliven/datasets/"
HOME = "/u/joeliven/Documents/"
# HTCondor submit-description directives, written verbatim (one per line)
# into each generated condor.test.*.sh submit file by write_condor_script.
universe = "universe=vanilla"
Initialdir = "Initialdir=/u/joeliven/Documents"
Group = """+Group=\"GRAD\""""
Project = """+Project=\"INSTRUCTIONAL\""""
ProjectDescription = """+ProjectDescription=\"CS388 Final Project\""""
GPUJob = "+GPUJob=true"
# Stricter variant (TitanBlack-only machines) kept disabled below; any GPU
# slot is accepted.
# requirements = "requirements=(TARGET.GPUSlot && TitanBlack == True)"
requirements = "requirements=(TARGET.GPUSlot)"
Notification = "Notification=complete"
Notify_user = "Notify_user=joeliven@gmail.com"
Queue="Queue 1"
- def write_condor_script(program, S, T, inv, vac, cpu_gpu, auto_type, c_type):
- name="%s.%s%s.%s" % (vac, S, T, c_type)
- savefile="condor.test." + name + ".sh"
- executable="ml/condor_scripts/" + vac + "/" + "run_test." + name + ".sh"
- with open(savefile, 'w') as f:
- Executable = "Executable = %s%s" % (HOME,executable)
- Log="Log=%samazon/logs/test.%s.log.$(Cluster)" % (SCRATCH, name)
- Output="Output=%samazon/logs/test.%s.out.$(Cluster)" % (SCRATCH, name)
- Error="Error=%samazon/logs/test.%s.err.$(Cluster)" % (SCRATCH, name)
- f.write(universe)
- f.write("\n")
- f.write(Initialdir)
- f.write("\n")
- f.write(Executable)
- f.write("\n")
- f.write(Group)
- f.write("\n")
- f.write(Project)
- f.write("\n")
- f.write(ProjectDescription)
- f.write("\n")
- f.write(GPUJob)
- f.write("\n")
- f.write(requirements)
- f.write("\n")
- f.write(Notification)
- f.write("\n")
- f.write(Notify_user)
- f.write("\n")
- f.write(Log)
- f.write("\n")
- f.write(Output)
- f.write("\n")
- f.write(Error)
- f.write("\n")
- f.write(Queue)
- f.write("\n")
################################################################
def write_bash_script(program, S, T, inv, vac, cpu_gpu, auto_type, c_type):
    """Write the bash runner script run_test.<vac>.<S><T>.<c_type>.sh.

    The generated script sets up the anaconda/CUDA environment and invokes
    `program` in test mode on the S->T domain-adaptation classifier.

    Args:
        program:   path (relative to the condor job's Initialdir) of the
                   python program to run.
        S, T:      source and target domain codes (e.g. "B", "E", "M").
        inv:       True when the on-disk train/autoencoder/vocab artifacts
                   for this pair were produced under the inverted (T before
                   S) directory naming with swapped percentage suffixes.
        vac:       model-family tag used in file names (e.g. "classifier").
        cpu_gpu:   Theano device string ("cpu" or "gpu").
        auto_type: autoencoder directory name under amazon/autoencoders/.
        c_type:    classifier directory name under amazon/classifiers/.
    """
    savefile = "run_test.%s.%s%s.%s.sh" % (vac, S, T, c_type)
    # d1/d2 are the two domains in on-disk naming order; pct_pairs lists the
    # (d1-percent, d2-percent) suffixes for autoencoders 1..3, and the first
    # pair also names the train/val split. Computing this once replaces the
    # previous four duplicated `if inv:` branches.
    if inv:
        d1, d2 = T, S
        pct_pairs = [("0", "100"), ("50", "50"), ("100", "0")]
    else:
        d1, d2 = S, T
        pct_pairs = [("100", "0"), ("50", "50"), ("0", "100")]
    p1, p2 = pct_pairs[0]
    with open(savefile, 'w') as f:
        f.write('#!/usr/local/bin/bash\n')
        f.write('P="python"\n')
        f.write('PROGRAM="%s"\n' % program)
        f.write('TRAINING_DATA="%samazon/train/%s_%s/%s%s_%s%s.train"\n' % (SCRATCH, d1, d2, d1, p1, d2, p2))
        f.write('VAL_DATA="%samazon/train/%s_%s/%s%s_%s%s.val"\n' % (SCRATCH, d1, d2, d1, p1, d2, p2))
        # Test sets and classifier model paths always use the caller's S,T
        # order regardless of `inv`.
        f.write('TEST_DATA_SOURCE="%samazon/test/%s.test"\n' % (SCRATCH, S))
        f.write('TEST_DATA_TARGET="%samazon/test/%s.test"\n' % (SCRATCH, T))
        f.write('LOAD_MODEL_ARCH="%samazon/classifiers/%s/%s%s/%s%s.%s.arch.json"\n' % (SCRATCH, c_type, S, T, S, T, vac))
        f.write('LOAD_MODEL_WEIGHTS="%samazon/classifiers/%s/%s%s/%s%s.%s.weights.hdf5"\n' % (SCRATCH, c_type, S, T, S, T, vac))
        f.write('SAVE_MODEL_ARCH="%samazon/classifiers/%s/%s%s/%s%s.%s.arch.json"\n' % (SCRATCH, c_type, S, T, S, T, vac))
        f.write('SAVE_MODEL_WEIGHTS="%samazon/classifiers/%s/%s%s/%s%s.%s.weights.final.hdf5"\n' % (SCRATCH, c_type, S, T, S, T, vac))
        f.write('SAVE_CHECKPOINTS_WEIGHTS="%samazon/classifiers/%s/%s%s/%s%s.%s.weights.hdf5"\n' % (SCRATCH, c_type, S, T, S, T, vac))
        f.write('NUM_ENCODERS="3"\n')
        # One arch/weights pair per stacked autoencoder (the 100/0, 50/50,
        # and 0/100 source/target mixes, in that order for inv=False).
        for idx, (pa, pb) in enumerate(pct_pairs, start=1):
            f.write('AUTOENCODER_ARCH_FILE%d="%samazon/autoencoders/%s/%s%s/%s%s_%s%s.%s.arch.json"\n' % (idx, SCRATCH, auto_type, d1, d2, d1, pa, d2, pb, "auto"))
            f.write('AUTOENCODER_WEIGHTS_FILE%d="%samazon/autoencoders/%s/%s%s/%s%s_%s%s.%s.weights.hdf5"\n' % (idx, SCRATCH, auto_type, d1, d2, d1, pa, d2, pb, "auto"))
        # Model/training hyperparameters. (Previously commented-out
        # debug-sized alternates: N_EPOCHS=2, EPOCH_SIZE=640, MAXLEN_REV=8,
        # VAL_BATCHES=5, TEST_BATCHES=100.)
        f.write('WORD2VEC_D="300"\n')
        f.write('SENT2VEC_D="128"\n')
        f.write('REV2VEC_D="64"\n')
        f.write('VOCAB_SIZE="20000"\n')
        f.write('BATCH_SIZE="32"\n')
        f.write('N_EPOCHS="50"\n')
        f.write('EPOCH_SIZE="32000"\n')
        f.write('MAXLEN_SENT="20"\n')
        f.write('MAXLEN_REV="5"\n')
        f.write('OPTIMIZER="adam"\n')
        f.write('LR_START="0.001"\n')
        f.write('LR_END="0.001"\n')
        f.write('LR_HYP="0.001"\n')
        f.write('DROPOUT_LSTM_U="0.1"\n')
        f.write('DROPOUT_LSTM_W="0.1"\n')
        f.write('DENSE_L2_W="0.0005"\n')
        f.write('DENSE_L2_B="0.0005"\n')
        f.write('VAL_BATCHES="10"\n')
        f.write('TEST_BATCHES="20"\n')
        f.write('GTOR_LIM="100000"\n')
        f.write('GTOR_LIM_TEST="10000"\n')
        # Vocabulary artifacts live under the same on-disk domain ordering
        # as the training data (note the underscore in the directory name).
        f.write('IDX2V="%samazon/vocab/%s_%s/%s_%s_idx2v.npy"\n' % (SCRATCH, d1, d2, d1, d2))
        f.write('IDX2W="%samazon/vocab/%s_%s/%s_%s_idx2w.list.pickle"\n' % (SCRATCH, d1, d2, d1, d2))
        f.write('W2V="%samazon/vocab/%s_%s/%s_%s_w2v.dict.pickle"\n' % (SCRATCH, d1, d2, d1, d2))
        f.write('W2IDX="%samazon/vocab/%s_%s/%s_%s_w2idx.dict.pickle"\n' % (SCRATCH, d1, d2, d1, d2))
        f.write('VOCAB_LIST="%samazon/vocab/%s_%s/%s_%s_vocab.list.pickle"\n' % (SCRATCH, d1, d2, d1, d2))
        f.write('EXPID="%s.%s%s"\n' % (vac, S, T))
        f.write("#############################\n# SET UP ENVIRONMENT\n#############################\n")
        f.write('export PATH="/u/joeliven/anaconda3/bin:$PATH"\n')
        f.write('PYTHONPATH="${PYTHONPATH}:/u/joeliven/Documents/ml/"\n')
        f.write('export PYTHONPATH\n')
        f.write('echo "path: "\n')
        f.write('echo $PATH\n')
        f.write('echo "pythonpath: "\n')
        f.write('echo $PYTHONPATH\n')
        f.write('source activate ml34\n')
        f.write('cuda=/opt/cuda-7.5\n')
        # The '#'-prefixed lines below are emitted into the generated script
        # as bash comments (disabled cuDNN setup) -- kept byte-for-byte.
        f.write('# cuDNN=/u/ebanner/builds/cudnn-7.0-linux-x64-v3.0-prod\n')
        f.write('# export LD_LIBRARY_PATH=$cuDNN/lib64:$cuda/lib64:$LD_LIBRARY_PATH\n')
        f.write('# cuDNN=/u/ebanner/builds/cudnn-7.0-linux-x64-v3.0-prod\n')
        f.write('# export CPATH=$cuDNN/include:$CPATH\n')
        f.write('# export LIBRARY_PATH=$cuDNN/lib64:$LD_LIBRARY_PATH\n')
        f.write('# export CUDNN_PATH=$cuDNN\n')
        f.write('export LD_LIBRARY_PATH=$cuda/lib64:$LD_LIBRARY_PATH\n')
        f.write('export LIBRARY_PATH=$LD_LIBRARY_PATH\n')
        f.write('# export CUDNN_PATH=$cuDNN\n')
        f.write('export CUDA_HOME=$cuda\n')
        f.write('echo "pwd: "\n')
        f.write('pwd\n')
        f.write("#############################\n# EXECUTION\n#############################\n")
        f.write('THEANO_FLAGS=device=%s,floatX=float32 time "$P" "$PROGRAM" \\\n' % (cpu_gpu))
        f.write('--verbose \\\n')
        # NOTE(review): --gpu is always passed even when cpu_gpu == "cpu";
        # confirm the downstream program tolerates that combination.
        f.write('--gpu \\\n')
        f.write('--uses-embeddings \\\n')
        f.write('--test \\\n')
        f.write('--test-data-source "$TEST_DATA_SOURCE" \\\n')
        f.write('--test-data-target "$TEST_DATA_TARGET" \\\n')
        f.write('--load-model \\\n')
        f.write('--load-model-arch "$LOAD_MODEL_ARCH" \\\n')
        f.write('--load-model-weights "$LOAD_MODEL_WEIGHTS" \\\n')
        f.write('--num-encoders "$NUM_ENCODERS" \\\n')
        for idx in range(1, 4):
            f.write('--autoencoder-arch "$AUTOENCODER_ARCH_FILE%d" \\\n' % idx)
            f.write('--autoencoder-weights "$AUTOENCODER_WEIGHTS_FILE%d" \\\n' % idx)
        f.write('--word2v-d "$WORD2VEC_D" \\\n')
        f.write('--sent2v-d "$SENT2VEC_D" \\\n')
        f.write('--rev2v-d "$REV2VEC_D" \\\n')
        f.write('--vocab-size "$VOCAB_SIZE" \\\n')
        f.write('--batch-size "$BATCH_SIZE" \\\n')
        f.write('--n-epochs "$N_EPOCHS" \\\n')
        f.write('--epoch-size "$EPOCH_SIZE" \\\n')
        f.write('--maxlen-sent "$MAXLEN_SENT" \\\n')
        f.write('--maxlen-rev "$MAXLEN_REV" \\\n')
        f.write('--optimizer "$OPTIMIZER" \\\n')
        f.write('--lr-start "$LR_START" \\\n')
        f.write('--lr-end "$LR_END" \\\n')
        f.write('--lr-hyp "$LR_HYP" \\\n')
        f.write('--dropout-lstm-w "$DROPOUT_LSTM_W" \\\n')
        f.write('--dropout-lstm-u "$DROPOUT_LSTM_U" \\\n')
        f.write('--dense-l2-w "$DENSE_L2_W" \\\n')
        f.write('--dense-l2-b "$DENSE_L2_B" \\\n')
        f.write('--val-batches "$VAL_BATCHES" \\\n')
        f.write('--test-batches "$TEST_BATCHES" \\\n')
        f.write('--gtor-lim "$GTOR_LIM" \\\n')
        f.write('--gtor-lim-test "$GTOR_LIM_TEST" \\\n')
        f.write('--idx2v "$IDX2V" \\\n')
        f.write('--idx2w "$IDX2W" \\\n')
        f.write('--w2idx "$W2IDX" \\\n')
        f.write('--w2v "$W2V" \\\n')
        f.write('--vocab-list "$VOCAB_LIST" \\\n')
        f.write('--expID "$EXPID"\n')
        f.write('source deactivate\n')
##################################################################
##################################################################
# Driver: emit one condor submit file and one bash runner script for every
# (source, target) domain pair (B=Books, E=Electronics, M=Movies). The
# boolean flag marks pairs whose on-disk data directories were created
# under the inverted (target_source) naming.
DOMAIN_PAIRS = [
    ("B", "E", False),
    ("E", "B", True),
    ("B", "M", False),
    ("M", "B", True),
    ("E", "M", False),
    ("M", "E", True),
]

PROGRAM = "ml/nlp_proj/lstm_classifier_mse.py"
VAC = "classifier"
CPU_GPU = "gpu"
AUTO_TYPE = "mse128"
C_TYPE = "mse128_64"

job_args = [
    (PROGRAM, src, tgt, inverted, VAC, CPU_GPU, AUTO_TYPE, C_TYPE)
    for src, tgt, inverted in DOMAIN_PAIRS
]
# All condor submit files first, then all bash runner scripts.
for job in job_args:
    write_condor_script(*job)
for job in job_args:
    write_bash_script(*job)