| WORKDIR="your_CodeT5_path/CodeT5" | |
| export PYTHONPATH=$WORKDIR | |
| TASK=${1} | |
| SUB_TASK=${2} | |
| MODEL_TAG=${3} | |
| GPU=${4} | |
| DATA_NUM=${5} | |
| BS=${6} | |
| LR=${7} | |
| SRC_LEN=${8} | |
| TRG_LEN=${9} | |
| PATIENCE=${10} | |
| EPOCH=${11} | |
| WARMUP=${12} | |
| MODEL_DIR=${13} | |
| SUMMARY_DIR=${14} | |
| RES_FN=${15} | |
| if [[ $DATA_NUM == -1 ]]; then | |
| DATA_TAG='all' | |
| else | |
| DATA_TAG=$DATA_NUM | |
| EPOCH=1 | |
| fi | |
| if [[ ${TASK} == 'multi_task' ]]; then | |
| FULL_MODEL_TAG=${MODEL_TAG}_${DATA_TAG}_lr${LR}_s${16} | |
| else | |
| FULL_MODEL_TAG=${MODEL_TAG}_${DATA_TAG}_lr${LR}_bs${BS}_src${SRC_LEN}_trg${TRG_LEN}_pat${PATIENCE}_e${EPOCH} | |
| fi | |
| if [[ ${SUB_TASK} == none ]]; then | |
| OUTPUT_DIR=${MODEL_DIR}/${TASK}/${FULL_MODEL_TAG} | |
| else | |
| OUTPUT_DIR=${MODEL_DIR}/${TASK}/${SUB_TASK}/${FULL_MODEL_TAG} | |
| fi | |
| CACHE_DIR=${OUTPUT_DIR}/cache_data | |
| RES_DIR=${OUTPUT_DIR}/prediction | |
| LOG=${OUTPUT_DIR}/train.log | |
| mkdir -p ${OUTPUT_DIR} | |
| mkdir -p ${CACHE_DIR} | |
| mkdir -p ${RES_DIR} | |
| if [[ $MODEL_TAG == roberta ]]; then | |
| MODEL_TYPE=roberta | |
| TOKENIZER=roberta-base | |
| MODEL_PATH=roberta-base | |
| elif [[ $MODEL_TAG == codebert ]]; then | |
| MODEL_TYPE=roberta | |
| TOKENIZER=roberta-base | |
| MODEL_PATH=microsoft/codebert-base | |
| elif [[ $MODEL_TAG == bart_base ]]; then | |
| MODEL_TYPE=bart | |
| TOKENIZER=facebook/bart-base | |
| MODEL_PATH=facebook/bart-base | |
| elif [[ $MODEL_TAG == codet5_small ]]; then | |
| MODEL_TYPE=codet5 | |
| TOKENIZER=Salesforce/codet5-small | |
| MODEL_PATH=Salesforce/codet5-small | |
| elif [[ $MODEL_TAG == codet5_base ]]; then | |
| MODEL_TYPE=codet5 | |
| TOKENIZER=Salesforce/codet5-base | |
| MODEL_PATH=Salesforce/codet5-base | |
| elif [[ $MODEL_TAG == codet5_large ]]; then | |
| MODEL_TYPE=codet5 | |
| TOKENIZER=Salesforce/codet5-large | |
| MODEL_PATH=Salesforce/codet5-large | |
| fi | |
| if [[ ${TASK} == 'multi_task' ]]; then | |
| RUN_FN=${WORKDIR}/run_multi_gen.py | |
| MULTI_TASK_AUG='--max_steps '${16}' --save_steps '${17}' --log_steps '${18} | |
| elif [[ ${TASK} == 'clone' ]]; then | |
| RUN_FN=${WORKDIR}/run_clone.py | |
| elif [[ ${TASK} == 'defect' ]] && [[ ${MODEL_TYPE} == 'roberta' || ${MODEL_TYPE} == 'bart' ]]; then | |
| RUN_FN=${WORKDIR}/run_defect.py | |
| else | |
| RUN_FN=${WORKDIR}/run_gen.py | |
| fi | |
| CUDA_VISIBLE_DEVICES=${GPU} \ | |
| python ${RUN_FN} ${MULTI_TASK_AUG} \ | |
| --do_train --do_eval --do_eval_bleu --do_test \ | |
| --task ${TASK} --sub_task ${SUB_TASK} --model_type ${MODEL_TYPE} --data_num ${DATA_NUM} \ | |
| --num_train_epochs ${EPOCH} --warmup_steps ${WARMUP} --learning_rate ${LR}e-5 --patience ${PATIENCE} \ | |
| --tokenizer_name=${TOKENIZER} --model_name_or_path=${MODEL_PATH} --data_dir ${WORKDIR}/data \ | |
| --cache_path ${CACHE_DIR} --output_dir ${OUTPUT_DIR} --summary_dir ${SUMMARY_DIR} \ | |
| --save_last_checkpoints --always_save_model --res_dir ${RES_DIR} --res_fn ${RES_FN} \ | |
| --train_batch_size ${BS} --eval_batch_size ${BS} --max_source_length ${SRC_LEN} --max_target_length ${TRG_LEN} \ | |
| 2>&1 | tee ${LOG} | |