Spaces:
Runtime error
Runtime error
| # repo_dir: root directory of the project | |
| repo_dir="$( cd "$( dirname "$0" )" && pwd )" | |
| echo "==== Working directory: ====" >&2 | |
| echo "${repo_dir}" >&2 | |
| echo "============================" >&2 | |
| test_config=$1 | |
| source ${repo_dir}/scripts/load_config.sh ${test_config} ${repo_dir} | |
| model_dir=$2 | |
| choice=$3 # all|best|last | |
| model_dir=${repo_dir}/model | |
| data_dir=${repo_dir}/data | |
| res_path=${model_dir}/results | |
| mkdir -p ${model_dir} ${data_dir} ${res_path} | |
| testset_name=data_testset_1_name | |
| testset_path=data_testset_1_path | |
| testset_ref=data_testset_1_ref | |
| testset_direc=data_testset_1_direction | |
| i=1 | |
| testsets="" | |
| while [[ ! -z ${!testset_path} && ! -z ${!testset_direc} ]]; do | |
| dataname=${!testset_name} | |
| mkdir -p ${data_dir}/${!testset_direc}/${dataname} ${data_dir}/ref/${!testset_direc}/${dataname} | |
| cp ${!testset_path}/* ${data_dir}/${!testset_direc}/${dataname}/ | |
| cp ${!testset_ref}/* ${data_dir}/ref/${!testset_direc}/${dataname}/ | |
| if [[ $testsets == "" ]]; then | |
| testsets=${!testset_direc}/${dataname} | |
| else | |
| testsets=${testsets}:${!testset_direc}/${dataname} | |
| fi | |
| i=$((i+1)) | |
| testset_name=testset_${i}_name | |
| testset_path=testset_${i}_path | |
| testset_ref=testset_${i}_ref | |
| testset_direc=testset_${i}_direction | |
| done | |
| IFS=':' read -r -a testset_list <<< ${testsets} | |
| bleu () { | |
| src=$1 | |
| tgt=$2 | |
| res_file=$3 | |
| ref_file=$4 | |
| if [[ -f ${res_file} ]]; then | |
| f_dirname=`dirname ${res_file}` | |
| python3 ${repo_dir}/scripts/utils.py ${res_file} ${ref_file} || exit 1; | |
| input_file="${f_dirname}/hypo.out.nobpe" | |
| output_file="${f_dirname}/hypo.out.nobpe.final" | |
| # form command | |
| cmd="cat ${input_file}" | |
| lang_token="LANG_TOK_"`echo "${tgt} " | tr '[a-z]' '[A-Z]'` | |
| if [[ $tgt == "fr" ]]; then | |
| cmd=$cmd" | sed -Ee 's/\"([^\"]*)\"/« \1 »/g'" | |
| elif [[ $tgt == "zh" ]]; then | |
| tokenizer="zh" | |
| elif [[ $tgt == "ja" ]]; then | |
| tokenizer="ja-mecab" | |
| fi | |
| [[ -z $tokenizer ]] && tokenizer="none" | |
| cmd=$cmd" | sed -e s'|${lang_token} ||g' > ${output_file}" | |
| eval $cmd || { echo "$cmd FAILED !"; exit 1; } | |
| cat ${output_file} | sacrebleu -l ${src}-${tgt} -tok $tokenizer --short "${f_dirname}/ref.out" | awk '{print $3}' | |
| else | |
| echo "${res_file} not exist!" >&2 && exit 1; | |
| fi | |
| } | |
| # monitor | |
| # ${ckptname}/${direction}/${testname}/orig.txt | |
| (inotifywait -r -m -e close_write ${res_path} | | |
| while read path action file; do | |
| if [[ "$file" =~ .*txt$ ]]; then | |
| tmp_str="${path%/*}" | |
| testname="${tmp_str##*/}" | |
| tmp_str="${tmp_str%/*}" | |
| direction="${tmp_str##*/}" | |
| tmp_str="${tmp_str%/*}" | |
| ckptname="${tmp_str##*/}" | |
| src_lang="${direction%2*}" | |
| tgt_lang="${direction##*2}" | |
| res_file=$path$file | |
| ref_file=${data_dir}/ref/${direction}/${testname}/dev.${tgt_lang} | |
| bleuscore=`bleu ${src_lang} ${tgt_lang} ${res_file} ${ref_file}` | |
| bleu_str="$(date "+%Y-%m-%d %H:%M:%S")\t${ckptname}\t${direction}/${testname}\t$bleuscore" | |
| echo -e ${bleu_str} # to stdout | |
| echo -e ${bleu_str} >> ${model_dir}/summary.log | |
| fi | |
| done) & | |
| if [[ ${choice} == "all" ]]; then | |
| filelist=`ls -la ${model_dir} | sort -k6,7 -r | awk '{print $NF}' | grep .pt$ | tr '\n' ' '` | |
| elif [[ ${choice} == "best" ]]; then | |
| filelist="${model_dir}/checkpoint_best.pt" | |
| elif [[ ${choice} == "last" ]]; then | |
| filelist="${model_dir}/checkpoint_last.pt" | |
| else | |
| echo "invalid choice!" && exit 2; | |
| fi | |
| N=${NUM_GPU} | |
| #export CUDA_VISIBLE_DEVICES=$(seq -s ',' 0 $(($N - 1)) ) | |
| infer_test () { | |
| test_path=$1 | |
| ckpts=$2 | |
| gpu=$3 | |
| final_res_file=$4 | |
| src=$5 | |
| tgt=$6 | |
| gpu_cmd="CUDA_VISIBLE_DEVICES=$gpu " | |
| lang_token="LANG_TOK_"`echo "${tgt} " | tr '[a-z]' '[A-Z]'` | |
| [[ -z ${max_source_positions} ]] && max_source_positions=1024 | |
| [[ -z ${max_target_positions} ]] && max_target_positions=1024 | |
| command=${gpu_cmd}"fairseq-generate ${test_path} \ | |
| --user-dir ${repo_dir}/mcolt \ | |
| -s ${src} \ | |
| -t ${tgt} \ | |
| --skip-invalid-size-inputs-valid-test \ | |
| --path ${ckpts} \ | |
| --max-tokens 1024 \ | |
| --task translation_w_langtok \ | |
| ${options} \ | |
| --lang-prefix-tok ${lang_token} \ | |
| --max-source-positions ${max_source_positions} \ | |
| --max-target-positions ${max_target_positions} \ | |
| --nbest 1 | grep -E '[S|H|P|T]-[0-9]+' > ${final_res_file} | |
| " | |
| echo "$command" | |
| } | |
| export -f infer_test | |
| i=0 | |
| (for ckpt in ${filelist} | |
| do | |
| for testset in "${testset_list[@]}" | |
| do | |
| ckptbase=`basename $ckpt` | |
| ckptname="${ckptbase%.*}" | |
| direction="${testset%/*}" | |
| testname="${testset##*/}" | |
| src_lang="${direction%2*}" | |
| tgt_lang="${direction##*2}" | |
| ((i=i%N)); ((i++==0)) && wait | |
| test_path=${data_dir}/${testset} | |
| echo "-----> "${ckptname}" | "${direction}/$testname" <-----" >&2 | |
| if [[ ! -d ${res_path}/${ckptname}/${direction}/${testname} ]]; then | |
| mkdir -p ${res_path}/${ckptname}/${direction}/${testname} | |
| fi | |
| final_res_file="${res_path}/${ckptname}/${direction}/${testname}/orig.txt" | |
| command=`infer_test ${test_path} ${model_dir}/${ckptname}.pt $((i-1)) ${final_res_file} ${src_lang} ${tgt_lang}` | |
| echo "${command}" | |
| eval $command & | |
| done | |
| done) | |