Spaces:
Paused
Paused
# Run the pytest suite as TESTMODULOTOTAL parallel shards.
#
# One background pytest process is started per shard. Each shard gets its own
# TESTMODULO index, GRADIO_SERVER_PORT base, CUDA_VISIBLE_DEVICES value and
# H2OGPT_BASE_PATH, and writes its output to testsparallel<mod>.log in the
# current directory. The script then waits for all shards to finish.

# CHOOSE:
ngpus=4
# below has to match GPUs for A6000s due to long context tests
export TESTMODULOTOTAL=4

# Install failure is deliberately ignored here.
pip install pytest-instafail || true

# Stop leftovers from a previous run. Each command is best effort: it reports
# an error when nothing matches, and the script carries on.
# -r: do not invoke `docker stop` with no arguments when no container matches.
docker ps | grep text-generation-inference | awk '{print $1}' | xargs -r docker stop
killall -s SIGINT pytest
killall -s SIGTERM pytest
killall -s 9 pytest
pkill --signal 9 -f weaviate-embedded/weaviate

# lscpu -p prints one CSV row per logical CPU plus '#' comment lines.
# NPHYSICAL counts distinct values of CSV fields 2-4; NPROCS counts all rows.
NPHYSICAL=$(lscpu -p | grep -Ev '^#' | sort -u -t, -k 2,4 | wc -l)
NPROCS=$(lscpu -p | grep -Ev '^#' | wc -l)
#
# Threads per shard; never let it drop to 0 on machines with fewer logical
# CPUs than shards, since 0 is not a usable thread count.
n_jobs=$((NPROCS / TESTMODULOTOTAL))
if [ "$n_jobs" -lt 1 ]; then
  n_jobs=1
fi
echo "CORES: $NPHYSICAL $NPROCS $n_jobs"

# Thread limits are identical for every shard, so export them once.
export n_jobs=$n_jobs
export OMP_NUM_THREADS=$n_jobs
export NUMEXPR_MAX_THREADS=$n_jobs
export OPENBLAS_NUM_THREADS=$n_jobs
# By default, OpenBLAS will restrict the Cpus_allowed to be 0x1.
# NOTE(review): this is set to the thread count rather than a fixed flag
# value - confirm that is what OpenBLAS expects.
export OPENBLAS_MAIN_FREE=$n_jobs
export MKL_NUM_THREADS=$n_jobs

# GENERAL:
lowergpuid=0
low=0
high=$((TESTMODULOTOTAL - 1))
pids=""
for mod in $(seq "$low" "$high"); do
  # in some cases launch gradio server, TGI server, or gradio server as
  # inference server with +1 and +2 off base port
  # ports always increment by 3
  export GRADIO_SERVER_PORT=$((7860 + mod * 3))
  export TESTMODULO=$mod
  # CVD loops over number of GPUs
  export CUDA_VISIBLE_DEVICES=$((lowergpuid + mod % ngpus))
  export H2OGPT_BASE_PATH="./base_$mod"
  # huggyllama test uses alot of memory, requires TESTMODULOTOTAL=ngpus for even A6000s
  # pytest --instafail -s -v -n 1 tests -k "not test_huggyllama_transformers_pr" &> testsparallel"${mod}".log &
  pytest --instafail -s -v -n 1 tests &> testsparallel"${mod}".log &
  pid=$!
  echo "MODS: $mod $GRADIO_SERVER_PORT $CUDA_VISIBLE_DEVICES $H2OGPT_BASE_PATH"
  pids="$pids $pid"
done

# On Ctrl-C, kill every shard started above. $pids is expanded now, when the
# trap is defined, so the handler carries the literal PID list.
trap "kill $pids; exit 1" INT

echo "to check on results while running, do:"
echo "grep -a PASSED testsparallel*.log | sed 's/.*PASSED//g' | sort | uniq |wc -l"
echo "grep -a FAILED testsparallel*.log | sed 's/.*FAILED//g' | sort | uniq |wc -l"
echo "to interrupt but still get some results, do:"
#echo "ps -auxwf | grep -v "[g]rep" | grep pytest | awk '{print $2}' |xargs kill -s SIGINT"
echo "kill -s SIGINT $pids"

# Block until every background shard has exited.
wait