#!/bin/bash
# Startup script: resolve the Hugging Face model snapshot path recorded at
# build time, expose it on PYTHONPATH, sanity-check that the model's custom
# vLLM modules import, then exec the vLLM OpenAI-compatible server.
#
# Required env: PORT (listen port for vllm serve).
# Optional env: HF_MODEL_PATH (overridden by /home/user/app/model_path.txt if present).
set -euxo pipefail

echo "--- Starting setup and server ---"

# Use the exact snapshot path resolved at build time so Python can import the model code.
# The HF cache's model root doesn't contain the files; they're under snapshots/<commit>.
HF_MODEL_PATH_FILE="/home/user/app/model_path.txt"
if [ -f "$HF_MODEL_PATH_FILE" ]; then
  HF_MODEL_PATH="$(cat "$HF_MODEL_PATH_FILE")"
  export HF_MODEL_PATH
fi

# Ensure Python sees the model's modules (e.g., modeling_dots_ocr_vllm.py).
# ':-' defaults keep this safe under 'set -u' when either var is unset.
export PYTHONPATH="${HF_MODEL_PATH:-}:${PYTHONPATH:-}"

# Symlink the snapshot dir as ./DotsOCR so 'from DotsOCR import ...' resolves.
if [ -n "${HF_MODEL_PATH:-}" ] && [ ! -e "DotsOCR" ]; then
  ln -s -- "$HF_MODEL_PATH" DotsOCR
fi

# Fail fast (set -e) if the model's custom module can't be imported,
# rather than letting vllm die later with a more opaque error.
python3 - <<'PY'
import sys, os
print("HF_MODEL_PATH:", os.environ.get("HF_MODEL_PATH"))
print("PYTHONPATH:", sys.path)
from DotsOCR import modeling_dots_ocr_vllm
print("DotsOCR import OK")
PY

echo "Starting server..."
# exec replaces this shell so vllm becomes PID of record and receives
# container signals (SIGTERM) directly. Expansions are quoted (SC2086),
# and the stray trailing '|' after --trust-remote-code is removed — it
# made bash wait forever for the next pipeline stage.
exec vllm serve "${HF_MODEL_PATH:?model path not resolved}" \
  --host 0.0.0.0 \
  --port "${PORT:?PORT must be set}" \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.8 \
  --chat-template-content-format string \
  --served-model-name dotsocr-model \
  --trust-remote-code