kybocr / start_server.sh
tommulder's picture
Use direct import of modeling_dots_ocr_vllm in vLLM entrypoint
c6ec434
raw
history blame
463 Bytes
#!/bin/bash
#
# Startup script: verifies that the DotsOCR vLLM module can be imported,
# then hands control to the vLLM server.
#
# Required environment (consumed by the launch command further down):
#   HF_MODEL_PATH  model argument handed to `vllm serve`
#   PORT           value handed to `--port`
#
# Shell options:
#   -e           abort on the first command that fails
#   -u           treat an unset variable as an error instead of expanding it
#                to an empty string (guards HF_MODEL_PATH / PORT)
#   -x           trace every command, so the startup log shows what ran
#   -o pipefail  a pipeline fails if any stage fails
set -euxo pipefail

echo "--- Starting setup and server ---"

# Import smoke test. The quoted heredoc delimiter ('PY') keeps the shell from
# expanding anything inside the Python source. If the import raises, python3
# exits non-zero and `set -e` stops the script before the server is launched.
python3 - <<'PY'
import sys

# Despite the label, this prints sys.path (the interpreter's effective module
# search path), which is what decides whether the import below can succeed.
print("PYTHONPATH:", sys.path)

# NOTE(review): presumably importing this module is what makes the DotsOCR
# model available to vLLM — confirm against the module itself.
import modeling_dots_ocr_vllm
print("DotsOCR import OK")
PY
# Fail fast, with a readable message, when a required variable is unset or
# empty. Without this check an empty HF_MODEL_PATH would start `vllm serve`
# with no model argument at all, and an empty PORT would make `--port`
# swallow the following flag as its value.
: "${HF_MODEL_PATH:?HF_MODEL_PATH must be set to the model to serve}"
: "${PORT:?PORT must be set to the port the server should listen on}"

echo "Starting server..."

# `exec` replaces this shell with the vLLM process, so no wrapper shell is
# left sitting between the process supervisor and the server.
# Expansions are quoted so a value containing spaces or glob characters is
# passed through as a single, literal argument.
exec vllm serve "${HF_MODEL_PATH}" \
  --host 0.0.0.0 \
  --port "${PORT}" \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.8 \
  --chat-template-content-format string \
  --served-model-name dotsocr-model \
  --trust-remote-code