Separate run file
- Dockerfile      +14 -16
- start_server.sh +12 -0
Dockerfile
CHANGED

@@ -1,12 +1,19 @@
 # Base: official vLLM OpenAI-compatible server (tested version family)
 FROM vllm/vllm-openai:v0.9.1
 
+# Install required packages as root
+RUN pip3 install flash_attn==2.8.0.post2
+RUN pip3 install transformers==4.51.3
+
 # vLLM needs the custom model to be registered before main() runs.
 # The model authors recommend importing their vLLM adapter into the vllm CLI module.
-# Do this as root before switching to user
+# Do this as root before switching to user (required for HF Spaces)
 RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
 from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
 
+# Show the patched part of the vllm script for verification
+RUN grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
+
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 
@@ -25,11 +32,6 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
     GIT_LFS_SKIP_SMUDGE=1 \
     PIP_DISABLE_PIP_VERSION_CHECK=1
 
-# Install compatible Transformers version for vLLM 0.9.1 (>=4.51.1 required)
-# plus minimal utils. vLLM image already includes CUDA/PyTorch.
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"
-
 # Pre-download the model repo using Hugging Face cache
 # Note: dots.ocr requires the directory name to avoid '.' (see model card).
 ARG MODEL_ID=rednote-hilab/dots.ocr
@@ -57,17 +59,13 @@ RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
 ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
 ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
 
-
-# Expose the Space port and launch the OpenAI-compatible server
+# Expose the Space port
 EXPOSE 7860
 ENV PORT=7860
 
-#
-
+# Copy the startup script and make it executable
+COPY start_server.sh /home/user/app/start_server.sh
+RUN chmod +x /home/user/app/start_server.sh
 
-#
-
-# --served-model-name model   so clients can use model="model"
-# --trust-remote-code         load repo's custom code
-# --host 0.0.0.0 --port $PORT bind to Space port
-CMD ["vllm serve ${HF_MODEL_PATH} --host 0.0.0.0 --port ${PORT} --served-model-name model --gpu-memory-utilization 0.95 --chat-template-content-format string --trust-remote-code"]
+
+# Use the startup script as entrypoint
+ENTRYPOINT ["/home/user/app/start_server.sh"]
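The verification grep added above surfaces the patch in the build log. If the sed edit took, its output should look like the sketch below (the exact layout of the $(which vllm) console script depends on the vLLM 0.9.1 wheel, so this is illustrative only):

    $ grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
    from vllm.entrypoints.cli.main import main
    from DotsOCR import modeling_dots_ocr_vllm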
start_server.sh
ADDED

@@ -0,0 +1,12 @@
+#!/bin/bash
+set -ex
+echo "--- Starting setup and server ---"
+echo "Starting server..."
+exec vllm serve ${HF_MODEL_PATH} \
+    --host 0.0.0.0 \
+    --port ${PORT} \
+    --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.8 \
+    --chat-template-content-format string \
+    --served-model-name dotsocr-model \
+    --trust-remote-code
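With the launch command split into start_server.sh, the image can also be smoke-tested outside Spaces. A minimal sketch, assuming a local GPU, Docker with the NVIDIA runtime, and a hypothetical image tag dotsocr-space:

    docker build -t dotsocr-space .
    docker run --gpus all -p 7860:7860 dotsocr-space

    # vLLM's OpenAI-compatible server listens under /v1; clients address
    # the model by the served name set in start_server.sh:
    curl http://localhost:7860/v1/chat/completions \
      -H 'Content-Type: application/json' \
      -d '{"model": "dotsocr-model", "messages": [{"role": "user", "content": "ping"}]}'

Because the script launches vLLM with exec, the shell is replaced by the server process, so container signals (e.g. from docker stop) reach vLLM directly.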