tommulder committed
Commit 1575bd1 · 1 Parent(s): 8387c51

Separate run file

Files changed (2)
  1. Dockerfile +14 -16
  2. start_server.sh +12 -0
Dockerfile CHANGED
@@ -1,12 +1,19 @@
 # Base: official vLLM OpenAI-compatible server (tested version family)
 FROM vllm/vllm-openai:v0.9.1
 
+# Install required packages as root
+RUN pip3 install flash_attn==2.8.0.post2
+RUN pip3 install transformers==4.51.3
+
 # vLLM needs the custom model to be registered before main() runs.
 # The model authors recommend importing their vLLM adapter into the vllm CLI module.
-# Do this as root before switching to user
+# Do this as root before switching to user (required for HF Spaces)
 RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
 from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
 
+# Show the patched part of the vllm script for verification
+RUN grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
+
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 
@@ -25,11 +32,6 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
     GIT_LFS_SKIP_SMUDGE=1 \
     PIP_DISABLE_PIP_VERSION_CHECK=1
 
-# Install compatible Transformers version for vLLM 0.9.1 (>=4.51.1 required)
-# plus minimal utils. vLLM image already includes CUDA/PyTorch.
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"
-
 # Pre-download the model repo using Hugging Face cache
 # Note: dots.ocr requires the directory name to avoid '.' (see model card).
 ARG MODEL_ID=rednote-hilab/dots.ocr
@@ -57,17 +59,13 @@ RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
 ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
 ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
 
-
-# Expose the Space port and launch the OpenAI-compatible server
+# Expose the Space port
 EXPOSE 7860
 ENV PORT=7860
 
-# Override the entrypoint to use bash directly
-ENTRYPOINT ["/bin/bash", "-c"]
+# Copy the startup script and make it executable
+COPY start_server.sh /home/user/app/start_server.sh
+RUN chmod +x /home/user/app/start_server.sh
 
-# Notes:
-# --chat-template-content-format string per dots.ocr README
-# --served-model-name model so clients can use model="model"
-# --trust-remote-code load repo's custom code
-# --host 0.0.0.0 --port $PORT bind to Space port
-CMD ["vllm serve ${HF_MODEL_PATH} --host 0.0.0.0 --port ${PORT} --served-model-name model --gpu-memory-utilization 0.95 --chat-template-content-format string --trust-remote-code"]
+# Use the startup script as entrypoint
+ENTRYPOINT ["/home/user/app/start_server.sh"]
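For reference, the sed step appends the DotsOCR adapter import directly under the CLI import inside the vllm launcher script, so the verification grep added in this commit should print the patched pair of imports, roughly as below (a sketch of the expected output; the exact launcher path depends on the image):

$ grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
from vllm.entrypoints.cli.main import main
from DotsOCR import modeling_dots_ocr_vllm

Importing the adapter module is what registers the custom dots.ocr architecture with vLLM before main() parses the serve command.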
 
 
 
 
start_server.sh ADDED
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -ex
+echo "--- Starting setup and server ---"
+echo "Starting server..."
+exec vllm serve ${HF_MODEL_PATH} \
+    --host 0.0.0.0 \
+    --port ${PORT} \
+    --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.8 \
+    --chat-template-content-format string \
+    --served-model-name dotsocr-model \
+    --trust-remote-code
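With this entrypoint in place, the container starts vLLM's OpenAI-compatible server on port 7860 under the served model name dotsocr-model. A minimal smoke test against a running container (a sketch, assuming the server is reachable on localhost; the prompt is a placeholder):

curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "dotsocr-model", "messages": [{"role": "user", "content": "Hello"}]}'

Note that clients must now pass model="dotsocr-model", whereas the removed CMD used --served-model-name model. The exec in the script replaces the shell with the vllm process, so signals from the Space runtime reach the server directly.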