Separate run file
- Dockerfile      +14 -16
- start_server.sh +12 -0
Dockerfile
CHANGED

@@ -1,12 +1,19 @@
 # Base: official vLLM OpenAI-compatible server (tested version family)
 FROM vllm/vllm-openai:v0.9.1
 
+# Install required packages as root
+RUN pip3 install flash_attn==2.8.0.post2
+RUN pip3 install transformers==4.51.3
+
 # vLLM needs the custom model to be registered before main() runs.
 # The model authors recommend importing their vLLM adapter into the vllm CLI module.
-# Do this as root before switching to user
+# Do this as root before switching to user (required for HF Spaces)
 RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
 from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
 
+# Show the patched part of the vllm script for verification
+RUN grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
+
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 
@@ -25,11 +32,6 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
     GIT_LFS_SKIP_SMUDGE=1 \
     PIP_DISABLE_PIP_VERSION_CHECK=1
 
-# Install compatible Transformers version for vLLM 0.9.1 (>=4.51.1 required)
-# plus minimal utils. vLLM image already includes CUDA/PyTorch.
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"
-
 # Pre-download the model repo using Hugging Face cache
 # Note: dots.ocr requires the directory name to avoid '.' (see model card).
 ARG MODEL_ID=rednote-hilab/dots.ocr
@@ -57,17 +59,13 @@ RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
 ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
 ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
 
-
-# Expose the Space port and launch the OpenAI-compatible server
+# Expose the Space port
 EXPOSE 7860
 ENV PORT=7860
 
-#
-
+# Copy the startup script and make it executable
+COPY start_server.sh /home/user/app/start_server.sh
+RUN chmod +x /home/user/app/start_server.sh
 
-#
-
-# --served-model-name model   so clients can use model="model"
-# --trust-remote-code         load repo's custom code
-# --host 0.0.0.0 --port $PORT bind to Space port
-CMD ["vllm serve ${HF_MODEL_PATH} --host 0.0.0.0 --port ${PORT} --served-model-name model --gpu-memory-utilization 0.95 --chat-template-content-format string --trust-remote-code"]
+
+# Use the startup script as entrypoint
+ENTRYPOINT ["/home/user/app/start_server.sh"]
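The verification grep added above surfaces the patch in the build log. If the sed edit took, its output should look like the sketch below (the exact layout of the $(which vllm) console script depends on the vLLM 0.9.1 wheel, so this is illustrative only):

    $ grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
    from vllm.entrypoints.cli.main import main
    from DotsOCR import modeling_dots_ocr_vllm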
start_server.sh
ADDED

@@ -0,0 +1,12 @@
+#!/bin/bash
+set -ex
+echo "--- Starting setup and server ---"
+echo "Starting server..."
+exec vllm serve ${HF_MODEL_PATH} \
+    --host 0.0.0.0 \
+    --port ${PORT} \
+    --tensor-parallel-size 1 \
+    --gpu-memory-utilization 0.8 \
+    --chat-template-content-format string \
+    --served-model-name dotsocr-model \
+    --trust-remote-code
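With the launch command split into start_server.sh, the image can also be smoke-tested outside Spaces. A minimal sketch, assuming a local GPU, Docker with the NVIDIA runtime, and a hypothetical image tag dotsocr-space:

    docker build -t dotsocr-space .
    docker run --gpus all -p 7860:7860 dotsocr-space

    # vLLM's OpenAI-compatible server listens under /v1; clients address
    # the model by the served name set in start_server.sh:
    curl http://localhost:7860/v1/chat/completions \
      -H 'Content-Type: application/json' \
      -d '{"model": "dotsocr-model", "messages": [{"role": "user", "content": "ping"}]}'

Because the script launches vLLM with exec, the shell is replaced by the server process, so container signals (e.g. from docker stop) reach vLLM directly.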