File size: 3,217 Bytes
1a22905
 
 
 
15a1cdd
 
 
1a22905
 
 
 
1575bd1
1a22905
 
 
8387c51
1a22905
 
 
1575bd1
1a22905
 
 
 
b455369
1a22905
 
9ca5497
65907ea
1a22905
 
 
 
 
 
65907ea
b455369
 
15a1cdd
e612421
 
 
 
 
 
 
15a1cdd
 
 
 
 
5394e5b
15a1cdd
 
1a22905
15a1cdd
5394e5b
 
 
 
 
 
 
 
15a1cdd
1a22905
 
 
 
1575bd1
15a1cdd
 
 
1a22905
 
 
 
1575bd1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# =============================================================================
# vLLM Server with DotsOCR Model for Hugging Face Spaces
# =============================================================================

# Base image: the vLLM OpenAI-compatible server, pinned to the v0.9.1 tag
# (the tested version family).
FROM vllm/vllm-openai:v0.9.1

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="kybtech-vllm-dotsocr" \
      description="vLLM server with DotsOCR model for document OCR" \
      version="1.0"

# =============================================================================
# SYSTEM SETUP (as root)
# =============================================================================

# Install the additional Python packages listed in requirements.txt.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer
# (hadolint DL3042); the set of installed packages is unchanged.
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Stage the vLLM patch script and run it before the `USER user` switch further
# down, i.e. while the build still has the base image's privileges (the
# original note says root is required here for HF Spaces).
# Per the original note, the script patches the vLLM entrypoint so that the
# DotsOCR model gets registered; its contents are not part of this file.
COPY patch_vllm.sh /tmp/patch_vllm.sh
RUN chmod +x /tmp/patch_vllm.sh \
    && /tmp/patch_vllm.sh

# Create the unprivileged account "user" (UID 1000) together with its
# application directory, and hand that directory to the account.
RUN useradd -m -u 1000 user \
    && mkdir -p /home/user/app \
    && chown -R user:user /home/user/app

# Copy the application scripts with --chown so they are owned by "user".
# A plain COPY creates root-owned files, and the chown above runs before the
# files exist, so on its own it never applies to them.
COPY --chown=user:user start_server.sh /home/user/app/start_server.sh
COPY --chown=user:user download_model.py /home/user/app/download_model.py

# Make both scripts executable (start_server.sh is the image ENTRYPOINT).
RUN chmod +x /home/user/app/start_server.sh /home/user/app/download_model.py

# =============================================================================
# APPLICATION SETUP (as user)
# =============================================================================

# Every instruction from here on runs as the unprivileged "user" account.
USER user

# Point HOME at that account's home directory, then put its per-user bin
# directory at the front of PATH.
ENV HOME=/home/user
ENV PATH=${HOME}/.local/bin:${PATH}

# Work from the application directory under the user's home.
WORKDIR /home/user/app

# Tooling toggles (original intent: faster HF downloads, no interactive prompts):
#   HF_HUB_ENABLE_HF_TRANSFER     - asks huggingface_hub to use hf_transfer
#   GIT_LFS_SKIP_SMUDGE           - git-lfs does not fetch LFS content on checkout
#   PIP_DISABLE_PIP_VERSION_CHECK - silences pip's "new version available" check
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV GIT_LFS_SKIP_SMUDGE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1

# Fetch the model repository at build time by running download_model.py
# (per the original note it lands in the Hugging Face cache).
# MODEL_ID is a build argument (override with --build-arg MODEL_ID=...) and is
# passed to the script through its environment.
# Note: dots.ocr requires the directory name to avoid '.' (see model card).
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN MODEL_ID="${MODEL_ID}" python3 /home/user/app/download_model.py

# Record the downloaded model location for interactive shells by appending
# exports to ~/.bashrc. The path is read from model_path.txt, which
# download_model.py is presumably responsible for writing (TODO confirm
# against the script).
# ${PYTHONPATH:+:...} appends the previous PYTHONPATH only when it is non-empty,
# so no trailing ':' (an empty entry, which Python resolves to the current
# directory) is produced when PYTHONPATH was unset.
RUN HF_MODEL_PATH="$(cat /home/user/app/model_path.txt)" && \
    echo "export HF_MODEL_PATH=\"$HF_MODEL_PATH\"" >> /home/user/.bashrc && \
    echo "export PYTHONPATH=\"$HF_MODEL_PATH\${PYTHONPATH:+:\$PYTHONPATH}\"" >> /home/user/.bashrc

# Default values for processes that never source ~/.bashrc (it is only read by
# interactive shells, which then override these).
# NOTE(review): this path is hard-coded for rednote-hilab/dots.ocr and does not
# follow MODEL_ID or the contents of model_path.txt - confirm it matches what
# download_model.py actually produces.
ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
ENV PYTHONPATH="${HF_MODEL_PATH}${PYTHONPATH:+:${PYTHONPATH}}"

# =============================================================================
# RUNTIME CONFIGURATION
# =============================================================================

# Expose the Space port. EXPOSE is documentation only; PORT is the variable the
# health check below reads at runtime.
EXPOSE 7860
ENV PORT=7860

# Health check (using httpx instead of curl) against the server's /health route.
# The port is read from the PORT environment variable inside Python: the
# previous f-string referenced a Python name `PORT` that does not exist, so the
# probe raised NameError and the container was always reported unhealthy.
# raise_for_status() makes 4xx/5xx responses fail the probe as well; any
# exception makes python3 exit non-zero, which marks the check as failed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python3 -c "import os, httpx; httpx.get('http://localhost:' + os.environ.get('PORT', '7860') + '/health', timeout=5).raise_for_status()" || exit 1

# Use the startup script as entrypoint.
# Exec (JSON-array) form: Docker runs the script directly instead of wrapping
# it in `/bin/sh -c`, so the script must be executable (see the chmod +x in the
# system setup section).
ENTRYPOINT ["/home/user/app/start_server.sh"]