# syntax=docker/dockerfile:1
# Slim Python 3.10 base image for the HuggingFace Spaces Docker runtime
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies: runtime libraries plus a toolchain for any
# packages that must compile from source
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    curl \
    git \
    git-lfs \
    build-essential \
    cmake \
    libopenblas-dev \
    libssl-dev \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*
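
# (Note: build-essential and cmake are only needed when something compiles
# from source; the prebuilt llama-cpp-python wheel installed below presumably
# needs only libopenblas and libgomp1 at runtime, so the toolchain could be
# trimmed if nothing else compiles.)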

# Initialize git-lfs
RUN git lfs install

# Runtime environment: unbuffered stdout/stderr, no .pyc files, no pip cache
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1
# App-level flag (presumably checked by the application) marking a container run
ENV DOCKER_CONTAINER=true

# Create models directory
RUN mkdir -p /app/models

# Install llama-cpp-python from a local prebuilt wheel (avoids compiling
# llama.cpp from source during the image build)
COPY wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl /tmp/
RUN pip install /tmp/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
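
# (For reference, a sketch of how such a wheel might be produced on a matching
# Python 3.10 / x86_64 host, assuming OpenBLAS acceleration is desired; flags
# follow the llama-cpp-python build docs:
#   CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
#     pip wheel llama-cpp-python==0.3.16 --wheel-dir wheels/
# )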

# Copy requirements first for better Docker layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the configuration first so the model-download layer below is cached
# independently of the rest of the application code
COPY config.py .

# Pre-download the model during build so the container starts without a
# network fetch, and fail the build if the download looks wrong
RUN python <<'EOF'
import os

from huggingface_hub import hf_hub_download

from config import Config

os.makedirs('/app/models', exist_ok=True)
print(f'Downloading model {Config.MODEL_REPO}/{Config.MODEL_FILENAME}...')
path = hf_hub_download(
    repo_id=Config.MODEL_REPO,
    filename=Config.MODEL_FILENAME,
    local_dir='/app/models',
    token=os.getenv('HUGGINGFACE_TOKEN') or None,
)
print(f'Model downloaded to: {path}')

if not os.path.exists(path):
    raise FileNotFoundError(f'Model file not found: {path}')
size = os.path.getsize(path)
print(f'Model file size: {size / (1024 ** 3):.2f} GB')
if size <= 1024 * 1024:
    raise ValueError(f'Downloaded model file seems too small: {size} bytes')
print('Model download verification successful')
EOF
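
# (The download script assumes config.py exposes a Config class with at least
# these attributes; the values shown are purely illustrative:
#   class Config:
#       MODEL_REPO = "org/some-gguf-repo"      # hypothetical
#       MODEL_FILENAME = "model.Q4_K_M.gguf"   # hypothetical
# )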

# Verify model file exists after build
RUN ls -la /app/models/ && \
    [ -n "$(ls /app/models/*.gguf 2>/dev/null)" ] || (echo "No .gguf model file found!" && exit 1)

# Copy application files
COPY . .

# Make entrypoint script executable
RUN chmod +x entrypoint.sh

# Create a non-root user (UID 1000, per HuggingFace Spaces guidance) for security
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user

# Expose the Gradio port (HuggingFace Spaces expects the app on 7860)
EXPOSE 7860

# Default command (the entrypoint script is kept in the image but currently disabled)
# ENTRYPOINT ["./entrypoint.sh"]
CMD ["python", "main.py", "--mode", "gradio"]