# Use a slim Python 3.10 base image (lightweight and compatible with HuggingFace Spaces)
FROM python:3.10-slim
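# Note: the llama-cpp-python wheel installed below is tagged cp310 (CPython 3.10),
# so the base image's Python version must stay at 3.10 for it to install.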
# Set working directory
WORKDIR /app
# Install system dependencies required for runtime and compilation
RUN apt-get update && apt-get install -y \
    wget \
    curl \
    git \
    git-lfs \
    build-essential \
    cmake \
    libopenblas-dev \
    libssl-dev \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*
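# (Assumption: build-essential, cmake, and libopenblas-dev are kept so that
# llama-cpp-python or other native packages can be compiled from source if a
# prebuilt wheel is unavailable; libgomp1 is the OpenMP runtime llama.cpp links against.)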
# Initialize git-lfs
RUN git lfs install
# Environment variables for predictable container behavior:
# - PYTHONUNBUFFERED: flush stdout/stderr immediately so logs stream in real time
# - PYTHONDONTWRITEBYTECODE: skip writing .pyc files inside the image
# - PIP_NO_CACHE_DIR: disable pip's download cache to keep layers small
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1
ENV DOCKER_CONTAINER=true
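# (Assumption: DOCKER_CONTAINER above is an application-specific flag, presumably
# read by config.py or main.py to detect that the app is running inside a container.)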
# Create models directory
RUN mkdir -p /app/models
# Copy and install llama-cpp-python from local wheel
COPY wheels/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl /tmp/
RUN pip install /tmp/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
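# Installing from the vendored wheel pins the version to 0.3.16 and avoids the
# lengthy CMake build of llama.cpp that a source install would trigger here.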
# Copy requirements first for better Docker layer caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy configuration so the build can read the model repo and filename
COPY config.py .
# Pre-download the model during build so the container starts without a cold download
RUN python -c "import os, sys; \
    from huggingface_hub import hf_hub_download; \
    from config import Config; \
    os.makedirs('/app/models', exist_ok=True); \
    print(f'Downloading model {Config.MODEL_REPO}/{Config.MODEL_FILENAME}...'); \
    p = hf_hub_download(repo_id=Config.MODEL_REPO, filename=Config.MODEL_FILENAME, local_dir='/app/models', token=os.getenv('HUGGINGFACE_TOKEN') or None); \
    print(f'Model downloaded to: {p}'); \
    os.path.exists(p) or sys.exit(f'Model file not found: {p}'); \
    size = os.path.getsize(p); \
    print(f'Model file size: {size / (1024 ** 3):.2f} GB'); \
    size > 1024 * 1024 or sys.exit(f'Downloaded model file seems too small: {size} bytes'); \
    print('Model download verification successful')"
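# Note: for gated or private models, HUGGINGFACE_TOKEN must be made available at
# build time (e.g. via ARG/--build-arg or a BuildKit secret); it is not needed
# for public repos. Baking the model in enlarges the image but removes the
# download from container startup.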
# Verify model file exists after build
RUN ls -la /app/models/ && \
    [ -n "$(ls /app/models/*.gguf 2>/dev/null)" ] || (echo "No .gguf model file found!" && exit 1)
# Copy application files
COPY . .
# Make entrypoint script executable
RUN chmod +x entrypoint.sh
# Create a non-root user for security
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user
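# Note: `RUN chown -R` rewrites every file it touches into a new layer, so with a
# multi-gigabyte model under /app/models it can roughly double the image size.
# Creating the user earlier and using `COPY --chown=user:user` for copied files
# (plus downloading the model as that user) is one way to avoid the duplication.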
# Expose the port that Gradio will run on
EXPOSE 7860
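# HuggingFace Spaces routes traffic to port 7860 by default for Docker Spaces
# (overridable with `app_port` in the Space's README metadata).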
# Default command: launch the Gradio app directly. The entrypoint script is kept
# executable above and can be re-enabled as the ENTRYPOINT if needed.
# ENTRYPOINT ["./entrypoint.sh"]
CMD ["python", "main.py", "--mode", "gradio"]