# Docker Compose service definition for the LLM app (llama.cpp-style GGUF model
# served on port 7860). Reconstructed from a markdown-table-garbled copy: the
# stray `| … | |` pipe wrappers were removed and the compose nesting restored.

# NOTE(review): `version` is obsolete under the Compose Spec (the modern
# `docker compose` CLI ignores it); kept for compatibility with older tooling.
version: '3.8'

services:
  llm-app:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # host:container — quoted to avoid YAML 1.1 sexagesimal parsing of
      # colon-separated digit pairs
      - "7860:7860"
    environment:
      # List-form `KEY=value` entries are plain YAML strings, so no
      # implicit-typing risk here.
      - MODEL_REPO=lmstudio-community/gemma-3n-E4B-it-text-GGUF
      - MODEL_FILENAME=gemma-3n-E4B-it-Q8_0.gguf
      - N_CTX=4096
      - N_GPU_LAYERS=0
      - N_THREADS=4
      - MAX_NEW_TOKENS=256
      - TEMPERATURE=0.1
    volumes:
      # Optional: mount models directory to persist downloaded models
      - ./models:/app/models
    restart: unless-stopped
    # NOTE(review): `mem_limit` applies when run with `docker compose` directly;
    # it is ignored in swarm mode (which uses deploy.resources.limits instead).
    mem_limit: 8g
    # Uncomment below for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]