File size: 1,617 Bytes
6bf37cd
deb83c9
d9d6b2c
1f23ef2
d9d6b2c
deb83c9
6bf37cd
 
1f23ef2
6bf37cd
1f23ef2
6bf37cd
 
1f23ef2
 
 
 
6bf37cd
 
 
 
 
 
 
1f23ef2
6bf37cd
1f23ef2
6bf37cd
1f23ef2
6bf37cd
 
 
deb83c9
6bf37cd
 
 
 
 
 
 
 
d9d6b2c
 
6bf37cd
 
 
 
231cb7b
deb83c9
072df7d
6bf37cd
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import threading

from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel

# Hugging Face repo and file for the quantized Gemma 2B instruct model.
REPO_ID = "google/gemma-2b-it-GGUF"
FILENAME = "gemma-2b-it.gguf"
# Optional auth token; gated repos (like Gemma) require it — unset means anonymous.
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_DIR = "./models"
# Separate hub cache under the model dir so everything lives in one place.
CACHE_DIR = "./models/.hf_cache"
# Final on-disk location the server loads the weights from.
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

# Make sure directories exist
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)

# Step 1: Fetch the GGUF weights on first run; skip if already on disk.
if not os.path.exists(MODEL_PATH):
    try:
        print("📦 Downloading model from Hugging Face Hub...")
        # hf_hub_download returns the path it actually wrote — use it rather
        # than assuming the file landed exactly at MODEL_PATH.
        downloaded_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            token=HF_TOKEN,
            cache_dir=CACHE_DIR,
            local_dir=MODEL_DIR,
            local_dir_use_symlinks=False  # deprecated no-op on recent hub versions; kept for older ones
        )
        print(f"✅ Model downloaded to {downloaded_path}")
        # Fail fast here with a clear message instead of letting Llama() blow
        # up later on a missing file.
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(
                f"Expected model file at {MODEL_PATH}, but download produced {downloaded_path}"
            )
    except Exception as e:
        # Log and re-raise: the server cannot start without the weights.
        print(f"❌ Download failed: {e}")
        raise

# Step 2: Load model using llama-cpp-python
print("🤖 Loading GGUF model...")
# One shared Llama instance for the whole process, created at startup.
_llama_opts = {
    "model_path": MODEL_PATH,
    "n_ctx": 512,      # context window size in tokens
    "n_threads": 4,    # CPU threads used for inference
    "n_batch": 512,    # prompt-evaluation batch size
    "verbose": False,  # silence llama.cpp's internal logging
}
llm = Llama(**_llama_opts)

# Step 3: FastAPI app
app = FastAPI()

class PromptRequest(BaseModel):
    """Request body for POST /prompt."""

    # Raw user text forwarded to the model (stripped before inference).
    prompt: str

# llama_cpp.Llama is not thread-safe, and FastAPI executes sync (`def`)
# endpoints in a threadpool, so concurrent requests could otherwise call
# into the model simultaneously. Serialize inference with a process lock.
_llm_lock = threading.Lock()

@app.post("/prompt")
def generate_prompt(req: PromptRequest):
    """Run one completion for the given prompt.

    Returns ``{"response": <generated text>}``.
    Raises HTTP 400 for an empty or whitespace-only prompt instead of
    spending an inference call on it.
    """
    prompt = req.prompt.strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="prompt must not be empty")

    # Hold the lock only for the model call itself.
    with _llm_lock:
        output = llm(
            prompt,
            max_tokens=512,
            temperature=0.6,
            top_p=0.95,
            stop=["<|endoftext|>", "</s>", "```"],
            echo=False
        )

    result = output["choices"][0]["text"].strip()
    return {"response": result}