import os
import shutil

import requests
from llama_cpp import Llama

MODEL_URL = "https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
MODEL_FILENAME = "Llama-3.2-1B-Instruct.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"


def download_model():
    """Stream the GGUF model file from Hugging Face into ./models."""
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model directly from URL...")
    response = requests.get(MODEL_URL, stream=True)
    if response.status_code == 200:
        with open(MODEL_PATH, "wb") as f:
            shutil.copyfileobj(response.raw, f)
        print(f"✅ Model downloaded to {MODEL_PATH}")
    else:
        raise RuntimeError(f"❌ Failed to download model. Status code: {response.status_code}")


# Only download if not already present
if not os.path.exists(MODEL_PATH):
    download_model()

# Load model with llama-cpp-python
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,              # context window size in tokens
    n_batch=512,            # prompt-processing batch size
    n_threads=6,            # CPU threads used for inference
    chat_format="llama-3",  # apply the Llama 3 chat template
    verbose=False,
)


def generate_structure(prompt: str) -> str:
    """Ask the model for a JSON description of a 3D structure and return the raw reply."""
    messages = [
        {
            "role": "system",
            "content": "You are a Minecraft-style structure planner. You always respond with strictly valid JSON describing a 3D structure.",
        },
        {"role": "user", "content": prompt},
    ]
    output = llm.create_chat_completion(messages=messages)
    return output["choices"][0]["message"]["content"]
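

# --- Hypothetical usage sketch (not part of the original script) ---
# Shows one way to call generate_structure and parse its JSON reply.
# The example prompt and the json.loads fallback below are assumptions;
# small instruct models do not always return strictly valid JSON, so the
# except branch simply prints the raw string instead of failing.
if __name__ == "__main__":
    import json

    raw = generate_structure("Design a 5x5x4 cobblestone watchtower with a door on the south side.")
    try:
        structure = json.loads(raw)
        print(json.dumps(structure, indent=2))
    except json.JSONDecodeError:
        # Fall back to the unparsed model output if it is not valid JSON.
        print(raw)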