import os

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Model parameters
REPO_ID = "TheBloke/phi-2-GGUF"
MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Auto-download the GGUF file if it is not already present locally
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model manually from Hugging Face...")
    MODEL_PATH = hf_hub_download(
        repo_id=REPO_ID,
        filename=MODEL_FILENAME,
        local_dir="./models",
        local_dir_use_symlinks=False,
    )
    print(f"✅ Model downloaded to {MODEL_PATH}")

# Initialize the Llama model
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,       # context window size in tokens
    n_threads=2,     # CPU threads used for inference
    verbose=True,
)


def generate_structure(prompt: str) -> str:
    """Send a single-turn chat prompt to the model and return the reply text."""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        top_p=0.95,
        max_tokens=1024,
    )
    return response["choices"][0]["message"]["content"]
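

# A minimal usage sketch, assuming this file is run directly as a script.
# The example prompt below is an illustrative placeholder, not part of the
# original code; replace it with whatever structure-generation prompt you need.
if __name__ == "__main__":
    example_prompt = "Outline the key sections of a README for a small Python CLI tool."
    print(generate_structure(example_prompt))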