import os
import shutil

import requests
from llama_cpp import Llama

MODEL_URL = "https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
MODEL_FILENAME = "Llama-3.2-1B-Instruct.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"


def download_model():
    """Stream the GGUF model file from Hugging Face into ./models."""
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model directly from URL...")
    response = requests.get(MODEL_URL, stream=True)
    if response.status_code == 200:
        with open(MODEL_PATH, "wb") as f:
            shutil.copyfileobj(response.raw, f)
        print(f"✅ Model downloaded to {MODEL_PATH}")
    else:
        raise RuntimeError(f"❌ Failed to download model. Status code: {response.status_code}")


# Only download if not already present
if not os.path.exists(MODEL_PATH):
    download_model()

# Load model with llama-cpp-python
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,              # context window size in tokens
    n_batch=512,            # prompt-processing batch size
    n_threads=6,            # CPU threads used for inference
    chat_format="llama-3",  # apply the Llama 3 chat template
    verbose=False,
)


def generate_structure(prompt: str) -> str:
    """Ask the model for a JSON description of a 3D structure and return the raw reply."""
    messages = [
        {
            "role": "system",
            "content": "You are a Minecraft-style structure planner. You always respond with strictly valid JSON describing a 3D structure.",
        },
        {"role": "user", "content": prompt},
    ]
    output = llm.create_chat_completion(messages=messages)
    return output["choices"][0]["message"]["content"]
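

# --- Hypothetical usage sketch (not part of the original script) ---
# Shows one way to call generate_structure and parse its JSON reply.
# The example prompt and the json.loads fallback below are assumptions;
# small instruct models do not always return strictly valid JSON, so the
# except branch simply prints the raw string instead of failing.
if __name__ == "__main__":
    import json

    raw = generate_structure("Design a 5x5x4 cobblestone watchtower with a door on the south side.")
    try:
        structure = json.loads(raw)
        print(json.dumps(structure, indent=2))
    except json.JSONDecodeError:
        # Fall back to the unparsed model output if it is not valid JSON.
        print(raw)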