import os

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Model parameters
REPO_ID = "TheBloke/phi-2-GGUF"
MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Auto-download the GGUF file if it is not already present locally
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model manually from Hugging Face...")
    MODEL_PATH = hf_hub_download(
        repo_id=REPO_ID,
        filename=MODEL_FILENAME,
        local_dir="./models",
        local_dir_use_symlinks=False,
    )
    print(f"✅ Model downloaded to {MODEL_PATH}")

# Initialize the Llama model
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,       # context window size in tokens
    n_threads=2,     # CPU threads used for inference
    verbose=True,
)


def generate_structure(prompt: str) -> str:
    """Send a single-turn chat prompt to the model and return the reply text."""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        top_p=0.95,
        max_tokens=1024,
    )
    return response["choices"][0]["message"]["content"]
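

# A minimal usage sketch, assuming this file is run directly as a script.
# The example prompt below is an illustrative placeholder, not part of the
# original code; replace it with whatever structure-generation prompt you need.
if __name__ == "__main__":
    example_prompt = "Outline the key sections of a README for a small Python CLI tool."
    print(generate_structure(example_prompt))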