import os
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Model parameters
REPO_ID = "TheBloke/phi-2-GGUF"
MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Auto-download the model if it is not already present locally
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model from Hugging Face...")
    MODEL_PATH = hf_hub_download(
        repo_id=REPO_ID,
        filename=MODEL_FILENAME,
        local_dir="./models",
        local_dir_use_symlinks=False,  # deprecated (and ignored) in recent huggingface_hub releases
    )
    print(f"✅ Model downloaded to {MODEL_PATH}")

# Initialize the Llama model
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,      # context window in tokens; prompt + generation must fit within it
    n_threads=2,    # CPU threads used for inference
    verbose=True,
)


def generate_structure(prompt: str) -> str:
    """Send a single user prompt to the model and return its reply text."""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        top_p=0.95,
        max_tokens=1024,  # effectively capped by n_ctx minus the prompt length
    )
    return response["choices"][0]["message"]["content"]
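

# Minimal usage sketch: running this module directly performs one smoke-test
# generation. The prompt string below is illustrative, not from the original file.
if __name__ == "__main__":
    print(generate_structure("Suggest a project folder structure for a small Python app."))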