import os

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# GGUF build of Phi-2 hosted on the Hugging Face Hub (Q4_K_M quantization).
REPO_ID = "TheBloke/phi-2-GGUF"
MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Download the model once; later runs reuse the local copy.
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    print("📦 Downloading GGUF model manually from Hugging Face...")
    MODEL_PATH = hf_hub_download(
        repo_id=REPO_ID,
        filename=MODEL_FILENAME,
        local_dir="./models",
        # Deprecated (and ignored) in recent huggingface_hub releases; kept for older versions.
        local_dir_use_symlinks=False,
    )
    print(f"✅ Model downloaded to {MODEL_PATH}")

# Load the model; a small context window and two threads keep memory and CPU use modest.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,
    n_threads=2,
    verbose=True,
)


def generate_structure(prompt: str) -> str:
    """Send a single-turn chat prompt to the local model and return its text reply."""
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.4,
        top_p=0.95,
        max_tokens=1024,
    )
    return response["choices"][0]["message"]["content"]
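

# Minimal usage sketch; the prompt string below is a placeholder, not part of the original script.
if __name__ == "__main__":
    print(generate_structure("Outline a simple project folder structure."))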