import os

import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Model location on the Hub and local storage paths.
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "google/gemma-2b-it-GGUF"
MODEL_FILENAME = "gemma-2b-it.gguf"
LOCAL_MODEL_PATH = f"/code/models/{MODEL_FILENAME}"
CACHE_DIR = "/code/cache"

# Make sure the model and cache directories exist before downloading.
os.makedirs(os.path.dirname(LOCAL_MODEL_PATH), exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)

def download_model():
    """Fetch the GGUF model, preferring the Hub client and falling back to a direct HTTP download."""
    try:
        print("🔄 Attempting HF Hub download...")
        model_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=MODEL_FILENAME,
            token=HF_TOKEN,
            cache_dir=CACHE_DIR,
        )
        print("✅ Downloaded via hf_hub_download:", model_path)
        return model_path
    except Exception as e:
        print("⚠️ hf_hub_download failed:", e)
        print("🔁 Falling back to manual download...")
        # Send an Authorization header only when a token is actually set,
        # so anonymous downloads don't carry a "Bearer None" header.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        url = f"https://huggingface.co/{REPO_ID}/resolve/main/{MODEL_FILENAME}"
        response = requests.get(url, headers=headers, stream=True)
        response.raise_for_status()
        # Stream to disk in 8 KiB chunks so the whole file never sits in memory.
        with open(LOCAL_MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print("✅ Manual download completed:", LOCAL_MODEL_PATH)
        return LOCAL_MODEL_PATH
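
# Load the model once at module import so every call reuses the same Llama instance.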
print("📦 Loading GGUF model...")
model_path = download_model()
llm = Llama(model_path=model_path)

def generate_structure(prompt: str) -> str:
    """Run the prompt through the model and return the stripped completion text."""
    output = llm(prompt, max_tokens=512)
    return output["choices"][0]["text"].strip()
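
# A minimal usage sketch (illustrative only; the prompt below is a placeholder,
# not part of the original script):
if __name__ == "__main__":
    demo_prompt = "List the sections of a typical project README."
    print(generate_structure(demo_prompt))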