import os

import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Configuration: Hugging Face token, model repo, and local storage paths
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = "google/gemma-2b-it-GGUF"
MODEL_FILENAME = "gemma-2b-it.gguf"
LOCAL_MODEL_PATH = f"/code/models/{MODEL_FILENAME}"
CACHE_DIR = "/code/cache"

# Ensure the model and cache directories exist before downloading
os.makedirs(os.path.dirname(LOCAL_MODEL_PATH), exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)
def download_model():
    """Fetch the GGUF model, preferring hf_hub_download and falling back
    to a direct HTTP download from the Hub if that fails."""
    try:
        print("🔄 Attempting HF Hub download...")
        model_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=MODEL_FILENAME,
            token=HF_TOKEN,
            cache_dir=CACHE_DIR,
        )
        print("✅ Downloaded via hf_hub_download:", model_path)
        return model_path
    except Exception as e:
        print("⚠️ hf_hub_download failed:", e)
        print("🔁 Falling back to manual download...")
        # Stream the file directly from the Hub's resolve endpoint
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        url = f"https://huggingface.co/{REPO_ID}/resolve/main/{MODEL_FILENAME}"
        response = requests.get(url, headers=headers, stream=True)
        response.raise_for_status()
        with open(LOCAL_MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print("✅ Manual download completed:", LOCAL_MODEL_PATH)
        return LOCAL_MODEL_PATH
| print("📦 Loading GGUF model...") | |
| model_path = download_model() | |
| llm = Llama(model_path=model_path) | |
| def generate_structure(prompt: str) -> str: | |
| output = llm(prompt, max_tokens=512) | |
| return output["choices"][0]["text"].strip() | |
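

# Example usage: a minimal sketch showing how generate_structure might be
# called. The sample prompt is illustrative only and not part of the
# original script.
if __name__ == "__main__":
    sample_prompt = "Write a short project plan for a command-line todo app."
    print(generate_structure(sample_prompt))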