ThongCoding committed on
Commit bfb014a · 1 Parent(s): 61c7e2b
Files changed (1)
  1. model.py +25 -22
model.py CHANGED
@@ -1,38 +1,41 @@
 import os
+import requests
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 
-# Model parameters
-REPO_ID = "TheBloke/phi-2-GGUF"
-MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
+HF_TOKEN = os.getenv("HF_TOKEN")
+MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"
 
-# Auto-download if model not present
+# Manual download with fallback
 if not os.path.exists(MODEL_PATH):
-    os.makedirs("./models", exist_ok=True)
     print("📦 Downloading GGUF model manually from Hugging Face...")
-    MODEL_PATH = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=MODEL_FILENAME,
-        cache_dir="./models",  # <== force download to writable folder
-        local_dir="./models",
-        local_dir_use_symlinks=False
-    )
+
+    url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+    os.makedirs("./models", exist_ok=True)
+    with requests.get(url, headers=headers, stream=True) as r:
+        r.raise_for_status()
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+
     print(f"✅ Model downloaded to {MODEL_PATH}")
 
-# Initialize Llama model
+# Load with llama-cpp
 llm = Llama(
     model_path=MODEL_PATH,
     n_ctx=512,
-    n_threads=2,
-    verbose=True
+    n_threads=4,  # Adjust based on your CPU
+    use_mmap=True,
+    use_mlock=False,
 )
 
 def generate_structure(prompt: str) -> str:
-    response = llm.create_chat_completion(
-        messages=[{"role": "user", "content": prompt}],
-        temperature=0.4,
-        top_p=0.95,
-        max_tokens=1024,
+    output = llm.create_completion(
+        prompt=prompt,
+        temperature=0.7,
+        max_tokens=512,
     )
-    return response["choices"][0]["message"]["content"]
+    return output["choices"][0]["text"].strip()
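
Usage note: a minimal sketch of how the updated generate_structure could be called, assuming this file is importable as `model` and that HF_TOKEN is exported in the environment before the first run (both are assumptions, not part of the commit):

    # Importing the module triggers the one-time download and Llama load above.
    from model import generate_structure

    # Hypothetical prompt; create_completion takes plain text rather than chat messages.
    outline = generate_structure("Outline a three-act structure for a short story about a lighthouse keeper.")
    print(outline)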