ThongCoding committed
Commit 3dc4bd8 · 1 Parent(s): 96e3318
Files changed (1): model.py (+7 -7)
model.py CHANGED
@@ -3,8 +3,8 @@ import requests
 from llama_cpp import Llama

 HF_TOKEN = os.getenv("HF_TOKEN")
-MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
-MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
+MODEL_REPO = "afrideva/TinyMistral-248M-SFT-v4-GGUF"
+MODEL_FILENAME = "TinyMistral-248M-SFT-v4.Q4_K_M.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"

 # Manual download with fallback
@@ -25,12 +25,12 @@ if not os.path.exists(MODEL_PATH):

 # Load with llama-cpp
 llm = Llama(
-    model_path="./models/tinyllama-1.1b-chat-v1.0.Q2_K.gguf",
-    n_ctx=128,  # Limit context for smaller RAM/CPU
+    model_path=MODEL_PATH,
+    n_ctx=256,
+    n_threads=2,
     n_batch=32,
-    n_threads=2,  # Use 2 threads (you can try 1 if needed)
-    n_gpu_layers=0,  # CPU-only
-    chat_format="llama-2"
+    n_gpu_layers=0,
+    chat_format=None  # Not using llama-2 format
 )

 def generate_structure(prompt: str) -> str:
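For context, a minimal sketch of how the changed file plausibly fits together after this commit. The "manual download with fallback" body (lines 10-24) is elided from the diff, so the hf_hub_download/requests fallback below is an assumption, as are the resolve URL shape, the max_tokens value, and the body of generate_structure; only the constants and the Llama(...) arguments come from the diff itself.

import os
import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO = "afrideva/TinyMistral-248M-SFT-v4-GGUF"
MODEL_FILENAME = "TinyMistral-248M-SFT-v4.Q4_K_M.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Manual download with fallback (assumed shape; this hunk is not shown in the diff)
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    try:
        # Preferred path: let huggingface_hub handle auth and resumable downloads.
        hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME,
                        local_dir="./models", token=HF_TOKEN)
    except Exception:
        # Fallback: stream the file over plain HTTP (hypothetical URL shape).
        url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        with requests.get(url, headers=headers, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(MODEL_PATH, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

# Load with llama-cpp (arguments taken verbatim from the new side of the diff)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=256,
    n_threads=2,
    n_batch=32,
    n_gpu_layers=0,   # CPU-only
    chat_format=None  # raw completion mode, no chat template applied
)

def generate_structure(prompt: str) -> str:
    # With chat_format=None, the model is invoked as a plain completion:
    # the prompt is fed verbatim and the raw continuation is returned.
    out = llm(prompt, max_tokens=200)  # max_tokens is an assumed value
    return out["choices"][0]["text"]

The commit's direction is consistent throughout: swapping the 1.1B TinyLlama Q2_K for a 248M TinyMistral Q4_K_M shrinks the weights substantially, which is what lets n_ctx grow from 128 to 256 while keeping the 2-thread, CPU-only (n_gpu_layers=0) budget; dropping chat_format="llama-2" matches the new model, which is not a Llama-2-chat-formatted model.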