Commit 96e3318
Parent(s): bfb014a

model.py CHANGED
@@ -4,7 +4,7 @@ from llama_cpp import Llama
 
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
-MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.
+MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"
 
 # Manual download with fallback
@@ -25,11 +25,12 @@ if not os.path.exists(MODEL_PATH):
 
 # Load with llama-cpp
 llm = Llama(
-    model_path=
-    n_ctx=
-
-
-
+    model_path="./models/tinyllama-1.1b-chat-v1.0.Q2_K.gguf",
+    n_ctx=128,  # Limit context for smaller RAM/CPU
+    n_batch=32,
+    n_threads=2,  # Use 2 threads (you can try 1 if needed)
+    n_gpu_layers=0,  # CPU-only
+    chat_format="llama-2"
 )
 
 def generate_structure(prompt: str) -> str:
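The body of the "# Manual download with fallback" block (lines 11-24) is elided from both hunks. As context for the change, here is a minimal sketch of what such a block could look like, assuming huggingface_hub is installed; the resolve URL and the hf_hub_download call are this note's assumptions, not code from the commit:

import os
import urllib.request
from huggingface_hub import hf_hub_download

if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    try:
        # Preferred: fetch the GGUF file from the Hub (uses HF_TOKEN if set)
        hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILENAME,
            local_dir="./models",
            token=HF_TOKEN,
        )
    except Exception:
        # Fallback: plain HTTPS download from the repo's resolve endpoint
        url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
        urllib.request.urlretrieve(url, MODEL_PATH)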
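Because the new Llama(...) call sets chat_format="llama-2", the handle can be driven through llama-cpp-python's create_chat_completion API, which applies the Llama-2 prompt template automatically. A usage sketch, with illustrative prompt and sampling values; note that with n_ctx=128 the prompt plus completion must fit in 128 tokens, so max_tokens is kept small:

reply = llm.create_chat_completion(
    messages=[{"role": "user", "content": "List three section headings for a README."}],
    max_tokens=64,  # keep well under n_ctx=128
    temperature=0.2,
)
print(reply["choices"][0]["message"]["content"])

Q2_K is the smallest of the standard k-quant GGUF files in TheBloke's repo, which is consistent with the CPU-only, low-RAM settings (n_gpu_layers=0, n_threads=2) this commit chooses.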