ThongCoding committed
Commit 96e3318 · 1 Parent(s): bfb014a
Files changed (1):
  1. model.py  +7 -6
model.py CHANGED
@@ -4,7 +4,7 @@ from llama_cpp import Llama
 
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
-MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"
 
 # Manual download with fallback
@@ -25,11 +25,12 @@ if not os.path.exists(MODEL_PATH):
 
 # Load with llama-cpp
 llm = Llama(
-    model_path=MODEL_PATH,
-    n_ctx=512,
-    n_threads=4,  # Adjust based on your CPU
-    use_mmap=True,
-    use_mlock=False,
+    model_path="./models/tinyllama-1.1b-chat-v1.0.Q2_K.gguf",
+    n_ctx=128,    # Limit context for smaller RAM/CPU
+    n_batch=32,
+    n_threads=2,  # Use 2 threads (you can try 1 if needed)
+    n_gpu_layers=0,  # CPU-only
+    chat_format="llama-2"
 )
 
 def generate_structure(prompt: str) -> str:
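
For context, here is a minimal sketch of the two parts the diff leaves out of view: the "manual download with fallback" block and the generate_structure() body. This is an illustrative reconstruction, not the repo's actual code. hf_hub_download and create_chat_completion are standard huggingface_hub / llama-cpp-python calls, but the exact fallback logic and the max_tokens=96 cap are assumptions.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Manual download with fallback (hypothetical reconstruction):
# fetch the GGUF file only if it is not already present locally.
if not os.path.exists(MODEL_PATH):
    os.makedirs("./models", exist_ok=True)
    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILENAME,
        local_dir="./models",
        token=HF_TOKEN,  # only needed for gated or private repos
    )

llm = Llama(
    model_path=MODEL_PATH,  # reuses MODEL_PATH rather than hard-coding the file
    n_ctx=128,
    n_batch=32,
    n_threads=2,
    n_gpu_layers=0,  # CPU-only
    chat_format="llama-2",
)

def generate_structure(prompt: str) -> str:
    # chat_format="llama-2" lets llama-cpp-python apply the chat template,
    # so the chat-completion API can be called directly.
    out = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=96,  # assumed cap; must fit within the 128-token context
    )
    return out["choices"][0]["message"]["content"]

One design note: the committed code hard-codes model_path to the Q2_K file even though MODEL_PATH already points at it; reusing MODEL_PATH, as in the sketch above, keeps the filename defined in one place when the quantization changes again.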