ThongCoding committed on
Commit bfb014a · 1 Parent(s): 61c7e2b
Files changed (1)
  1. model.py +25 -22
model.py CHANGED
@@ -1,38 +1,41 @@
 import os
+import requests
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 
-# Model parameters
-REPO_ID = "TheBloke/phi-2-GGUF"
-MODEL_FILENAME = "phi-2.Q4_K_M.gguf"
+HF_TOKEN = os.getenv("HF_TOKEN")
+MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"
 
-# Auto-download if model not present
+# Manual download with fallback
 if not os.path.exists(MODEL_PATH):
-    os.makedirs("./models", exist_ok=True)
     print("📦 Downloading GGUF model manually from Hugging Face...")
-    MODEL_PATH = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=MODEL_FILENAME,
-        cache_dir="./models",  # <== force download to writable folder
-        local_dir="./models",
-        local_dir_use_symlinks=False
-    )
+
+    url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+
+    os.makedirs("./models", exist_ok=True)
+    with requests.get(url, headers=headers, stream=True) as r:
+        r.raise_for_status()
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+
     print(f"✅ Model downloaded to {MODEL_PATH}")
 
-# Initialize Llama model
+# Load with llama-cpp
 llm = Llama(
     model_path=MODEL_PATH,
     n_ctx=512,
-    n_threads=2,
-    verbose=True
+    n_threads=4,  # Adjust based on your CPU
+    use_mmap=True,
+    use_mlock=False,
 )
 
 def generate_structure(prompt: str) -> str:
-    response = llm.create_chat_completion(
-        messages=[{"role": "user", "content": prompt}],
-        temperature=0.4,
-        top_p=0.95,
-        max_tokens=1024,
+    output = llm.create_completion(
+        prompt=prompt,
+        temperature=0.7,
+        max_tokens=512,
     )
-    return response["choices"][0]["message"]["content"]
+    return output["choices"][0]["text"].strip()
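
Usage note: a minimal sketch of how the updated generate_structure could be called, assuming this file is importable as `model` and that HF_TOKEN is exported in the environment before the first run (both are assumptions, not part of the commit):

    # Importing the module triggers the one-time download and Llama load above.
    from model import generate_structure

    # Hypothetical prompt; create_completion takes plain text rather than chat messages.
    outline = generate_structure("Outline a three-act structure for a short story about a lighthouse keeper.")
    print(outline)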