ThongCoding committed
Commit 6bf37cd · 1 Parent(s): 513e3d3
Files changed (2)
  1. app.py +50 -24
  2. model.py +0 -49
app.py CHANGED
@@ -1,34 +1,60 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
-from fastapi.middleware.cors import CORSMiddleware
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import os
 
-app = FastAPI()
+REPO_ID = "google/gemma-2b-it-GGUF"
+FILENAME = "gemma-2b-it.gguf"
+HF_TOKEN = os.environ.get("HF_TOKEN")  # must be set in HF Spaces Secrets
+MODEL_DIR = "./models"
+MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
+
+# Step 1: Auto-download the model if it does not exist yet
+if not os.path.exists(MODEL_PATH):
+    os.makedirs(MODEL_DIR, exist_ok=True)
+    try:
+        print("📦 Downloading model from Hugging Face Hub...")
+        hf_hub_download(
+            repo_id=REPO_ID,
+            filename=FILENAME,
+            token=HF_TOKEN,
+            local_dir=MODEL_DIR,
+            local_dir_use_symlinks=False
+        )
+        print("✅ Model downloaded.")
+    except Exception as e:
+        print(f"❌ Download failed: {e}")
+        raise
 
-# Load model
-MODEL_PATH = "./models/gemma-2b-it.gguf"
-llm = Llama(model_path=MODEL_PATH, n_ctx=512)
-
-# Allow CORS (so frontend or Swagger can work)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # change to frontend origin in production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+# Step 2: Load model using llama-cpp-python
+print("🤖 Loading GGUF model...")
+llm = Llama(
+    model_path=MODEL_PATH,
+    n_ctx=512,
+    n_threads=4,
+    n_batch=512,
+    verbose=False
 )
 
-# Input model
-class PromptInput(BaseModel):
+# Step 3: FastAPI app
+app = FastAPI()
+
+class PromptRequest(BaseModel):
     prompt: str
 
 @app.post("/prompt")
-async def generate_response(data: PromptInput):
-    output = llm(data.prompt, max_tokens=512, stop=["</s>", "\n\n"], echo=False)
-    return {"response": output["choices"][0]["text"].strip()}
-
-# Healthcheck
-@app.get("/")
-def read_root():
-    return {"message": "AI Builder Backend running"}
+def generate_prompt(req: PromptRequest):
+    prompt = req.prompt.strip()
+
+    output = llm(
+        prompt,
+        max_tokens=512,
+        temperature=0.6,
+        top_p=0.95,
+        stop=["<|endoftext|>", "</s>", "```"],
+        echo=False
+    )
+
+    result = output["choices"][0]["text"].strip()
+    return {"response": result}
model.py DELETED
@@ -1,49 +0,0 @@
-import os
-import requests
-from huggingface_hub import hf_hub_download, HfApi
-from llama_cpp import Llama
-
-HF_TOKEN = os.environ.get("HF_TOKEN")
-REPO_ID = "google/gemma-2b-it-GGUF"
-MODEL_FILENAME = "gemma-2b-it.gguf"
-LOCAL_MODEL_PATH = f"/models/{MODEL_FILENAME}"
-CACHE_DIR = "/cache"
-
-os.makedirs(os.path.dirname(LOCAL_MODEL_PATH), exist_ok=True)
-os.makedirs(CACHE_DIR, exist_ok=True)
-
-def download_model():
-    try:
-        print("🔄 Attempting HF Hub download...")
-        model_path = hf_hub_download(
-            repo_id=REPO_ID,
-            filename=MODEL_FILENAME,
-            token=HF_TOKEN,
-            cache_dir=CACHE_DIR,
-        )
-        print("✅ Downloaded via hf_hub_download:", model_path)
-        return model_path
-    except Exception as e:
-        print("⚠️ hf_hub_download failed:", e)
-        print("🔁 Falling back to manual download...")
-
-        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-        url = f"https://huggingface.co/{REPO_ID}/resolve/main/{MODEL_FILENAME}"
-        response = requests.get(url, headers=headers, stream=True)
-        response.raise_for_status()
-
-        with open(LOCAL_MODEL_PATH, "wb") as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    f.write(chunk)
-        print("✅ Manual download completed:", LOCAL_MODEL_PATH)
-        return LOCAL_MODEL_PATH
-
-
-print("📦 Loading GGUF model...")
-model_path = download_model()
-llm = Llama(model_path=model_path)
-
-def generate_structure(prompt: str) -> str:
-    output = llm(prompt, max_tokens=512)
-    return output["choices"][0]["text"].strip()
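
Note that deleting model.py also drops the manual requests streaming fallback it contained; the new app.py simply raises if hf_hub_download fails. If that resilience is still wanted, it could be folded back into app.py's download step, roughly as sketched below. This is a hypothetical adaptation reusing REPO_ID, FILENAME, MODEL_DIR, MODEL_PATH and HF_TOKEN from the new app.py; it is not part of this commit.

import requests

def download_with_fallback() -> str:
    # Try the Hub client first, mirroring app.py's Step 1.
    try:
        return hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            token=HF_TOKEN,
            local_dir=MODEL_DIR,
        )
    except Exception as e:
        # Fall back to a direct streamed download, as the deleted model.py did.
        print(f"⚠️ hf_hub_download failed, trying direct download: {e}")
        url = f"https://huggingface.co/{REPO_ID}/resolve/main/{FILENAME}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        with requests.get(url, headers=headers, stream=True, timeout=300) as r:
            r.raise_for_status()
            with open(MODEL_PATH, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return MODEL_PATH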