ThongCoding committed
Commit 8d85a2c · 1 Parent(s): d2f3a93

Files changed (2):
  1. app.py +19 -12
  2. model.py +21 -24
app.py CHANGED

```diff
@@ -1,23 +1,30 @@
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
 from model import generate_structure
 import uvicorn
 
 app = FastAPI()
 
-@app.get("/")
-def index():
-    return {"message": "Minecraft AI Builder Backend is running."}
+# Allow all CORS (for testing or frontend use)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+class PromptRequest(BaseModel):
+    prompt: str
 
 @app.post("/prompt")
-async def prompt_handler(req: Request):
+async def prompt_route(data: PromptRequest):
     try:
-        data = await req.json()
-        prompt = data.get("prompt", "")
-        response = generate_structure(prompt)
+        response = generate_structure(data.prompt)
         return {"response": response}
     except Exception as e:
-        return JSONResponse(status_code=500, content={"error": str(e)})
+        return {"error": str(e)}
 
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
```
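With this change, clients send JSON matching the `PromptRequest` model instead of a raw request body. A minimal client sketch follows, assuming the Space is reachable on localhost:7860 (per the `uvicorn.run` call); the prompt string is a made-up example. Note that the new handler returns errors as an HTTP 200 response with an `error` key, rather than the old 500 `JSONResponse`:

```python
# Minimal client sketch for the new /prompt route.
# Assumes the app is running locally on port 7860 (see uvicorn.run above);
# the prompt string is a hypothetical example.
import requests

resp = requests.post(
    "http://localhost:7860/prompt",
    json={"prompt": "a small oak cabin with a torch by the door"},
)
payload = resp.json()

# Errors now arrive as {"error": ...} with HTTP 200, not a 500 status,
# so inspect the body rather than resp.status_code.
if "error" in payload:
    print("Backend error:", payload["error"])
else:
    print(payload["response"])
```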
model.py CHANGED

```diff
@@ -2,41 +2,38 @@ import os
 import requests
 from llama_cpp import Llama
 
-HF_TOKEN = os.getenv("HF_TOKEN")
-MODEL_REPO = "afrideva/TinyMistral-248M-SFT-v4-GGUF"
-MODEL_FILENAME = "tinymistral-248m-sft-v4.q2_k.gguf"
+MODEL_URL = "https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf"
+MODEL_FILENAME = "Llama-3.2-1B-Instruct.Q4_K_M.gguf"
 MODEL_PATH = f"./models/{MODEL_FILENAME}"
 
-# Manual download with fallback
-if not os.path.exists(MODEL_PATH):
-    print("📦 Downloading GGUF model manually from Hugging Face...")
-
-    url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
-    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-
+def download_model():
     os.makedirs("./models", exist_ok=True)
-    with requests.get(url, headers=headers, stream=True) as r:
+    print("📦 Downloading GGUF model directly from URL...")
+    with requests.get(MODEL_URL, stream=True) as r:
         r.raise_for_status()
         with open(MODEL_PATH, "wb") as f:
             for chunk in r.iter_content(chunk_size=8192):
                 f.write(chunk)
-
     print(f"✅ Model downloaded to {MODEL_PATH}")
 
-# Load with llama-cpp
+# Only download if not already present
+if not os.path.exists(MODEL_PATH):
+    download_model()
+
+# Load model with llama-cpp-python
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=256,
-    n_threads=2,
-    n_batch=32,
-    n_gpu_layers=0,
-    chat_format=None  # Not using llama-2 format
+    n_ctx=512,
+    n_batch=512,
+    n_threads=6,
+    chat_format="llama-3",
+    verbose=False
 )
 
 def generate_structure(prompt: str) -> str:
-    output = llm.create_completion(
-        prompt=prompt,
-        temperature=0.7,
-        max_tokens=512,
-    )
-    return output["choices"][0]["text"].strip()
+    messages = [
+        {"role": "system", "content": "You are a Minecraft-style structure planner. You always respond with strictly valid JSON describing a 3D structure."},
+        {"role": "user", "content": prompt}
+    ]
+    output = llm.create_chat_completion(messages=messages)
+    return output["choices"][0]["message"]["content"]
```
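The new system prompt asks for strictly valid JSON, but `generate_structure` still returns the raw string without checking it. A hedged sketch of a wrapper a caller might add (the wrapper name and the `ValueError` fallback are assumptions, not part of this commit; a 1B instruct model will not always comply):

```python
# Sketch only: parse-check the model output before using it.
# generate_structure is the function defined above; everything else
# here (name, error handling) is an illustrative assumption.
import json

def generate_structure_checked(prompt: str) -> dict:
    raw = generate_structure(prompt)
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        # Small instruct models often wrap JSON in prose or code fences;
        # surface the raw text so the caller can decide how to recover.
        raise ValueError(f"Model did not return valid JSON ({e}): {raw!r}")
```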