File size: 884 Bytes
d9d6b2c
deb83c9
d9d6b2c
 
 
deb83c9
 
 
d9d6b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231cb7b
deb83c9
072df7d
d9d6b2c
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import asyncio
import os
import threading

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from pydantic import BaseModel

app = FastAPI()

# Load model
# The GGUF file location can be overridden with the MODEL_PATH environment
# variable; the default is the path that used to be hard-coded here.
MODEL_PATH = os.environ.get("MODEL_PATH", "./models/gemma-2b-it.gguf")
# The model is loaded once at import time and shared by every request.
# n_ctx is the context size (in tokens) handed to llama.cpp.
llm = Llama(model_path=MODEL_PATH, n_ctx=512)

# Allow CORS (so frontend or Swagger can work).
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable; when it is unset or empty, every origin is allowed.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed cross-origin requests are not valid together with a
    # wildcard origin, so cookies/auth headers are only allowed once explicit
    # origins have been configured.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Input model
# Request body for POST /prompt: a JSON object with one string field.
class PromptInput(BaseModel):
    # Prompt text; the /prompt handler passes it to the model as-is.
    prompt: str

# Inference is a blocking call on one shared model object. The lock keeps
# completions strictly one at a time (as they were when the handler ran them
# inline), since concurrent use of a single Llama instance is not something
# this file can rely on being safe.
_llm_lock = threading.Lock()


def _run_completion(prompt: str) -> str:
    """Generate a completion for *prompt* and return its stripped text."""
    with _llm_lock:
        output = llm(prompt, max_tokens=512, stop=["</s>", "\n\n"], echo=False)
    return output["choices"][0]["text"].strip()


@app.post("/prompt")
async def generate_response(data: PromptInput):
    """Generate a completion for the submitted prompt.

    Args:
        data: Request body carrying the prompt text.

    Returns:
        A dict ``{"response": <text>}`` holding the first completion choice
        with leading and trailing whitespace removed.
    """
    # Calling the model directly inside an ``async def`` handler would block
    # the event loop for the whole generation, stalling every other request
    # (including the healthcheck). Run it in a worker thread instead.
    loop = asyncio.get_running_loop()
    text = await loop.run_in_executor(None, _run_completion, data.prompt)
    return {"response": text}

# Healthcheck
# GET / answers with a fixed JSON payload so a caller can confirm that the
# service is reachable; it does not touch the model.
@app.get("/")
def read_root():
    status = {"message": "AI Builder Backend running"}
    return status