from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()
# Load the GGUF model once at startup; n_ctx caps the context window at 512 tokens
MODEL_PATH = "./models/gemma-2b-it.gguf"
llm = Llama(model_path=MODEL_PATH, n_ctx=512)
# Allow CORS so a browser frontend (or Swagger UI) can call the API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # restrict to the frontend's origin in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
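# A locked-down production configuration might look like the line below
# (hypothetical frontend origin). Note that browsers reject credentialed
# requests against a wildcard origin, so a concrete origin is required
# whenever allow_credentials=True actually matters:
#   allow_origins=["https://app.example.com"]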
# Input model
class PromptInput(BaseModel):
    prompt: str
# Generate a completion for the submitted prompt. Declared sync (not async)
# so FastAPI runs the blocking llama_cpp call in a worker thread instead of
# stalling the event loop.
@app.post("/prompt")
def generate_response(data: PromptInput):
    output = llm(data.prompt, max_tokens=512, stop=["</s>", "\n\n"], echo=False)
    return {"response": output["choices"][0]["text"].strip()}
# Healthcheck
@app.get("/")
def read_root():
    return {"message": "AI Builder Backend running"}
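
# Example usage, assuming this file is saved as main.py (the module name
# and port are assumptions; adjust to your setup):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl -X POST http://localhost:8000/prompt \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a haiku about FastAPI"}'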