import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Load the GGUF model once at startup; n_ctx caps the combined
# prompt-plus-response context window at 512 tokens.
MODEL_PATH = "./models/gemma-2b-it.gguf"
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
llm = Llama(model_path=MODEL_PATH, n_ctx=512)

# Allow cross-origin requests so a local frontend can reach the API.
# Tighten allow_origins to specific domains before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class PromptInput(BaseModel):
    prompt: str


@app.post("/prompt")
def generate_response(data: PromptInput):
    # llama_cpp inference is blocking, so this is a plain `def`:
    # FastAPI runs sync endpoints in a worker thread instead of
    # blocking the event loop the way `async def` would here.
    output = llm(data.prompt, max_tokens=512, stop=["</s>", "\n\n"], echo=False)
    return {"response": output["choices"][0]["text"].strip()}
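
# Example request against this endpoint, assuming the server runs on
# uvicorn's default port 8000 (the prompt text is illustrative only):
#
#   curl -X POST http://localhost:8000/prompt \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a haiku about the sea."}'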


@app.get("/")
def read_root():
    # Simple health-check endpoint.
    return {"message": "AI Builder Backend running"}
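

# A minimal entry-point sketch, assuming uvicorn is installed
# (pip install uvicorn); any ASGI server would work equally well.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)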