Bella / app.py
shawno's picture
Rename requirements.txt to app.py
a590bd1 verified
raw
history blame
2.15 kB
import hashlib
import os

import chromadb
import gradio as gr
from chromadb.utils import embedding_functions
from fastapi import FastAPI, Query
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
# 1. Load model via llama-cpp-python
# Llama.from_pretrained resolves `filename` as a glob against the repo's file
# list and raises ValueError when the pattern matches more than one file, so a
# bare "*.gguf" only works for repos that ship exactly one GGUF file.
# NOTE(review): per its model card this repo publishes several quantizations
# plus an mmproj file; Q4_K_M is selected here as a CPU-friendly choice --
# confirm the file name against the repo listing.
model = Llama.from_pretrained(
    repo_id="openbmb/MiniCPM-V-2_6-gguf",
    filename="*Q4_K_M.gguf",
    n_ctx=4096,
)
# 2. Setup RAG
# One sentence-transformers model name is shared by the standalone query
# encoder and by the embedding function attached to the Chroma collection.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Encoder used directly by the query path.
embedder = SentenceTransformer(_EMBEDDING_MODEL_NAME)

# On-disk Chroma store rooted at ./chroma_db.
client = chromadb.PersistentClient(path="chroma_db")

# Embedding function Chroma applies to documents added to the collection.
_collection_embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=_EMBEDDING_MODEL_NAME
)
col = client.get_or_create_collection(
    "docs",
    embedding_function=_collection_embedding_fn,
)
# Seed with example context
seed_texts = [
    "MiniCPM‑V‑2_6‑gguf runs well on CPU via llama.cpp.",
    "This model supports RAG with Chromadb and FastAPI + Gradio UI.",
]
# IDs must be identical across process restarts because the collection is
# persisted on disk. The built-in hash() of a str is salted per interpreter
# run, so it would produce new IDs (and duplicate documents) on every start.
# A SHA-256 digest of the text is stable, and upsert() makes re-seeding an
# existing collection a no-op instead of an error or a duplicate insert.
seed_ids = [hashlib.sha256(text.encode("utf-8")).hexdigest() for text in seed_texts]
col.upsert(documents=seed_texts, ids=seed_ids)
def rag_query(q: str) -> str:
    """Answer *q* using documents retrieved from the Chroma collection.

    The question is embedded with ``embedder``, up to three nearest documents
    are fetched from ``col``, and they are joined into a context section of
    the prompt that is sent to ``model`` for completion.

    Args:
        q: The user's question.

    Returns:
        The text of the first completion choice returned by the model.
    """
    # Never request more neighbours than the collection holds, and skip the
    # lookup entirely when it is empty.
    available = col.count()
    if available:
        results = col.query(
            # encode() returns a numpy array; pass a plain list of floats.
            query_embeddings=[embedder.encode(q).tolist()],
            n_results=min(3, available),
        )
        documents = results["documents"][0]
    else:
        documents = []

    context = "\n".join(documents)
    prompt = f"Context:\n{context}\n\nUser: {q}\nAssistant:"
    # Llama exposes create_completion() (also reachable via __call__); it has
    # no create() method.
    out = model.create_completion(prompt=prompt, max_tokens=256, temperature=0.7)
    return out["choices"][0]["text"]
# 3. FastAPI app
app = FastAPI()


@app.get("/ask")
def ask(q: str = Query(...)):
    """Handle GET /ask: answer the required query parameter ``q``."""
    answer = rag_query(q)
    return {"answer": answer}


@app.post("/ask")
def ask_post(body: dict):
    """Handle POST /ask: read ``q`` from the JSON body and delegate to ``ask``.

    A body without a ``q`` key is treated as an empty question.
    """
    question = body.get("q", "")
    return ask(q=question)
# 4. Gradio UI
def chat_fn(message, history):
    """Answer *message* and return the updated chat history twice.

    Args:
        message: Text submitted by the user.
        history: Existing list of ``(user_message, assistant_reply)`` pairs,
            or a falsy value when the conversation has not started.

    Returns:
        A 2-tuple ``(history, history)``; the same updated list is returned
        for both outputs the submit handler is wired to.
    """
    reply = rag_query(message)
    # Work on a copy so the list passed in by the caller is not mutated.
    history = list(history or [])
    # Each chat entry is one (user message, assistant reply) pair. Appending
    # ("User", message) / ("Assistant", reply) would show the role labels as
    # if they were chat messages.
    history.append((message, reply))
    return history, history
# Chat layout: conversation view on top, a single-line input below it, then a
# caption. Components render in the order they are created.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Ask me...", show_label=False)
    # Pressing Enter sends the textbox value and the current history to
    # chat_fn; both values it returns are routed to the chatbot component.
    txt.submit(
        fn=chat_fn,
        inputs=[txt, chatbot],
        outputs=[chatbot, chatbot],
    )
    gr.Markdown("### 🧠 MiniCPM‑V‑2_6‑gguf RAG Chat (GET & POST API support)")
# Serve the Gradio UI from the FastAPI application itself rather than calling
# demo.launch() from a startup hook. launch() blocks its calling thread by
# default, which would stall application startup, and it would also bind the
# same default port (7860) that uvicorn needs. Mounting keeps the /ask routes
# and the UI in one server on one port.
demo.queue()
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn

    # Honour the PORT environment variable, falling back to 7860.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))