project / app.py
vaigunthan's picture
Upload 2 files
78e0339 verified
import os
import io
import math
import requests
import pdfplumber
import gradio as gr
# API token for the Hugging Face Inference API, read from the environment.
# Evaluates to None when HUGGINGFACE_API_TOKEN is not set.
HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

# Model repository ids: one for embedding text, one for generating answers.
EMBED_MODEL = "ibm-granite/granite-embedding-english-r2"
GEN_MODEL = "ibm-granite/granite-3.3-2b-instruct"

# Simple in-memory vector store: a list of {"text": ..., "vec": ...} entries
# that is rebuilt every time a PDF is processed.
store = []
def hf_request(model, payload, timeout=120):
    """POST *payload* to the Hugging Face Inference API for *model*.

    Args:
        model: Model repository id appended to the inference endpoint URL.
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would freeze the UI
            callback on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    # Leave the header out when no token is configured instead of sending
    # the literal string "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    res = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    res.raise_for_status()
    return res.json()
def cosine(a, b):
    """Return the cosine similarity of the numeric sequences *a* and *b*.

    The dot product is taken over the pairs produced by ``zip(a, b)``; each
    norm is taken over the whole of its own sequence. A small constant is
    added to the denominator so that zero vectors give 0.0 instead of a
    division by zero.
    """

    def _norm(vec):
        return math.sqrt(sum(v * v for v in vec))

    numerator = sum(p * q for p, q in zip(a, b))
    denominator = _norm(a) * _norm(b) + 1e-9
    return numerator / denominator
def _chunk_text(text, max_chars=800):
    """Greedily pack the sentences of *text* into chunks of about *max_chars*."""
    chunks, buf = [], ""
    for sent in text.split(". "):
        sent = sent.strip()
        if not sent:
            continue
        # Only flush a non-empty buffer, so an over-long first sentence does
        # not produce an empty chunk.
        if buf and len(buf) + len(sent) > max_chars:
            chunks.append(buf)
            buf = sent
        else:
            # Put back the ". " separator that split() consumed.
            buf = f"{buf}. {sent}" if buf else sent
    if buf:
        chunks.append(buf)
    return chunks


def upload_pdf(pdf_file):
    """Extract the text of an uploaded PDF, embed it and rebuild ``store``.

    Args:
        pdf_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself; ``None`` when nothing was uploaded.

    Returns:
        A status message for the UI.

    Side effects:
        On success the module-level ``store`` is replaced by one
        ``{"text": chunk, "vec": embedding}`` entry per chunk. It is left
        untouched when there is no file or no extractable text.
    """
    global store
    if pdf_file is None:
        return "Please upload a PDF first."

    path = getattr(pdf_file, "name", pdf_file)
    with pdfplumber.open(path) as pdf:
        # Join pages with a newline so the last word of one page does not
        # fuse with the first word of the next.
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    chunks = _chunk_text(text)
    if not chunks:
        return "No text extracted from PDF."

    embeds = hf_request(EMBED_MODEL, {"inputs": chunks})
    store = [{"text": c, "vec": embeds[i]} for i, c in enumerate(chunks)]
    return f"✅ PDF processed. {len(store)} chunks indexed."
def ask_question(q):
    """Answer *q* using the stored chunk most similar to it.

    The question is embedded, the entry of ``store`` with the highest cosine
    similarity is chosen as context, and the generation model is prompted
    with that context plus the question.

    Args:
        q: The question text typed by the user.

    Returns:
        The generated answer, or a short message when no PDF has been
        indexed, the question is blank, or the response holds no text.
    """
    if not store:
        return "⚠️ Please upload a PDF first."
    if not q or not q.strip():
        return "⚠️ Please enter a question."

    q_embed = hf_request(EMBED_MODEL, {"inputs": [q]})[0]
    best = max(store, key=lambda it: cosine(q_embed, it["vec"]))
    prompt = f"Answer the question using this context:\n{best['text']}\n\nQ: {q}"
    out = hf_request(GEN_MODEL, {"inputs": prompt})

    # The response is expected to be a list of dicts; tolerate a bare dict or
    # an empty list instead of raising on out[0].
    first = out[0] if isinstance(out, list) and out else out
    if not isinstance(first, dict):
        return "No answer"

    generated = first.get("generated_text", "No answer")
    # NOTE(review): text-generation endpoints may echo the prompt at the start
    # of generated_text; drop it when present so only the answer is shown.
    if isinstance(generated, str) and generated.startswith(prompt):
        generated = generated[len(prompt):].strip() or "No answer"
    return generated
# Gradio UI: one row to upload and index a PDF, one row to ask a question.
# NOTE(review): the original indentation was lost, so the placement of the
# status/answer boxes below their rows is reconstructed -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("# 📘 StudyMate — PDF Q&A with IBM Granite")

    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Get Answer")
    answer = gr.Textbox(label="Answer", interactive=False)

    # Wire each button to its handler; the result goes to the read-only box.
    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

demo.launch()