# Hugging Face Space status shown when this file was captured: Runtime error
import io
import math
import os
import re

import gradio as gr
import pdfplumber
import requests
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

# Model ids used for embedding text and for generating answers.
EMBED_MODEL = "ibm-granite/granite-embedding-english-r2"
GEN_MODEL = "ibm-granite/granite-3.3-2b-instruct"

# Simple in-memory vector store: a list of {"text": ..., "vec": ...} dicts.
store = []
def hf_request(model, payload, timeout=60):
    """POST *payload* to the Hugging Face inference endpoint for *model*.

    Args:
        model: Model id appended to the inference API URL.
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would freeze
            the UI callback on a stalled connection.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within *timeout* seconds.
    """
    # Only send the Authorization header when a token is configured;
    # otherwise the header would carry the literal string "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    res = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    res.raise_for_status()
    return res.json()
def cosine(a, b):
    """Return the cosine similarity of the numeric sequences *a* and *b*.

    The dot product pairs elements with ``zip`` and therefore covers only
    the common prefix when the lengths differ; each norm uses its whole
    sequence. A small epsilon in the denominator avoids division by zero,
    so an all-zero (or empty) vector scores 0.0.
    """
    inner = sum(u * v for u, v in zip(a, b))
    norm_a = math.sqrt(sum(u * u for u in a))
    norm_b = math.sqrt(sum(v * v for v in b))
    scale = norm_a * norm_b + 1e-9
    return inner / scale
def _chunk_text(text, max_chars=800):
    """Split *text* into sentence-aligned chunks of about *max_chars* each.

    Sentences are split on whitespace that follows ``.``, ``!`` or ``?``,
    so terminators are kept and sentences ending at a line break are
    recognised. Empty pieces are skipped, so blank input yields ``[]``.
    A single sentence longer than *max_chars* is kept whole as its own
    chunk rather than being cut mid-sentence.
    """
    chunks, buf = [], ""
    for sent in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sent:
            continue
        # "+ 1" accounts for the joining space added below.
        if buf and len(buf) + 1 + len(sent) > max_chars:
            chunks.append(buf)
            buf = sent
        else:
            buf = f"{buf} {sent}" if buf else sent
    if buf:
        chunks.append(buf)
    return chunks


def upload_pdf(pdf_file):
    """Extract, chunk and embed an uploaded PDF, replacing the global store.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input, or ``None``
            when nothing was uploaded.

    Returns:
        A status message for the UI: a prompt to upload, a notice that no
        text could be extracted, an embedding-error notice, or the number
        of chunks indexed.
    """
    global store
    if pdf_file is None:
        return "Please upload a PDF first."

    # Accept an object exposing ``.name`` (as the original code assumed)
    # or a plain path string.
    # NOTE(review): which one arrives depends on the Gradio version - confirm.
    path = getattr(pdf_file, "name", pdf_file)
    with open(path, "rb") as f:
        pdf_bytes = f.read()

    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        # Join pages with a newline so the last word of one page does not
        # fuse with the first word of the next.
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    chunks = _chunk_text(text)
    if not chunks:
        return "No text extracted from PDF."

    embeds = hf_request(EMBED_MODEL, {"inputs": chunks})
    # Expect one embedding per chunk; anything else would index garbage.
    if not isinstance(embeds, list) or len(embeds) != len(chunks):
        return "Embedding service returned an unexpected response."

    store = [{"text": c, "vec": v} for c, v in zip(chunks, embeds)]
    # NOTE(review): the leading glyph was mis-encoded in the source and
    # restored as a check mark - confirm the intended character.
    return f"✅ PDF processed. {len(store)} chunks indexed."
def ask_question(q):
    """Answer question *q* using the most similar indexed PDF chunk.

    The question is embedded, compared against every stored chunk with
    ``cosine``, and the best-matching chunk is passed to the generation
    model as context.

    Args:
        q: The question text from the UI.

    Returns:
        The ``generated_text`` of the first generation result, ``"No
        answer"`` when the response has no such field, or a prompt asking
        the user to upload a PDF / enter a question.
    """
    if not store:
        return "⚠️ Please upload a PDF first."
    # Do not spend two API calls on an empty or whitespace-only question.
    if not q or not q.strip():
        return "Please enter a question."

    q_embed = hf_request(EMBED_MODEL, {"inputs": [q]})[0]
    best = max(store, key=lambda it: cosine(q_embed, it["vec"]))
    prompt = f"Answer the question using this context:\n{best['text']}\n\nQ: {q}"
    out = hf_request(GEN_MODEL, {"inputs": prompt})

    # The code expects a list of result dicts; tolerate an empty list or a
    # bare dict instead of raising IndexError/KeyError in the UI callback.
    first = out[0] if isinstance(out, list) and out else out
    if isinstance(first, dict):
        return first.get("generated_text", "No answer")
    return "No answer"
# Build the Gradio UI: one row to upload and index a PDF, one row to ask a
# question, each followed by a read-only output box.
# NOTE(review): the source lost its indentation, so the row nesting below is
# reconstructed from statement order - confirm against the intended layout.
with gr.Blocks() as demo:
    # NOTE(review): the title glyphs were mis-encoded in the source and are
    # restored as a books emoji and an em dash - confirm the intended ones.
    gr.Markdown("# 📚 StudyMate — PDF Q&A with IBM Granite")

    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Get Answer")
    answer = gr.Textbox(label="Answer", interactive=False)

    # Wire the buttons to their handlers.
    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

demo.launch()