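"""StudyMate: PDF question answering with IBM Granite models.

A small Gradio app that extracts text from an uploaded PDF, chunks it,
embeds the chunks via the Hugging Face serverless Inference API, and
answers questions by prompting a Granite instruct model with the most
similar chunk.
"""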
import os
import math
import requests
import pdfplumber
import gradio as gr

HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")  # must be set, or API calls will fail with 401
EMBED_MODEL = "ibm-granite/granite-embedding-english-r2"
GEN_MODEL = "ibm-granite/granite-3.3-2b-instruct"

store = []  # simple in-memory vector store: list of {"text": chunk, "vec": embedding}


def hf_request(model, payload):
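    """POST a payload to the HF serverless Inference API for `model` and return the parsed JSON."""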
    res = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        json=payload,
        timeout=120,  # serverless models can cold-start slowly; don't hang forever
    )
    res.raise_for_status()
    return res.json()


def cosine(a, b):
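    """Cosine similarity of two equal-length vectors; the 1e-9 guards against division by zero."""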
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb + 1e-9)


def upload_pdf(pdf_file):
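    """Extract text from the uploaded PDF, chunk it, embed the chunks, and build the in-memory index."""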
    global store
    if pdf_file is None:
        return "Please upload a PDF first."

    # Gradio may hand us a tempfile-like object (with .name) or a plain
    # filepath string depending on version; handle both, and let pdfplumber
    # open the path directly instead of round-tripping through memory.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    text = ""
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            text += page.extract_text() or ""

    # Greedy chunking: pack sentence fragments into chunks of at most ~800
    # characters, restoring the periods that str.split(". ") consumes.
    chunks, buf = [], ""
    for sent in text.split(". "):
        sent = sent.strip()
        if not sent:
            continue
        if not sent.endswith("."):
            sent += "."
        if len(buf) + len(sent) > 800:
            chunks.append(buf.strip())
            buf = sent
        else:
            buf += " " + sent
    if buf.strip():
        chunks.append(buf.strip())

    if not chunks:
        return "No text extracted from PDF."

    # Assumes the feature-extraction pipeline returns one pooled vector per
    # input string, as it does for sentence-transformers-style models.
    embeds = hf_request(EMBED_MODEL, {"inputs": chunks})
    store = [{"text": c, "vec": v} for c, v in zip(chunks, embeds)]

    return f"✅ PDF processed. {len(store)} chunks indexed."


def ask_question(q):
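    """Answer `q` by retrieving the most similar chunk and prompting the generator with it."""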
    if not store:
        return "⚠️ Please upload a PDF first."

    # Brute-force retrieval: embed the question and pick the single chunk
    # with the highest cosine similarity.
    q_embed = hf_request(EMBED_MODEL, {"inputs": [q]})[0]
    best = max(store, key=lambda it: cosine(q_embed, it["vec"]))

    prompt = f"Answer the question using this context:\n{best['text']}\n\nQ: {q}\nA:"
    # return_full_text=False keeps the prompt out of the response;
    # max_new_tokens bounds the answer length.
    out = hf_request(
        GEN_MODEL,
        {
            "inputs": prompt,
            "parameters": {"max_new_tokens": 256, "return_full_text": False},
        },
    )

    return out[0].get("generated_text", "No answer")


# Minimal Gradio UI: one row to upload and process a PDF, one to ask questions.
with gr.Blocks() as demo:
    gr.Markdown("# 📘 StudyMate: PDF Q&A with IBM Granite")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Get Answer")
    answer = gr.Textbox(label="Answer", interactive=False)

    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

demo.launch()