vaigunthan committed on
Commit
78e0339
·
verified ·
1 Parent(s): 5fd581d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +92 -0
  2. requirements.txt +3 -3
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import math
4
+ import requests
5
+ import pdfplumber
6
+ import gradio as gr
7
+
8
+ HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
9
+ EMBED_MODEL = "ibm-granite/granite-embedding-english-r2"
10
+ GEN_MODEL = "ibm-granite/granite-3.3-2b-instruct"
11
+
12
+ store = [] # simple in-memory vector store
13
+
14
+
def hf_request(model, payload):
    """POST *payload* to the Hugging Face Inference API for *model*.

    Returns the parsed JSON response.
    Raises requests.HTTPError on a non-2xx status (via raise_for_status).
    """
    res = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        json=payload,
        # BUG FIX: requests has no default timeout — without one a stalled
        # API call blocks the Gradio worker forever.
        timeout=60,
    )
    res.raise_for_status()
    return res.json()
23
+
24
+
def cosine(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    A tiny epsilon in the denominator guards against division by zero
    when either vector has zero norm.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(v * v for v in a))
    norm_b = math.sqrt(sum(v * v for v in b))
    denom = norm_a * norm_b + 1e-9
    return dot / denom
30
+
31
+
def upload_pdf(pdf_file):
    """Extract text from the uploaded PDF, chunk it, embed every chunk via
    the HF Inference API, and rebuild the module-level vector store.

    Returns a human-readable status string for the Gradio UI.
    """
    global store
    if pdf_file is None:
        return "Please upload a PDF first."

    with open(pdf_file.name, "rb") as f:
        pdf_bytes = f.read()

    text = ""
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer
            text += page.extract_text() or ""

    # Greedy sentence-packing into ~800-character chunks.
    chunks, buf = [], ""
    for sent in text.split(". "):
        if len(buf) + len(sent) > 800:
            # BUG FIX: only flush a non-blank buffer — previously, a first
            # sentence longer than 800 chars appended an empty-string chunk,
            # which was then embedded and indexed.
            if buf.strip():
                chunks.append(buf.strip())
            buf = sent
        else:
            buf += " " + sent
    if buf.strip():
        chunks.append(buf.strip())

    if not chunks:
        return "No text extracted from PDF."

    embeds = hf_request(EMBED_MODEL, {"inputs": chunks})
    store = [{"text": c, "vec": embeds[i]} for i, c in enumerate(chunks)]

    return f"✅ PDF processed. {len(store)} chunks indexed."
62
+
63
+
def ask_question(q):
    """Answer question *q* using the single most relevant indexed chunk
    as context for the generation model.

    Returns the generated answer, or a warning when no PDF is indexed.
    """
    if not store:
        return "⚠️ Please upload a PDF first."

    query_vec = hf_request(EMBED_MODEL, {"inputs": [q]})[0]

    # Linear scan for the chunk whose embedding is closest to the query
    # (keeps the first chunk on ties, like max() would).
    best_item = None
    best_score = float("-inf")
    for item in store:
        score = cosine(query_vec, item["vec"])
        if score > best_score:
            best_score = score
            best_item = item

    prompt = f"Answer the question using this context:\n{best_item['text']}\n\nQ: {q}"
    out = hf_request(GEN_MODEL, {"inputs": prompt})

    return out[0].get("generated_text", "No answer")
75
+
76
+
77
+ with gr.Blocks() as demo:
78
+ gr.Markdown("# 📘 StudyMate — PDF Q&A with IBM Granite")
79
+ with gr.Row():
80
+ pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
81
+ upload_btn = gr.Button("Process PDF")
82
+ status = gr.Textbox(label="Status", interactive=False)
83
+
84
+ with gr.Row():
85
+ question = gr.Textbox(label="Ask a Question")
86
+ ask_btn = gr.Button("Get Answer")
87
+ answer = gr.Textbox(label="Answer", interactive=False)
88
+
89
+ upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
90
+ ask_btn.click(ask_question, inputs=question, outputs=answer)
91
+
92
+ demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- altair
2
- pandas
3
- streamlit
 
1
+ gradio
2
+ pdfplumber
3
+ requests