rathil123 committed on
Commit
a2dfbcc
·
verified ·
1 Parent(s): bbc00e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -60
app.py CHANGED
@@ -1,30 +1,15 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
 
4
  from PyPDF2 import PdfReader
5
 
6
  # -----------------------------
7
- # Load model with fallback
8
  # -----------------------------
9
- MODEL_NAME = "distilgpt2" # fallback if granite not available
10
-
11
- try:
12
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
13
- model = AutoModelForCausalLM.from_pretrained(
14
- MODEL_NAME,
15
- trust_remote_code=True,
16
- torch_dtype=torch.float32,
17
- device_map="auto"
18
- )
19
- except Exception as e:
20
- print(f"⚠️ Model load failed: {e}")
21
- print("Falling back to distilgpt2...")
22
- tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
23
- model = AutoModelForCausalLM.from_pretrained("distilgpt2")
24
-
25
- # Ensure pad token exists
26
- if tokenizer.pad_token is None:
27
- tokenizer.pad_token = tokenizer.eos_token
28
 
29
 
30
  # -----------------------------
@@ -33,67 +18,96 @@ if tokenizer.pad_token is None:
33
  def extract_text_from_pdf(pdf_file):
34
  text = ""
35
  try:
36
- # pdf_file is a dict in Gradio (with 'name' key)
37
  file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
38
  reader = PdfReader(file_path)
39
  for page in reader.pages:
40
- text += page.extract_text() or ""
 
 
41
  except Exception as e:
42
  text = f"❌ Error reading PDF: {e}"
43
- return text
44
 
45
 
46
  # -----------------------------
47
- # Response generation
48
  # -----------------------------
49
- def generate_response(pdf_file, user_query):
50
- pdf_text = extract_text_from_pdf(pdf_file)
51
- if not pdf_text.strip():
52
- return "⚠️ Could not extract text from the PDF."
53
-
54
- # Build prompt
55
- prompt = f"""
56
- You are a Sustainable Smart City Assistant.
57
- Here is context from a PDF document:
58
 
59
- {pdf_text[:2000]} # limit context length to avoid overflow
 
 
60
 
61
- User query: {user_query}
62
 
63
- Answer clearly and concisely:
64
- """
 
 
 
 
 
 
 
 
65
 
66
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
 
67
 
68
- with torch.no_grad():
69
- outputs = model.generate(
70
- **inputs,
71
- max_new_tokens=200,
72
- temperature=0.7,
73
- pad_token_id=tokenizer.eos_token_id
74
- )
75
 
76
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
77
- return response
 
 
 
78
 
79
 
80
  # -----------------------------
81
- # Gradio UI
82
  # -----------------------------
83
  with gr.Blocks() as app:
84
- gr.Markdown("## 🌍 Sustainable Smart City Assistant")
85
-
86
- with gr.Row():
87
- pdf_file = gr.File(label="Upload PDF", type="file")
88
- user_query = gr.Textbox(label="Ask your question")
89
-
90
- output = gr.Textbox(label="Assistant Response")
91
-
92
- submit = gr.Button("Get Answer")
93
- submit.click(fn=generate_response, inputs=[pdf_file, user_query], outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  # -----------------------------
96
  # Run app
97
  # -----------------------------
98
  if __name__ == "__main__":
99
  app.launch()
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  from PyPDF2 import PdfReader
4
 
5
  # -----------------------------
6
+ # Load HuggingFace pipelines
7
  # -----------------------------
8
# Text-generation pipeline backed by the small, lightweight distilgpt2 checkpoint.
generator = pipeline(task="text-generation", model="distilgpt2")

# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  # -----------------------------
 
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one stripped string.

    ``pdf_file`` may be an object exposing a ``name`` attribute (that value
    is handed to ``PdfReader``) or anything ``PdfReader`` accepts directly.
    Pages whose extraction yields nothing are skipped; every other page's
    text is followed by a newline. If anything raises while reading, the
    returned string is an error message instead of the document text.
    """
    try:
        # Fall back to the object itself when it has no ``name`` attribute.
        source = getattr(pdf_file, "name", pdf_file)
        chunks = []
        for page in PdfReader(source).pages:
            extracted = page.extract_text()
            if extracted:
                chunks.append(extracted + "\n")
        result = "".join(chunks)
    except Exception as err:
        # Any failure discards partial text and reports the error as the result.
        result = f"❌ Error reading PDF: {err}"
    return result.strip()
30
 
31
 
32
  # -----------------------------
33
+ # Eco Tips Generator
34
  # -----------------------------
35
def eco_tips_generator(problem_keywords):
    """Generate eco-friendly tips for the given problem keywords.

    Args:
        problem_keywords: Free-text keywords describing an environmental
            problem. ``None`` and whitespace-only values are rejected.

    Returns:
        The ``generated_text`` of the first sequence produced by the
        module-level ``generator`` pipeline, a warning string when no
        keywords were supplied, or an error string if generation raises.
    """
    # Treat a missing value like an empty one instead of failing on .strip().
    keywords = (problem_keywords or "").strip()
    if not keywords:
        return "⚠️ Please enter some keywords."

    prompt = f"Give 5 practical eco-friendly tips related to {keywords}:"
    try:
        result = generator(
            prompt,
            max_new_tokens=120,
            num_return_sequences=1,
            # temperature only influences decoding when sampling is enabled.
            do_sample=True,
            temperature=0.7,
        )
    except Exception as e:
        # Same UI-boundary handling as policy_summarization: report the
        # failure in the output box rather than raising into the framework.
        return f"❌ Error during generation: {e}"
    return result[0]["generated_text"]
42
 
 
43
 
44
+ # -----------------------------
45
+ # Policy Summarization
46
+ # -----------------------------
47
def policy_summarization(pdf_file, policy_text):
    """Summarize a policy document from an uploaded PDF or pasted text.

    The PDF takes priority when one is supplied. If the PDF yields no text,
    the pasted text is used instead. If reading the PDF failed, the error
    message from ``extract_text_from_pdf`` is returned as-is rather than
    being summarized.

    Args:
        pdf_file: Uploaded PDF (or ``None`` when nothing was uploaded).
        policy_text: Pasted policy text; may be ``None`` or empty.

    Returns:
        The ``summary_text`` produced by the module-level ``summarizer``
        pipeline, a warning string when there is nothing to summarize, or
        an error string when PDF reading or summarization fails.
    """
    pasted = (policy_text or "").strip()

    content = ""
    if pdf_file is not None:
        content = extract_text_from_pdf(pdf_file)
        # extract_text_from_pdf signals failure with a message string, not an
        # exception; surface it instead of feeding it to the summarizer.
        if content.startswith("❌ Error reading PDF:"):
            return content

    # No upload, or a PDF with no extractable text: use the pasted text.
    if not content:
        content = pasted

    if not content:
        return "⚠️ Please upload a PDF or paste some policy text."

    # Limit input size for summarizer
    content = content[:3000]

    try:
        summary = summarizer(
            content,
            max_length=200,
            min_length=60,
            do_sample=False,
            # Guard against inputs that still tokenize past the model limit.
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        return f"❌ Error during summarization: {e}"
66
 
67
 
68
  # -----------------------------
69
+ # Gradio Interface
70
  # -----------------------------
with gr.Blocks() as app:
    gr.Markdown("# 🌍 Eco Assistant & Policy Analyzer")

    with gr.Tabs():
        # Tab 1: keywords in, generated tips out.
        with gr.TabItem("♻️ Eco Tips Generator"):
            with gr.Row():
                with gr.Column():
                    keywords_input = gr.Textbox(
                        label="Environmental Problem/Keywords",
                        placeholder="e.g., plastic, solar, water waste, energy saving...",
                        lines=2,
                    )
                    generate_tips_btn = gr.Button("Generate Eco Tips")

                with gr.Column():
                    tips_output = gr.Textbox(label="Sustainable Living Tips", lines=10)

            generate_tips_btn.click(
                fn=eco_tips_generator,
                inputs=keywords_input,
                outputs=tips_output,
            )

        # Tab 2: PDF upload or pasted text in, summary out.
        with gr.TabItem("📑 Policy Summarization"):
            with gr.Row():
                with gr.Column():
                    pdf_upload = gr.File(label="Upload Policy PDF", file_types=[".pdf"])
                    policy_text_input = gr.Textbox(
                        label="Or paste policy text here",
                        placeholder="Paste policy document text...",
                        lines=5,
                    )
                    summarize_btn = gr.Button("Summarize Policy")

                with gr.Column():
                    summary_output = gr.Textbox(label="Policy Summary & Key Points", lines=15)

            summarize_btn.click(
                fn=policy_summarization,
                inputs=[pdf_upload, policy_text_input],
                outputs=summary_output,
            )

# -----------------------------
# Run app
# -----------------------------
if __name__ == "__main__":
    app.launch()
113
+