rathil123 committed
Commit bbc00e6 · verified · 1 Parent(s): d30b786

Update app.py

Files changed (1): app.py (+82 -61)
app.py CHANGED
@@ -1,78 +1,99 @@
-import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import PyPDF2
-
-# ----------------- Model Setup -----------------
-model_name = "ibm-granite/granite-3.2-2b-instruct"
-
-# Some models require trust_remote_code
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
-    trust_remote_code=True
-)
-
-# Ensure pad token is set
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-
-# ----------------- Core Functions -----------------
-def generate_response(prompt, max_length=512):
     try:
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)
-
-        if torch.cuda.is_available():
-            inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_length=max_length,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id
-            )
-
-        # Slice only the generated continuation
-        response_ids = outputs[0][inputs["input_ids"].shape[-1]:]
-        response = tokenizer.decode(response_ids, skip_special_tokens=True)
-        return response.strip()
     except Exception as e:
-        return f"⚠️ Error generating response: {str(e)}"
-
-def extract_text_from_pdf(pdf_file):
-    if pdf_file is None:
-        return ""
-    try:
-        pdf_reader = PyPDF2.PdfReader(pdf_file)
-        text = ""
-        for page in pdf_reader.pages:
-            page_text = page.extract_text()
-            if page_text:
-                text += page_text + "\n"
-        return text.strip()
-    except Exception as e:
-        return f"⚠️ Error reading PDF: {str(e)}"
-
-def eco_tips_generator(problem_keywords):
-    if not problem_keywords.strip():
-        return "⚠️ Please enter some keywords."
-    prompt = (
-        f"Generate practical and actionable eco-friendly tips for sustainable living "
-        f"related to: {problem_keywords}. Provide specific solutions and suggestions:"
-    )
-    return generate_response(prompt, max_length=400)
-
-def policy_summarization(pdf_file, policy_text):
-    if pdf_file is not None:
-        content = extract_text_from_pdf(pdf_file)
-    else:
-        content = policy_text
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import gradio as gr
+from PyPDF2 import PdfReader
+
+# -----------------------------
+# Load model with fallback
+# -----------------------------
+MODEL_NAME = "ibm-granite/granite-3.2-2b-instruct"  # distilgpt2 serves as the fallback below
+
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        torch_dtype=torch.float32,
+        device_map="auto"
+    )
+except Exception as e:
+    print(f"⚠️ Model load failed: {e}")
+    print("Falling back to distilgpt2...")
+    tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
+    model = AutoModelForCausalLM.from_pretrained("distilgpt2")
+
+# Ensure pad token exists
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
+
+
+# -----------------------------
+# PDF text extraction
+# -----------------------------
+def extract_text_from_pdf(pdf_file):
+    text = ""
     try:
+        # Gradio passes either a file wrapper exposing .name or a plain path string
+        file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
+        reader = PdfReader(file_path)
+        for page in reader.pages:
+            text += page.extract_text() or ""
     except Exception as e:
+        text = f"⚠️ Error reading PDF: {e}"
+    return text
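+# (page.extract_text() can come back empty for scanned or image-only pages;
+# the `or ""` guard above keeps the accumulator a plain string.)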
 
+# -----------------------------
+# Response generation
+# -----------------------------
+def generate_response(pdf_file, user_query):
+    pdf_text = extract_text_from_pdf(pdf_file)
+    if not pdf_text.strip():
+        return "⚠️ Could not extract text from the PDF."
+
+    # Build prompt; cap the PDF context at 2000 characters to avoid overflow
+    prompt = f"""
+You are a Sustainable Smart City Assistant.
+Here is context from a PDF document:
+
+{pdf_text[:2000]}
+
+User query: {user_query}
+
+Answer clearly and concisely:
+"""
+
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=200,
+            do_sample=True,  # temperature has no effect without sampling
+            temperature=0.7,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    # Decode only the newly generated tokens, not the echoed prompt
+    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
+    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+    return response.strip()
+
+
+# -----------------------------
+# Gradio UI
+# -----------------------------
+with gr.Blocks() as app:
+    gr.Markdown("## 🌍 Sustainable Smart City Assistant")
+
+    with gr.Row():
+        # default type works across Gradio versions with the .name/path handling above
+        pdf_file = gr.File(label="Upload PDF")
+        user_query = gr.Textbox(label="Ask your question")
+
+    output = gr.Textbox(label="Assistant Response")
+
+    submit = gr.Button("Get Answer")
+    submit.click(fn=generate_response, inputs=[pdf_file, user_query], outputs=output)
+
+# -----------------------------
+# Run app
+# -----------------------------
+if __name__ == "__main__":
+    app.launch()
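
A quick way to sanity-check the updated functions outside the UI (a sketch, not part of the commit; it assumes the dependencies are installed, the file is saved as app.py, and "sample.pdf" is a placeholder path):

    # hypothetical smoke test; importing app loads the model but does not launch the UI
    from app import extract_text_from_pdf, generate_response

    text = extract_text_from_pdf("sample.pdf")  # plain paths are accepted
    print(text[:200])
    print(generate_response("sample.pdf", "Summarize the key points of this document."))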