AryanJh committed on
Commit
a05515b
·
verified ·
1 Parent(s): 14bdd38

old version test

Browse files
Files changed (1) hide show
  1. app.py +405 -49
app.py CHANGED
@@ -1,70 +1,426 @@
 
 
1
  import gradio as gr
2
- from event_matcher import EventMatcher
3
- from typing import List, Tuple
 
 
 
 
 
 
 
 
4
 
5
class EventAssistant:
    """Chat-facing wrapper that forwards questions to an ``EventMatcher``."""

    def __init__(self):
        self.matcher = EventMatcher()

    def process_query(self, query: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
        """Answer *query* and return the reply with an extended history copy.

        Any exception raised by the matcher is turned into an apologetic
        reply rather than propagated. The caller's *history* is never
        mutated; a new list with the ``(query, reply)`` pair appended is
        returned instead.
        """
        try:
            reply = self.matcher.query(query)
        except Exception as e:
            reply = f"I encountered an error: {str(e)}. Please try again!"

        # Success and failure share the same bookkeeping: copy, then append.
        extended = list(history)
        extended.append((query, reply))
        return reply, extended
25
 
26
def create_demo():
    """Assemble the Gradio chat UI around an ``EventAssistant`` and return it."""
    assistant = EventAssistant()

    def user(user_message, history):
        """Clear the textbox and queue the user's turn with a pending reply."""
        pending_turn = [user_message, None]
        return "", history + [pending_turn]

    def bot(history):
        """Fill in the reply slot of the most recent turn."""
        latest_turn = history[-1]
        reply, _ = assistant.process_query(latest_turn[0], history[:-1])
        latest_turn[1] = reply
        return history

    def clear_chat():
        """Reset the chatbot component."""
        return None

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(
            label="Event Assistant Chat",
            bubble_full_width=False,
        )
        msg = gr.Textbox(
            label="Type your message here",
            placeholder="e.g., What events are happening this week?",
            lines=1
        )
        clear = gr.Button("Clear")

        # Echo the user's turn immediately, then compute the answer.
        echo_step = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
        echo_step.then(bot, chatbot, chatbot)
        clear.click(clear_chat, None, chatbot, queue=False)

        demo.title = "Brock Events Assistant"
        demo.description = "Ask me about events happening at Brock University!"

    return demo
67
 
68
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it with Gradio defaults.
    demo = create_demo()
    demo.launch()
 
 
 
 
 
 
1
+ # app.py
2
+
3
import gc
import json
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import chromadb
import feedparser
import gradio as gr
import pytz
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
14
 
15
class BrockEventsRAG:
    """Retrieval layer over the Brock University ExperienceBU events feed.

    Events from the RSS feed that start within the next ``WINDOW_DAYS`` days
    are embedded with a sentence-transformer model and stored in a Chroma
    collection. ``query`` searches that collection and ``generate_response``
    renders the hits as a chat reply.
    """

    FEED_URL = "https://experiencebu.brocku.ca/events.rss"
    COLLECTION_NAME = "brock_events"
    WINDOW_DAYS = 14
    CACHE_MAX_AGE_SECONDS = 86400  # 24 hours
    RSS_TIME_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'
    HTML_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self):
        """Load the embedding model, create the collection and index events."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.chroma_client = chromadb.Client()

        self.eastern = pytz.timezone('America/New_York')
        self._refresh_date_window()

        # Cache directory setup
        os.makedirs("cache", exist_ok=True)
        self.cache_file = "cache/events_cache.json"

        # Start from an empty collection so `query` always has one to hit,
        # even when the first feed fetch yields nothing.
        self.collection = self._reset_collection()

        # Load initial events
        self.update_database()

    def _refresh_date_window(self):
        """Recompute ``today`` (Eastern midnight) and the end of the window."""
        self.today = datetime.now(self.eastern).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        self.date_range_end = self.today + timedelta(days=self.WINDOW_DAYS)

    def _reset_collection(self):
        """Drop the events collection if present and return a fresh one."""
        try:
            self.chroma_client.delete_collection(self.COLLECTION_NAME)
        except Exception:
            # Best effort: on first start-up there is nothing to delete. A
            # real client failure will surface from create_collection below.
            pass
        return self.chroma_client.create_collection(
            name=self.COLLECTION_NAME,
            metadata={"description": "Brock University Events Database"},
        )

    def save_cache(self, data: dict):
        """Write events to the JSON cache file.

        Args:
            data: Mapping with ``'last_update'`` (ISO string) and ``'events'``
                (list of event dicts as produced by ``process_event``).

        Failures are printed and swallowed; the cache is best-effort.
        """
        try:
            events = []
            for event in data['events']:
                event_copy = event.copy()
                # datetime objects are not JSON serializable; store ISO text.
                for key in ('start_time', 'end_time'):
                    if event_copy.get(key):
                        event_copy[key] = event_copy[key].isoformat()
                events.append(event_copy)

            serializable_data = {
                'last_update': data['last_update'],
                'events': events,
            }
            with open(self.cache_file, 'w', encoding='utf-8') as f:
                json.dump(serializable_data, f, ensure_ascii=False, indent=2)
            print(f"Cache saved successfully to {self.cache_file}")
        except Exception as e:
            print(f"Error saving cache: {e}")

    def load_cache(self) -> dict:
        """Read the JSON cache file and restore datetime fields.

        Returns:
            The cached mapping, or ``{'last_update': None, 'events': []}``
            when the file is missing or cannot be parsed.
        """
        try:
            if not os.path.exists(self.cache_file):
                return {'last_update': None, 'events': []}

            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Convert ISO strings back to datetime objects.
            for event in data['events']:
                for key in ('start_time', 'end_time'):
                    if event.get(key):
                        event[key] = datetime.fromisoformat(event[key])
            return data
        except Exception as e:
            print(f"Error loading cache: {e}")
            return {'last_update': None, 'events': []}

    def should_update_cache(self) -> bool:
        """Return True when the cache is missing, unreadable or over 24h old.

        NOTE(review): nothing inside this class calls this method; callers
        that want age-based refreshes have to invoke it themselves.
        """
        try:
            last_update_text = self.load_cache().get('last_update')
            if not last_update_text:
                return True

            # `last_update` is written with naive datetime.now(), so compare
            # against a naive "now" as well.
            age = datetime.now() - datetime.fromisoformat(last_update_text)
            return age.total_seconds() > self.CACHE_MAX_AGE_SECONDS
        except Exception as e:
            print(f"Error checking cache: {e}")
            return True

    def _parse_rss_time(self, value) -> Optional[datetime]:
        """Parse an RSS timestamp (treated as UTC) into Eastern time."""
        if not value:
            return None
        parsed = datetime.strptime(value, self.RSS_TIME_FORMAT)
        return pytz.UTC.localize(parsed).astimezone(self.eastern)

    def _parse_html_time(self, element) -> Optional[datetime]:
        """Parse the ``datetime`` attribute of a ``<time>`` tag as Eastern."""
        if element is None or 'datetime' not in element.attrs:
            return None
        # Drop fractional seconds before parsing.
        raw = element['datetime'].split('.')[0]
        return self.eastern.localize(datetime.strptime(raw, self.HTML_TIME_FORMAT))

    def parse_event_datetime(self, entry) -> Tuple[Optional[datetime], Optional[datetime]]:
        """Extract start and end times from a feed entry.

        The entry's ``start``/``end`` fields are tried first. Only the values
        still missing afterwards are looked up in the ``<time>`` tags of the
        description HTML, so a valid RSS time is never overwritten.

        Returns:
            ``(start, end)`` as timezone-aware datetimes; either may be
            ``None``. ``(None, None)`` is returned if parsing raises.
        """
        try:
            start_dt = self._parse_rss_time(entry.get('start'))
            end_dt = self._parse_rss_time(entry.get('end'))

            if start_dt is None or end_dt is None:
                soup = BeautifulSoup(entry.get('description', ''), 'html.parser')
                if start_dt is None:
                    start_dt = self._parse_html_time(soup.find('time', class_='dt-start'))
                if end_dt is None:
                    end_dt = self._parse_html_time(soup.find('time', class_='dt-end'))

            return start_dt, end_dt
        except Exception as e:
            print(f"Error parsing dates: {e}")
            return None, None

    def get_location(self, entry) -> str:
        """Return the event location, or ``"Location not specified"``.

        The entry's ``location`` field is preferred; otherwise the text of a
        ``<span class="p-location">`` in the description HTML is used.
        """
        try:
            location = entry.get('location')
            if not location:
                soup = BeautifulSoup(entry.get('description', ''), 'html.parser')
                location_elem = soup.find('span', class_='p-location')
                if location_elem is not None:
                    location = location_elem.get_text().strip()
            return location if location else "Location not specified"
        except Exception as e:
            print(f"Error getting location: {e}")
            return "Location not specified"

    def process_event(self, entry) -> Optional[Dict]:
        """Convert one feed entry into an event dict.

        Returns:
            The event dict, or ``None`` when the entry has no start time,
            starts outside the date window, or cannot be processed.
        """
        try:
            start_time, end_time = self.parse_event_datetime(entry)

            # Skip if event is not in our date range
            if not start_time or not self.is_event_in_range(start_time):
                return None

            categories = [tag.term for tag in entry.get('tags', [])]
            categories_str = '; '.join(categories) if categories else 'No categories'

            hosts = entry.get('host', [])
            if not isinstance(hosts, list):
                hosts = [hosts]
            hosts_str = '; '.join(str(host) for host in hosts) if hosts else 'No host specified'

            # Strip markup and collapse whitespace in the description.
            soup = BeautifulSoup(entry.get('description', ''), 'html.parser')
            description = ' '.join(soup.get_text().split())

            return {
                'title': entry.title,
                'start_time': start_time,
                'end_time': end_time,
                'location': self.get_location(entry),
                'categories': categories_str,
                'hosts': hosts_str,
                'description': description,
                'link': entry.link,
                'guid': entry.guid,
            }
        except Exception as e:
            print(f"Error processing event {entry.get('title', 'Unknown')}: {e}")
            return None

    def is_event_in_range(self, event_time: datetime) -> bool:
        """Return True when *event_time* lies in ``[today, date_range_end]``."""
        if not event_time:
            return False
        return self.today <= event_time <= self.date_range_end

    def format_event_text(self, event: Dict) -> str:
        """Render an event dict as the text block that gets embedded.

        The description is truncated to 500 characters. A missing end time
        is rendered as ``not specified``.
        """
        start = event['start_time']
        end = event.get('end_time')
        end_text = end.strftime('%I:%M %p') if end else 'not specified'
        lines = [
            f"Event: {event['title']}",
            f"Date: {start.strftime('%A, %B %d, %Y')}",
            f"Time: {start.strftime('%I:%M %p')} to {end_text}",
            f"Location: {event['location']}",
            f"Categories: {event['categories']}",
            f"Hosted by: {event['hosts']}",
            f"Description: {event['description'][:500]}",
        ]
        return "\n" + "\n".join(lines) + "\n"

    def update_database(self):
        """Fetch the feed and rebuild the collection from in-range events.

        The date window is recomputed on every call, and the collection is
        rebuilt from scratch so that refreshes replace earlier events rather
        than colliding with their ids. If the feed yields no in-range events
        the existing collection is left untouched.
        """
        print("Fetching events...")
        # A long-running process must not keep filtering against its start day.
        self._refresh_date_window()
        feed = feedparser.parse(self.FEED_URL)
        print(f"Found {len(feed.entries)} total events")

        # process_event returns None for anything outside the window.
        valid_events = [
            event for event in map(self.process_event, feed.entries) if event
        ]
        print(f"Found {len(valid_events)} events in the next {self.WINDOW_DAYS} days")

        if not valid_events:
            print("No events found in date range")
            return

        documents = [self.format_event_text(event) for event in valid_events]
        metadatas = [{
            'title': event['title'],
            'date': event['start_time'].strftime('%Y-%m-%d'),
            'time': event['start_time'].strftime('%I:%M %p'),
            'location': event['location'],
            'categories': event['categories'],
            'link': event['link'],
        } for event in valid_events]
        ids = [f"event_{i}" for i in range(len(valid_events))]

        try:
            # Embed first so an encoding failure leaves the old index intact.
            embeddings = self.model.encode(documents)
            self.collection = self._reset_collection()
            self.collection.add(
                documents=documents,
                embeddings=embeddings.tolist(),
                metadatas=metadatas,
                ids=ids,
            )
            print(f"Successfully added {len(valid_events)} events to database")
        except Exception as e:
            print(f"Error adding events to database: {e}")

        self.save_cache({
            'last_update': datetime.now().isoformat(),
            'events': valid_events,
        })

        # Clean up
        gc.collect()

    def query(self, question: str, n_results: int = 3) -> Optional[Dict]:
        """Return the nearest events for *question*.

        Args:
            question: Free-text user question.
            n_results: Maximum number of hits; clamped to the collection size.

        Returns:
            The raw Chroma query result (documents, metadatas, distances),
            or ``None`` when the collection is empty or the query fails.
        """
        try:
            available = self.collection.count()
            if available == 0:
                return None

            question_embedding = self.model.encode(question)
            return self.collection.query(
                query_embeddings=[question_embedding.tolist()],
                n_results=min(n_results, available),
                include=['documents', 'metadatas', 'distances'],
            )
        except Exception as e:
            print(f"Error during query: {e}")
            return None

    def generate_response(self, question: str, history: list) -> str:
        """Build the chat reply for *question*.

        Args:
            question: The user's message.
            history: Prior chat turns. Accepted for interface compatibility;
                it is not used when building the reply.

        Returns:
            A Markdown list of up to three matching events, or a fallback
            message when nothing matches or an error occurs.
        """
        try:
            results = self.query(question)
            if not results or not results['documents'] or not results['documents'][0]:
                return "I couldn't find any events matching your query. Try asking about upcoming events in a different way!"

            parts = ["Here are some relevant events I found:\n\n"]

            # Add top 3 matching events
            for i, metadata in enumerate(results['metadatas'][0][:3], 1):
                parts.append(f"{i}. **{metadata['title']}**\n")
                parts.append(f"📅 {metadata['date']} at {metadata['time']}\n")
                parts.append(f"📍 {metadata['location']}\n")
                if 'categories' in metadata:
                    parts.append(f"🏷️ {metadata['categories']}\n")
                parts.append(f"🔗 More info: {metadata['link']}\n\n")

            parts.append("\nYou can ask me for more specific details about any of these events!")
            return "".join(parts)
        except Exception as e:
            print(f"Error generating response: {e}")
            return "I encountered an error while searching for events. Please try asking in a different way."
329
+
330
def create_demo():
    """Build the Gradio Blocks UI for the Brock events assistant.

    A ``BrockEventsRAG`` instance is created up front and shared by all
    event handlers.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """
    # Initialize the RAG system
    rag_system = BrockEventsRAG()

    # Custom CSS for better appearance
    custom_css = """
    .gr-button-primary {
        background-color: #8b0000 !important;
        border-color: #8b0000 !important;
    }
    """

    def respond(message, history):
        """Append the user's turn and the assistant's reply to the chat."""
        # Work on a copy so the component's own state is never mutated, and
        # tolerate the None that a cleared chatbot may hand back.
        turns = list(history or [])
        if not message or not message.strip():
            return "", turns
        bot_message = rag_system.generate_response(message, turns)
        turns.append({"role": "user", "content": message})
        turns.append({"role": "assistant", "content": bot_message})
        return "", turns

    def refresh_events():
        """Re-fetch the feed and report completion."""
        rag_system.update_database()
        return "Events database has been refreshed!"

    # Create the Gradio interface
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
        # 🎓 Brock University Events Assistant

        Ask me about upcoming events at Brock! I can help you discover:
        - Academic workshops
        - Student activities
        - Campus events
        - And more!
        """)

        # Exactly one chatbot. It uses the role/content "messages" format,
        # which is the shape `respond` produces.
        chatbot = gr.Chatbot(
            label="Chat History",
            height=400,
            bubble_full_width=False,
            type="messages",
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Your Question",
                placeholder="e.g., What events are happening this week?",
                scale=4
            )
            submit = gr.Button("Ask", scale=1, variant="primary")

        with gr.Row():
            clear = gr.Button("Clear Chat")
            refresh = gr.Button("Refresh Events")

        # Event handlers
        submit.click(respond, [msg, chatbot], [msg, chatbot])
        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        clear.click(lambda: [], None, chatbot)
        # The refresh confirmation is shown in the question box, as before.
        refresh.click(refresh_events, None, msg)

        # Example questions
        gr.Examples(
            examples=[
                "What events are happening this week?",
                "Are there any workshops in the library?",
                "Tell me about upcoming career events",
                "What's happening in the MakerSpace?",
                "Any student club meetings soon?",
            ],
            inputs=msg
        )

        gr.Markdown("""
        ### Tips:
        - Ask about specific dates, locations, or event types
        - You can refresh the events database using the button above
        - Click on event links to get more details on ExperienceBU

        Events are loaded when the app starts; use Refresh Events to reload them. Events shown are for the next 14 days.
        """)

    return demo
418
 
419
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # Required for Spaces
        "server_port": 7860,       # Default port
        "share": False,            # Don't create a public link
        "max_threads": 40,         # Handle concurrent users
    }
    demo = create_demo()
    demo.launch(**launch_options)