LLM Version update

#4
by AryanJh - opened
Files changed (1)
  1. app.py +574 -334
app.py CHANGED
@@ -1,360 +1,619 @@
1
- # app.py
2
 
3
  import gradio as gr
4
  import feedparser
5
  from bs4 import BeautifulSoup
6
  from datetime import datetime, timedelta
7
  import pytz
8
- from typing import List, Dict
9
  from sentence_transformers import SentenceTransformer
10
  import chromadb
11
- import gc
12
- import json
 
13
  import os
14
 
15
  class BrockEventsRAG:
16
  def __init__(self):
17
- """Initialize the RAG system with improved caching"""
18
- self.model = SentenceTransformer('all-MiniLM-L6-v2')
19
- self.chroma_client = chromadb.Client()
20
 
21
- # Get current date range
22
  self.eastern = pytz.timezone('America/New_York')
23
  self.today = datetime.now(self.eastern).replace(hour=0, minute=0, second=0, microsecond=0)
24
- self.date_range_end = self.today + timedelta(days=14)
25
 
26
- # Cache directory setup
27
- os.makedirs("cache", exist_ok=True)
28
- self.cache_file = "cache/events_cache.json"
29
-
30
- # Initialize or reset collection
31
- try:
32
- self.collection = self.chroma_client.create_collection(
33
- name="brock_events",
34
- metadata={"description": "Brock University Events Database"}
35
- )
36
- except Exception:
37
- self.chroma_client.delete_collection("brock_events")
38
- self.collection = self.chroma_client.create_collection(
39
- name="brock_events",
40
- metadata={"description": "Brock University Events Database"}
41
- )
42
 
43
  # Load initial events
44
  self.update_database()
45
 
46
- def save_cache(self, data: dict):
47
- """Save events data to cache file"""
48
  try:
49
- # Convert datetime objects to strings for JSON serialization
50
- serializable_data = {
51
- 'last_update': data['last_update'],
52
- 'events': []
53
- }
54
 
55
- for event in data['events']:
56
- event_copy = event.copy()
57
- # Convert datetime objects to strings
58
- if event_copy.get('start_time'):
59
- event_copy['start_time'] = event_copy['start_time'].isoformat()
60
- if event_copy.get('end_time'):
61
- event_copy['end_time'] = event_copy['end_time'].isoformat()
62
- serializable_data['events'].append(event_copy)
63
 
64
- with open(self.cache_file, 'w', encoding='utf-8') as f:
65
- json.dump(serializable_data, f, ensure_ascii=False, indent=2)
66
- print(f"Cache saved successfully to {self.cache_file}")
67
 
68
- except Exception as e:
69
- print(f"Error saving cache: {e}")
70
-
71
- def load_cache(self) -> dict:
72
- """Load and parse cached events data"""
73
- try:
74
- if os.path.exists(self.cache_file):
75
- with open(self.cache_file, 'r', encoding='utf-8') as f:
76
- data = json.load(f)
77
-
78
- # Convert string timestamps back to datetime objects
79
- for event in data['events']:
80
- if event.get('start_time'):
81
- event['start_time'] = datetime.fromisoformat(event['start_time'])
82
- if event.get('end_time'):
83
- event['end_time'] = datetime.fromisoformat(event['end_time'])
84
-
85
- return data
86
- return {'last_update': None, 'events': []}
87
 
88
- except Exception as e:
89
- print(f"Error loading cache: {e}")
90
- return {'last_update': None, 'events': []}
91
-
92
- def should_update_cache(self) -> bool:
93
- """Check if cache needs updating (older than 24 hours)"""
94
- try:
95
- cached_data = self.load_cache()
96
- if not cached_data['last_update']:
97
- return True
98
-
99
- last_update = datetime.fromisoformat(cached_data['last_update'])
100
- time_since_update = datetime.now() - last_update
101
 
102
- return time_since_update.total_seconds() > 86400 # 24 hours
103
 
104
  except Exception as e:
105
- print(f"Error checking cache: {e}")
106
- return True
107
 
108
- def parse_event_datetime(self, entry) -> tuple:
109
- """Parse start and end times from both RSS and HTML"""
 
110
  try:
111
- # First try to get times from the events namespace
112
- start_time = entry.get('start', None)
113
- end_time = entry.get('end', None)
114
-
115
- # Parse the RSS feed times if available
116
- if start_time:
117
- start_dt = datetime.strptime(start_time, '%a, %d %b %Y %H:%M:%S %Z')
118
- start_dt = pytz.UTC.localize(start_dt).astimezone(self.eastern)
119
- else:
120
- start_dt = None
121
-
122
- if end_time:
123
- end_dt = datetime.strptime(end_time, '%a, %d %b %Y %H:%M:%S %Z')
124
- end_dt = pytz.UTC.localize(end_dt).astimezone(self.eastern)
125
- else:
126
- end_dt = None
127
-
128
- # If we didn't get times from RSS, try HTML
129
- if not start_dt or not end_dt:
130
- soup = BeautifulSoup(entry.description, 'html.parser')
131
- start_elem = soup.find('time', class_='dt-start')
132
- end_elem = soup.find('time', class_='dt-end')
133
-
134
- if start_elem and 'datetime' in start_elem.attrs:
135
- dt_str = start_elem['datetime'].split('.')[0]
136
- start_dt = datetime.strptime(dt_str, '%Y-%m-%dT%H:%M:%S')
137
- start_dt = self.eastern.localize(start_dt)
138
-
139
- if end_elem and 'datetime' in end_elem.attrs:
140
- dt_str = end_elem['datetime'].split('.')[0]
141
- end_dt = datetime.strptime(dt_str, '%Y-%m-%dT%H:%M:%S')
142
- end_dt = self.eastern.localize(end_dt)
143
-
144
- return start_dt, end_dt
145
-
146
  except Exception as e:
147
- print(f"Error parsing dates: {e}")
148
- return None, None
149
 
150
- def get_location(self, entry) -> str:
151
- """Extract location from both RSS and HTML"""
152
  try:
153
- # First try RSS events namespace
154
- location = entry.get('location', None)
155
 
156
- # If not found, try HTML
157
  if not location:
158
  soup = BeautifulSoup(entry.description, 'html.parser')
159
  location_elem = soup.find('span', class_='p-location')
160
  if location_elem:
161
  location = location_elem.get_text().strip()
162
 
163
- return location if location else "Location not specified"
164
 
165
  except Exception as e:
166
- print(f"Error getting location: {e}")
167
- return "Location not specified"
168
 
169
  def process_event(self, entry) -> Dict:
170
- """Process a single event entry"""
171
  try:
172
- # Get times
173
- start_time, end_time = self.parse_event_datetime(entry)
174
 
175
- # Skip if event is not in our date range
176
- if not start_time or not self.is_event_in_range(start_time):
177
  return None
178
 
179
- # Get location
180
- location = self.get_location(entry)
181
 
182
- # Get categories
183
- categories = [tag.term for tag in entry.get('tags', [])]
184
- categories_str = '; '.join(categories) if categories else 'No categories'
185
 
186
- # Get hosts
187
- hosts = entry.get('host', [])
188
- if not isinstance(hosts, list):
189
- hosts = [hosts]
190
- hosts_str = '; '.join(hosts) if hosts else 'No host specified'
191
 
192
- # Clean description
193
- soup = BeautifulSoup(entry.description, 'html.parser')
194
- description = ' '.join(soup.get_text().split())
195
 
196
- return {
197
- 'title': entry.title,
198
- 'start_time': start_time,
199
- 'end_time': end_time,
200
- 'location': location,
201
- 'categories': categories_str,
202
- 'hosts': hosts_str,
203
- 'description': description,
204
- 'link': entry.link,
205
- 'guid': entry.guid
206
  }
207
 
208
  except Exception as e:
209
  print(f"Error processing event {entry.get('title', 'Unknown')}: {e}")
210
  return None
 
211
 
212
- def is_event_in_range(self, event_time: datetime) -> bool:
213
- """Check if event falls within our date range"""
214
- if not event_time:
215
- return False
216
- return self.today <= event_time <= self.date_range_end
217
-
218
- def format_event_text(self, event: Dict) -> str:
219
- """Format event information for embedding"""
220
- return f"""
221
- Event: {event['title']}
222
- Date: {event['start_time'].strftime('%A, %B %d, %Y')}
223
- Time: {event['start_time'].strftime('%I:%M %p')} to {event['end_time'].strftime('%I:%M %p') if event['end_time'] else 'not specified'}
224
- Location: {event['location']}
225
- Categories: {event['categories']}
226
- Hosted by: {event['hosts']}
227
- Description: {event['description'][:500]}
228
- """
229
-
230
- def update_database(self):
231
- """Update database with events in date range"""
232
- print("Fetching events...")
233
- feed = feedparser.parse("https://experiencebu.brocku.ca/events.rss")
234
- print(f"Found {len(feed.entries)} total events")
235
-
236
- # Process events
237
- valid_events = []
238
- for entry in feed.entries:
239
- event = self.process_event(entry)
240
- if event: # Only include events in our date range
241
- valid_events.append(event)
242
-
243
- print(f"Found {len(valid_events)} events in the next 14 days")
244
-
245
- if not valid_events:
246
- print("No events found in date range")
247
- return
248
-
249
- # Prepare data for database
250
- documents = [self.format_event_text(event) for event in valid_events]
251
- metadatas = [{
252
- 'title': event['title'],
253
- 'date': event['start_time'].strftime('%Y-%m-%d'),
254
- 'time': event['start_time'].strftime('%I:%M %p'),
255
- 'location': event['location'],
256
- 'categories': event['categories'],
257
- 'link': event['link']
258
- } for event in valid_events]
259
- ids = [f"event_{i}" for i in range(len(valid_events))]
260
 
261
- # Generate embeddings and add to database
262
- try:
263
- embeddings = self.model.encode(documents)
264
- self.collection.add(
265
- documents=documents,
266
- embeddings=embeddings.tolist(),
267
- metadatas=metadatas,
268
- ids=ids
269
- )
270
- print(f"Successfully added {len(valid_events)} events to database")
271
- except Exception as e:
272
- print(f"Error adding events to database: {e}")
273
-
274
- # Save to cache
275
- cache_data = {
276
- 'last_update': datetime.now().isoformat(),
277
- 'events': valid_events
278
  }
279
- self.save_cache(cache_data)
280
 
281
- # Clean up
282
- gc.collect()
283
 
284
- def query(self, question: str, n_results: int = 3) -> List[Dict]:
285
- """Query the database"""
286
  try:
287
- question_embedding = self.model.encode(question)
288
- results = self.collection.query(
289
- query_embeddings=[question_embedding.tolist()],
290
- n_results=n_results,
291
- include=['documents', 'metadatas', 'distances']
292
- )
293
- return results
294
  except Exception as e:
295
- print(f"Error during query: {e}")
296
- return None
297
- def generate_response(self, question: str, history: list) -> str:
298
- """Generate a response based on the query and chat history"""
299
  try:
300
- # Query the database
301
- results = self.query(question)
302
- if not results or not results['documents'] or not results['documents'][0]:
303
- return "I couldn't find any events matching your query. Try asking about upcoming events in a different way!"
304
-
305
- # Analyze the question type
306
- question_lower = question.lower()
307
- is_time_query = any(word in question_lower for word in ['when', 'time', 'date', 'week', 'today', 'tomorrow'])
308
- is_location_query = any(word in question_lower for word in ['where', 'location', 'place', 'building', 'room'])
309
-
310
- # Format the response
311
- response = "Here are some relevant events I found:\n\n"
312
-
313
- # Add top 3 matching events
314
- for i, (doc, metadata) in enumerate(zip(results['documents'][0][:3], results['metadatas'][0][:3]), 1):
315
- response += f"{i}. **{metadata['title']}**\n"
316
- response += f"πŸ“… {metadata['date']} at {metadata['time']}\n"
317
- response += f"πŸ“ {metadata['location']}\n"
318
- if 'categories' in metadata:
319
- response += f"🏷️ {metadata['categories']}\n"
320
- response += f"πŸ”— More info: {metadata['link']}\n\n"
321
-
322
- # Add a helpful prompt
323
- response += "\nYou can ask me for more specific details about any of these events!"
324
- return response
325
 
326
  except Exception as e:
327
- print(f"Error generating response: {e}")
328
- return "I encountered an error while searching for events. Please try asking in a different way."
329
-
330
- def create_demo():
331
- # Initialize the RAG system
332
- rag_system = BrockEventsRAG()
333
-
334
- # Custom CSS for better appearance
335
- custom_css = """
336
- .gr-button-primary {
337
- background-color: #8b0000 !important;
338
- border-color: #8b0000 !important;
339
- }
340
- """
341
 
342
- # Create the Gradio interface
343
- with gr.Blocks(css=custom_css) as demo:
344
- gr.Markdown("""
345
- # πŸŽ“ Brock University Events Assistant
346
-
347
- Ask me about upcoming events at Brock! I can help you discover:
348
- - Academic workshops
349
- - Student activities
350
- - Campus events
351
- - And more!
352
- """)
353
 
354
- chatbot = gr.Chatbot(
355
  label="Chat History",
356
  height=400,
357
- bubble_full_width=False,
358
  )
359
 
360
  with gr.Row():
@@ -365,62 +624,43 @@ def create_demo():
365
  )
366
  submit = gr.Button("Ask", scale=1, variant="primary")
367
 
368
- with gr.Row():
369
- clear = gr.Button("Clear Chat")
370
- refresh = gr.Button("Refresh Events")
371
-
372
  # Event handlers
373
- def respond(message, history):
374
- bot_message = rag_system.generate_response(message, history)
375
- history.append({"role": "user", "content": message})
376
- history.append({"role": "assistant", "content": bot_message})
377
- return "", history
378
-
379
- # In the create_demo function:
380
- chatbot = gr.Chatbot(
381
- label="Chat History",
382
- height=400,
383
- bubble_full_width=False,
384
- type="messages" # Use new message format
385
  )
386
-
387
- def refresh_events():
388
- rag_system.update_database()
389
- return "Events database has been refreshed!"
390
-
391
- submit.click(respond, [msg, chatbot], [msg, chatbot])
392
- msg.submit(respond, [msg, chatbot], [msg, chatbot])
393
  clear.click(lambda: None, None, chatbot)
394
- refresh.click(refresh_events, None, msg)
395
-
396
- # Example questions
397
  gr.Examples(
398
  examples=[
399
- "What events are happening this week?",
400
- "Are there any workshops in the library?",
401
- "Tell me about upcoming career events",
402
- "What's happening in the MakerSpace?",
403
- "Any student club meetings soon?",
404
  ],
405
  inputs=msg
406
  )
407
-
408
- gr.Markdown("""
409
- ### Tips:
410
- - Ask about specific dates, locations, or event types
411
- - You can refresh the events database using the button above
412
- - Click on event links to get more details on ExperienceBU
413
-
414
- Data is refreshed automatically every 24 hours. Events shown are for the next 14 days.
415
- """)
416
-
417
  return demo
418
 
419
  if __name__ == "__main__":
420
- demo = create_demo()
421
- demo.launch(
422
- server_name="0.0.0.0", # Required for Spaces
423
- server_port=7860, # Default port
424
- share=False, # Don't create a public link
425
- max_threads=40 # Handle concurrent users
426
- )
 
1
+ # @title Default title text
2
+ ###WORKING MODEL
3
+
4
 
5
  import gradio as gr
6
  import feedparser
7
  from bs4 import BeautifulSoup
8
  from datetime import datetime, timedelta
9
  import pytz
10
+ from typing import List, Dict, Tuple
11
  from sentence_transformers import SentenceTransformer
12
  import chromadb
13
+ from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
14
+ from dateutil.parser import parse as dateutil_parse
15
+ from dateutil.parser import ParserError
16
  import os
17
+ import json
18
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
19
+ #from langchain_community.embeddings import HuggingFaceEmbeddings
20
+ from langchain_community.vectorstores import Chroma
21
+ from langchain_core.output_parsers import StrOutputParser
22
+ from langchain_core.prompts import ChatPromptTemplate
23
+ from langchain_core.runnables import RunnablePassthrough
24
+ from langchain.chains import LLMChain
25
+ from langchain_huggingface import HuggingFacePipeline
26
+ from dateutil import parser
27
+ from langchain.embeddings import HuggingFaceEmbeddings
28
 
29
  class BrockEventsRAG:
30
  def __init__(self):
31
+ """Initialize the RAG system with improved data handling"""
32
+ print("Initializing RAG system...")
33
+
34
+ #Slider Initialization for interface
35
+ self.temperature = 0.7
36
+ self.top_p = 0.95
37
+ self.top_k = 50
38
+
39
+
40
+ # Initialize embedding function
41
+ self.emodel_name="multi-qa-MiniLM-L6-cos-v1"
42
+ self.embedding_function = SentenceTransformerEmbeddingFunction(self.emodel_name)
43
+ #self.embeddings = HuggingFaceEmbeddings(model_name)
44
 
45
+ # Setup ChromaDB with in-memory client for Colab
46
+ try:
47
+ # First try in-memory client
48
+ self.chroma_client = chromadb.Client()
49
+ print("Using in-memory ChromaDB client")
50
+ except Exception as e:
51
+ print(f"Error with in-memory client: {e}")
52
+ # Fallback to persistent client with temporary directory
53
+ import tempfile
54
+ temp_dir = tempfile.mkdtemp()
55
+ print(f"Using temporary directory: {temp_dir}")
56
+ self.chroma_client = chromadb.PersistentClient(path=temp_dir)
57
+
58
+ # Create collection with retry logic
59
+ max_retries = 3
60
+ for attempt in range(max_retries):
61
+ try:
62
+ self.collection = self.chroma_client.get_or_create_collection(
63
+ name="brock_events",
64
+ embedding_function=self.embedding_function,
65
+ metadata={"hnsw:space": "cosine"}
66
+ )
67
+ print("Successfully created collection")
68
+ break
69
+ except Exception as e:
70
+ print(f"Attempt {attempt + 1} failed: {e}")
71
+ if attempt == max_retries - 1:
72
+ raise
73
+
74
+ # Setup date handling
75
  self.eastern = pytz.timezone('America/New_York')
76
  self.today = datetime.now(self.eastern).replace(hour=0, minute=0, second=0, microsecond=0)
77
+ self.date_range_end = self.today + timedelta(days=25)
78
 
79
+ # Initialize LLM components
80
+ self.setup_llm()
81
 
82
  # Load initial events
83
  self.update_database()
84
 
85
+ #Prompt Template for LLM and RAG
86
+ RESPONSE_TEMPLATE = """You are a helpful Brock University events assistant.
87
+ Create an engaging opening line to get students excited about events related to this query:
88
+
89
+ Query: {query}
90
+
91
+ Guidelines:
92
+ - Be friendly and enthusiastic
93
+ - Match the tone to the type of event
94
+ - Keep it brief but engaging
95
+
96
+ Examples:
97
+ - Query: Are there any business networking events coming up?
98
+ Introduction: "Get ready to connect! We've got some exciting business networking opportunities coming soon."
99
+ - Query: What workshops are happening next week?
100
+ Introduction: "Boost your skills! Check out these awesome workshops happening next week."
101
+ """
102
+
103
+ def setup_llm(self):
104
+ """Setup LLM pipeline and chain"""
105
  try:
106
+ print("Setting up LLM components...")
107
+
108
+ # Using a more powerful model
109
+ self.model_name = "google/flan-t5-base"
110
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
111
+ self.llm_model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
112
+
113
+ hf_pipeline = pipeline(
114
+ task="text2text-generation",
115
+ model=self.llm_model,
116
+ tokenizer=self.tokenizer,
117
+ do_sample=True,
118
+ temperature=self.temperature, # Increased for more creative responses
119
+ top_k=self.top_k, # Reduced to be more focused
120
+ top_p=self.top_p, # Slightly reduced for more focused sampling
121
+ max_length=50, # Reduced to force more concise responses
122
+ min_length=10, # Reduced minimum length
123
+ repetition_penalty=1.5, # Increased to more strongly prevent repetition
124
+ no_repeat_ngram_size=3 # Prevent repeating phrases of 3 or more tokens
125
+ )
126
+
127
+ # Initialize the LLM
128
+ self.llm = HuggingFacePipeline(pipeline=hf_pipeline)
129
 
130
+ # Set up vector store and retriever
131
+ self.vectorstore = Chroma(
132
+ client=self.chroma_client,
133
+ collection_name="brock_events",
134
+ embedding_function=self.embedding_function
135
+ )
136
 
137
+ # Configure retriever with appropriate parameters
138
+ self.retriever = self.vectorstore.as_retriever(
139
+ search_kwargs={
140
+ "k": 3,
141
+ #"fetch_k": 6 # Fetch more candidates than needed
142
+ }
143
+ )
144
 
145
+ # Create the prompt
146
+ self.prompt = ChatPromptTemplate.from_template(self.RESPONSE_TEMPLATE)
147
 
148
+ # Create the RAG chain
149
+ self.chain = (
150
+ {"context": self.retriever, "question": RunnablePassthrough()}
151
+ | self.prompt
152
+ | self.llm
153
+ | StrOutputParser()
154
+ )
155
 
156
+ print("LLM setup completed successfully")
157
 
158
  except Exception as e:
159
+ print(f"Error setting up LLM: {e}")
160
+ import traceback
161
+ print(f"Full error: {traceback.format_exc()}")
162
+ raise # Re-raise the exception to handle it in the calling code
163
 
164
+ def fetch_rss_feed(self) -> List[Dict]:
165
+ """Fetch and parse RSS feed"""
166
+ url = "https://experiencebu.brocku.ca/events.rss"
167
  try:
168
+ feed = feedparser.parse(url)
169
+ print(f"Fetched {len(feed.entries)} entries from feed")
170
+ return feed.entries
171
  except Exception as e:
172
+ print(f"Error fetching RSS feed: {e}")
173
+ return []
174
 
175
+ def process_event(self, entry) -> Dict:
176
+ """Process a single event entry with proper date handling"""
177
  try:
178
+ # Parse start time
179
+ try:
180
+ if 'start' in entry:
181
+ start = dateutil_parse(entry.start)
182
+ elif 'published_parsed' in entry:
183
+ start = datetime(*entry.published_parsed[:6])
184
+ else:
185
+ # Try to parse from description HTML
186
+ soup = BeautifulSoup(entry.description, 'html.parser')
187
+ time_elem = soup.find('time', class_='dt-start')
188
+ if time_elem and 'datetime' in time_elem.attrs:
189
+ start = dateutil_parse(time_elem['datetime'])
190
+ else:
191
+ print(f"No valid date found for event: {entry.get('title', 'Unknown')}")
192
+ return None
193
+ except (ParserError, ValueError) as e:
194
+ print(f"Error parsing date for event {entry.get('title', 'Unknown')}: {e}")
195
+ return None
196
+
197
+ # Convert to eastern timezone
198
+ if not start.tzinfo:
199
+ start = self.eastern.localize(start)
200
 
201
+ # Skip if outside date range
202
+ if not (self.today <= start <= self.date_range_end):
203
+ return None
204
+
205
+ # Extract location
206
+ location = entry.get('location', 'Location not specified')
207
  if not location:
208
  soup = BeautifulSoup(entry.description, 'html.parser')
209
  location_elem = soup.find('span', class_='p-location')
210
  if location_elem:
211
  location = location_elem.get_text().strip()
212
+
213
+ # Clean description
214
+ description = BeautifulSoup(entry.description, 'html.parser').get_text().strip()
215
+
216
+ return {
217
+ "title": entry.title,
218
+ "location": location,
219
+ "start": start.isoformat(), # Store as string
220
+ "description": description,
221
+ "link": entry.link
222
+ }
223
+
224
+ except Exception as e:
225
+ print(f"Error processing event: {e}")
226
+ return None
227
+
228
+ def update_database(self):
229
+ """Update the database with new events"""
230
+ try:
231
+ print("Starting database update...")
232
+ entries = self.fetch_rss_feed()
233
+ if not entries:
234
+ print("No entries fetched from RSS feed")
235
+ return
236
+
237
+ print(f"Processing {len(entries)} entries...")
238
+ new_events = []
239
+ # Delete and recreate collection
240
+ self.chroma_client.delete_collection("brock_events")
241
+ self.collection = self.chroma_client.create_collection(
242
+ name="brock_events",
243
+ embedding_function=self.embedding_function,
244
+ metadata={"hnsw:space": "cosine"}
245
+ )
246
+ # Process each entry
247
+ for entry in entries:
248
+ event = self.process_event(entry)
249
+ if event: # Only add if event processing was successful
250
+ new_events.append(event)
251
+
252
+ if new_events:
253
+ print(f"\nAdding {len(new_events)} events to database...")
254
+ for i, event in enumerate(new_events):
255
+ try:
256
+ # Use the already formatted event text
257
+ event_text = event['text']
258
+
259
+ print(f"\nAdding event {i+1}/{len(new_events)}")
260
+ print("Event text sample:", event_text[:200])
261
+
262
+ # Create unique ID using the event's ID or index
263
+ unique_id = event['id'] or f"event_{i}_{datetime.now().timestamp()}"
264
+
265
+ # Add to collection with metadata
266
+ self.collection.add(
267
+ documents=[event_text],
268
+ ids=[unique_id],
269
+ metadatas=[event['metadata']]
270
+ )
271
+ print(f"Successfully added event {i+1}")
272
+
273
+ except Exception as e:
274
+ print(f"Error adding event {i+1}: {e}")
275
+ import traceback
276
+ print(f"Full error trace for event {i+1}: {traceback.format_exc()}")
277
+ continue # Continue with next event even if this one fails
278
+
279
+ print(f"\nSuccessfully added {len(new_events)} events to the database")
280
+
281
+ except Exception as e:
282
+ print(f"Error updating database: {e}")
283
+ import traceback
284
+ print(f"Full error: {traceback.format_exc()}")
285
+
286
+
287
+ def query_events(self, query: str) -> str:
288
+ """Query events using semantic search with category-specific enhancement"""
289
+ try:
290
+ print(f"\nProcessing query: {query}")
291
+
292
+ collection_count = self.collection.count()
293
+ print(f"Current collection size: {collection_count} documents")
294
+
295
+ if collection_count == 0:
296
+ return "No events are currently loaded in the database. Please try again later."
297
+
298
+ # Define category-specific terms
299
+ query_lower = query.lower()
300
+ enhanced_query = query
301
 
302
+ # Category-specific query enhancement
303
+ if 'makerspace' in query_lower:
304
+ enhanced_query = f"{query} maker making create creative workshop lab hands-on"
305
+ elif 'math' in query_lower or 'science' in query_lower:
306
+ enhanced_query = f"{query} mathematics physics chemistry biology research laboratory"
307
+ elif 'business' in query_lower or 'networking' in query_lower:
308
+ enhanced_query = f"{query} business networking professional career development"
309
+ elif 'career' in query_lower or 'job' in query_lower:
310
+ enhanced_query = f"{query} career employment job fair hiring recruitment"
311
+
312
+ # Query the collection
313
+ results = self.collection.query(
314
+ query_texts=[enhanced_query],
315
+ n_results=5,
316
+ include=['documents', 'metadatas']
317
+ )
318
+
319
+ if not results or not results['documents'] or not results['documents'][0]:
320
+ return "I couldn't find any events matching your query."
321
+
322
+ # Format responses based on query type
323
+ events_found = []
324
+ for doc, metadata in zip(results['documents'][0], results['metadatas'][0]):
325
+ # Define relevancy based on query type
326
+ if 'makerspace' in query_lower:
327
+ is_relevant = any(term in doc.lower() for term in
328
+ ['makerspace', 'maker', 'create', 'workshop', 'lab'])
329
+ elif 'math' in query_lower or 'science' in query_lower:
330
+ is_relevant = any(term in doc.lower() for term in
331
+ ['math', 'science', 'physics', 'chemistry', 'biology', 'research'])
332
+ elif 'business' in query_lower or 'networking' in query_lower:
333
+ is_relevant = any(term in doc.lower() for term in
334
+ ['business', 'network', 'professional', 'entrepreneur'])
335
+ elif 'career' in query_lower or 'job' in query_lower:
336
+ is_relevant = any(term in doc.lower() for term in
337
+ ['career', 'job', 'employment', 'hiring', 'fair'])
338
+ else:
339
+ is_relevant = True # For general queries, show all events
340
+
341
+ if is_relevant:
342
+ # Add appropriate emoji based on event type
343
+ emoji = "πŸ“…" # Default emoji
344
+ if "workshop" in doc.lower():
345
+ emoji = "πŸ”§"
346
+ elif "makerspace" in doc.lower():
347
+ emoji = "πŸ› οΈ"
348
+ elif "career" in doc.lower() or "job" in doc.lower():
349
+ emoji = "πŸ’Ό"
350
+ elif "research" in doc.lower() or "science" in doc.lower():
351
+ emoji = "πŸ”¬"
352
+
353
+ events_found.append(
354
+ f"{emoji} {metadata.get('title', 'Untitled Event')}\n"
355
+ f"Hosted by: {metadata.get('host', 'No host specified')}\n"
356
+ f"Type: {metadata.get('categories', 'General Event')}\n"
357
+ )
358
+
359
+ if not events_found:
360
+ return f"I couldn't find any events matching '{query}' at this time."
361
+
362
+ response = f"Here are some relevant events:\n\n"
363
+ response += "\n".join(events_found)
364
+
365
+ return response
366
 
367
  except Exception as e:
368
+ print(f"Error querying events: {e}")
369
+ import traceback
370
+ print(f"Full error: {traceback.format_exc()}")
371
+ return "I encountered an error while searching for events. Please try again."
372
+
373
 
374
  def process_event(self, entry) -> Dict:
375
+ """Process a single event entry with improved parsing and error handling"""
376
  try:
377
+ # Extract and parse datetime information
378
+ start_time = None
379
+ end_time = None
380
+
381
+ # First try to parse from HTML content
382
+ soup = BeautifulSoup(entry.get('summary', ''), 'html.parser')
383
+
384
+ # Look for start time
385
+ start_elem = soup.find('time', class_='dt-start')
386
+ if start_elem and 'datetime' in start_elem.attrs:
387
+ try:
388
+ start_time = parser.parse(start_elem['datetime'])
389
+ except (ParserError, ValueError) as e:
390
+ print(f"Error parsing start time: {e}")
391
+
392
+ # Look for end time
393
+ end_elem = soup.find('time', class_='dt-end')
394
+ if end_elem and 'datetime' in end_elem.attrs:
395
+ try:
396
+ end_time = parser.parse(end_elem['datetime'])
397
+ except (ParserError, ValueError) as e:
398
+ print(f"Error parsing end time: {e}")
399
 
400
+ # If HTML parsing failed, try RSS feed's native fields
401
+ if not start_time and 'start' in entry:
402
+ try:
403
+ start_time = parser.parse(entry.start)
404
+ except (ParserError, ValueError) as e:
405
+ print(f"Error parsing RSS start time: {e}")
406
+
407
+ if not end_time and 'end' in entry:
408
+ try:
409
+ end_time = parser.parse(entry.end)
410
+ except (ParserError, ValueError) as e:
411
+ print(f"Error parsing RSS end time: {e}")
412
+
413
+ # If still no start time, try published date as last resort
414
+ if not start_time and 'published_parsed' in entry:
415
+ start_time = datetime(*entry.published_parsed[:6])
416
+
417
+ # Skip if no valid start time or outside date range
418
+ if not start_time:
419
+ print("No valid start time found for event")
420
+ return None
421
+
422
+ # Ensure timezone awareness
423
+ if not start_time.tzinfo:
424
+ start_time = self.eastern.localize(start_time)
425
+
426
+ # Check if event is within our date range
427
+ if not (self.today <= start_time <= self.date_range_end):
428
  return None
429
+
430
+ # Extract base information
431
+ title = entry.get('title', 'No title')
432
 
433
+ # Extract author info - preferring name over email
434
+ author = None
435
+ if 'authors' in entry and entry.authors:
436
+ author = entry.authors[0].get('name', None)
437
+ if not author and 'author' in entry:
438
+ author = entry.author.split('(')[0].strip() # Clean up email format
439
 
440
+ # Get tags/categories
441
+ categories = []
442
+ if 'tags' in entry:
443
+ categories = [tag.get('term', '') for tag in entry.tags]
444
+ categories_str = '; '.join(filter(None, categories)) or "General Event"
445
 
446
+ # Extract host if available
447
+ host = entry.get('host', 'No host specified')
448
 
449
+ # Create event text that emphasizes searchable metadata
450
+ event_text = f"""
451
+ Event: {title}
452
+ Department: {host}
453
+ Host: {author or host}
454
+ Type: {categories_str}
455
+ """
456
 
457
+ # Add to metadata for better filtering
458
+ metadata = {
459
+ "title": title,
460
+ "author": author or host,
461
+ "categories": categories_str,
462
+ "host": host,
463
+ "department": self.extract_department(title, host) # Helper function to categorize
464
  }
465
 
466
+ return {
467
+ "text": event_text.strip(),
468
+ "metadata": metadata,
469
+ "id": f"{entry.get('id', '')}"
470
+ }
471
+
472
  except Exception as e:
473
  print(f"Error processing event {entry.get('title', 'Unknown')}: {e}")
474
+ import traceback
475
+ print(f"Full error: {traceback.format_exc()}")
476
  return None
477
+
478
 
479
+ def extract_department(self, title: str, host: str) -> str:
480
+ """Extract department information from title and host"""
481
+ text = f"{title} {host}".lower()
482
 
483
+ departments = {
484
+ 'Mathematics': ['math', 'mathematics', 'statistics'],
485
+ 'Sciences': ['science', 'biology', 'chemistry', 'physics'],
486
+ 'Business': ['business', 'accounting', 'finance', 'management'],
487
+ 'Arts': ['arts', 'humanities', 'visual arts', 'performing arts'],
488
+ 'Engineering': ['engineering', 'technology', 'computing'],
489
+ 'Social Sciences': ['psychology', 'sociology', 'political science'],
490
+ 'International': ['international', 'global', 'abroad'],
491
+ 'Student Life': ['student life', 'campus life', 'residence'],
492
+ 'Athletics': ['athletics', 'sports', 'recreation'],
493
+ 'Career': ['career', 'professional', 'employment']
494
  }
 
495
 
496
+ for dept, keywords in departments.items():
497
+ if any(keyword in text for keyword in keywords):
498
+ return dept
499
+
500
+ return 'General'
501
 
502
+ def process_chat(self, message: str, history: List[Tuple[str, str]]) -> str:
503
+ """Process chat messages and maintain context"""
504
  try:
505
+ # Get RAG response
506
+ events_response = self.query_events(message)
507
+
508
+ # Format response in a conversational way - Pass both arguments
509
+ formatted_response = self.format_response(events_response, message) # Fixed: Added message as query
510
+
511
+ # Check if formatted response is empty
512
+ if not formatted_response:
513
+ formatted_response = "I couldn't find any events matching your query."
514
+
515
+ # Update chat history
516
+ self.chat_history = history + [(message, formatted_response)]
517
+
518
+ return formatted_response
519
+
520
  except Exception as e:
521
+ return f"I apologize, but I encountered an error while searching for events: {str(e)}"
522
+
523
+ def format_response(self, events_text: str, query: str) -> str:
524
+ """
525
+ Format the RAG response with an LLM-generated introduction
526
+ Args:
527
+ events_text (str): The events information from RAG
528
+ query (str): The original user query
529
+ Returns:
530
+ str: Formatted response with LLM intro and RAG results
531
+ """
532
  try:
533
+ if not events_text or events_text.strip() == "":
534
+ return "I couldn't find any events matching your query. Could you try rephrasing or being more specific?"
535
+
536
+ # Create prompt for introduction
537
+ intro_prompt = ChatPromptTemplate.from_template(self.RESPONSE_TEMPLATE)
538
+
539
+ # Generate introduction using LLM - Changed line!
540
+ intro_chain = intro_prompt | self.llm | StrOutputParser()
541
+ introduction = intro_chain.invoke(query) # Pass only the query string
542
+
543
+ # Format the RAG results with emojis
544
+ formatted_events = []
545
+ events = events_text.split("\n\n")
546
+
547
+ for event in events:
548
+ if event.strip():
549
+ # Add emoji based on event type/keywords
550
+ if "workshop" in event.lower():
551
+ event = "πŸ”§ " + event
552
+ elif "seminar" in event.lower():
553
+ event = "πŸ“š " + event
554
+ elif "lecture" in event.lower():
555
+ event = "πŸŽ“ " + event
556
+ elif "research" in event.lower():
557
+ event = "πŸ”¬ " + event
558
+ elif "sports" in event.lower():
559
+ event = "πŸƒ " + event
560
+ else:
561
+ event = "πŸ“… " + event
562
+
563
+ formatted_events.append(event)
564
+
565
+ # Combine introduction and events
566
+ full_response = f"{introduction.strip()}\n\n"
567
+ full_response += "\n\n".join(formatted_events)
568
+
569
+ return full_response
570
 
571
  except Exception as e:
572
+ print(f"Error in response formatting: {e}")
573
+ # Fallback to basic formatting if LLM fails
574
+ fallback_response = "Here are some events that might interest you:\n\n"
575
+ fallback_response += events_text
576
+ return fallback_response
577
+
578
+ def create_chat_interface():
579
+ chat_rag = BrockEventsRAG()
580
 
581
+ custom_theme = gr.themes.Soft().set(
582
+ input_background_fill="*primary",
583
+ body_text_color="*secondary",
584
+ )
585
+
586
+ with gr.Blocks(theme=custom_theme) as demo:
587
+ # Header section
588
+ with gr.Row():
589
+ with gr.Column():
590
+ gr.Markdown("# πŸŽ“ Brock University Events Assistant")
591
+ gr.Markdown("Ask me about upcoming events, workshops, or activities!")
592
+ gr.Markdown(f"""
593
+ ### System Information
594
+ - **Embeddings Model**: {chat_rag.emodel_name}
595
+ - **Collection Size**: {chat_rag.collection.count()} documents
596
+ """)
597
 
598
+ # Add sliders
599
+ temperature = gr.Slider(
600
+ minimum=0.1, maximum=1.0, value=0.7, step=0.1,
601
+ label="Response Creativity (Temperature)"
602
+ )
603
+ top_p = gr.Slider(
604
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05,
605
+ label="Response Focus (Top P)"
606
+ )
607
+ top_k = gr.Slider(
608
+ minimum=1, maximum=100, value=50, step=1,
609
+ label="Response Diversity (Top K)"
610
+ )
611
+
612
+ # Chat components
613
+ chatbot = gr.Chatbot( # Removed type="messages"
614
  label="Chat History",
615
  height=400,
616
+ bubble_full_width=False
617
  )
618
 
619
  with gr.Row():
 
624
  )
625
  submit = gr.Button("Ask", scale=1, variant="primary")
626
 
627
+ clear = gr.Button("Clear Chat")
628
+
629
  # Event handlers
630
+ def process_chat(message, chat_history, temp, p, k):
631
+ chat_rag.temperature = temp
632
+ chat_rag.top_p = p
633
+ chat_rag.top_k = k
634
+ bot_message = chat_rag.process_chat(message, chat_history)
635
+ chat_history.append((message, bot_message))
636
+ return "", chat_history
637
+
638
+ submit.click(
639
+ process_chat,
640
+ inputs=[msg, chatbot, temperature, top_p, top_k],
641
+ outputs=[msg, chatbot]
642
+ )
643
+ msg.submit(
644
+ process_chat,
645
+ inputs=[msg, chatbot, temperature, top_p, top_k],
646
+ outputs=[msg, chatbot]
647
  )
648
  clear.click(lambda: None, None, chatbot)
649
+
650
+ # Examples
 
651
  gr.Examples(
652
  examples=[
653
+ "What workshops are happening next week?",
654
+ "Are there any business networking events coming up?",
655
+ "Tell me about math and science events",
656
+ "What's happening at the makerspace?",
657
+ "Are there any career fairs scheduled?"
658
  ],
659
  inputs=msg
660
  )
661
+
662
  return demo
663
 
664
  if __name__ == "__main__":
665
+ demo = create_chat_interface()
666
+ demo.launch(share=True, debug=True)
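
For context on how the retrieval-plus-introduction flow added in this version of app.py fits together, here is a minimal standalone sketch, assuming only the public APIs the diff already imports (chromadb's SentenceTransformerEmbeddingFunction, the transformers pipeline, and langchain_huggingface's HuggingFacePipeline); the demo collection name, placeholder documents, and example query below are illustrative and not part of the PR:

import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Index a couple of placeholder events with cosine similarity,
# mirroring the collection setup in BrockEventsRAG.__init__.
embedder = SentenceTransformerEmbeddingFunction(model_name="multi-qa-MiniLM-L6-cos-v1")
client = chromadb.Client()
collection = client.get_or_create_collection(
    name="brock_events_demo",
    embedding_function=embedder,
    metadata={"hnsw:space": "cosine"},
)
collection.add(
    documents=["Event: Career Fair\nType: Career", "Event: 3D Printing Night\nType: Makerspace"],
    ids=["event_0", "event_1"],
)

# Semantic search over the indexed events, as query_events does with its enhanced query.
hits = collection.query(query_texts=["any career events?"], n_results=1)
print(hits["documents"][0])

# Generate a short opening line with flan-t5 via LangChain, as format_response does.
hf_pipeline = pipeline("text2text-generation", model="google/flan-t5-base", do_sample=True, max_length=50)
llm = HuggingFacePipeline(pipeline=hf_pipeline)
prompt = ChatPromptTemplate.from_template("Write one friendly opening line for events about: {query}")
intro_chain = prompt | llm | StrOutputParser()
print(intro_chain.invoke({"query": "career fairs"}))

The sketch keeps the two stages separate (vector search first, then a prompt-to-LLM chain for the introduction), which is also how the new process_chat / format_response methods combine their outputs.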