Spaces:

AryanJh
/

Brock-Events-Assistant

Build error

App Files Files Community

AryanJh committed on Dec 19, 2024

Commit

3e76b59

verified ·

1 Parent(s): d2471c6

created event_matcher.py

Browse files

Focused file to manage the app

Files changed (1) hide show

event_matcher.py +285 -0

event_matcher.py ADDED Viewed

	@@ -0,0 +1,285 @@

+from typing import List, Dict, Tuple, Optional
+from datetime import datetime
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+import torch
+import pytz
+from fuzzywuzzy import fuzz
+class Event:
+    """Plain container for the fields of a single event.
+
+    Values are stored exactly as passed to the constructor; no validation
+    or normalisation is performed here.
+    """
+    def __init__(
+        self,
+        title: str,
+        description: str,
+        start_time: datetime,
+        end_time: Optional[datetime],
+        location: str,
+        categories: List[str],
+        hosts: List[str],
+        link: str,
+        guid: str
+    ):
+        """Store the given event fields on the instance.
+
+        Args:
+            title: Event title.
+            description: Event description text.
+            start_time: When the event starts.
+            end_time: When the event ends, or None if not provided.
+            location: Location text for the event.
+            categories: Category names attached to the event.
+            hosts: Names of the hosting organisations.
+            link: Link associated with the event.
+            guid: Identifier of the event.
+        """
+        self.title = title
+        self.description = description
+        self.start_time = start_time
+        self.end_time = end_time
+        self.location = location
+        self.categories = categories
+        self.hosts = hosts
+        self.link = link
+        self.guid = guid
+    def __repr__(self) -> str:
+        """Return a compact representation useful in logs and debuggers."""
+        return (
+            f"{type(self).__name__}(title={self.title!r}, "
+            f"start_time={self.start_time!r}, "
+            f"location={self.location!r}, guid={self.guid!r})"
+        )
+class EnhancedEventMatcher:
+    """Score events against a free-text query and summarise the best matches.
+
+    Three signals are combined per event: fuzzy matching of the query
+    against hosts whose name contains "faculty", fuzzy matching against the
+    event's categories, and keyword matching on the location. A T5 model
+    generates a short introduction that is placed above the formatted list
+    of top matches.
+    """
+    def __init__(self):
+        """Load the t5-small tokenizer/model and create empty learned state."""
+        print("Initializing event matcher...")
+        # Initialize T5 for response enhancement
+        self.tokenizer = T5Tokenizer.from_pretrained("t5-small")
+        self.t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
+        # Learned state, populated by learn_from_events()
+        self.known_categories = set()
+        self.known_hosts = set()
+        self.known_locations = set()
+        # (host, category) -> number of events in which the pair co-occurred
+        self.faculty_patterns = {}
+        # (category, other_category) -> number of events containing both
+        self.category_patterns = {}
+        # GUIDs already learned, so repeated calls do not double-count events
+        self._learned_guids = set()
+        # Define static patterns
+        self.patterns = {
+            'faculty': {
+                'math': ['mathematics', 'math', 'stats', 'computer science'],
+                'humanities': ['humanities', 'language', 'literature'],
+                'business': ['goodman', 'business', 'accounting'],
+                'science': ['science', 'biology', 'chemistry', 'physics']
+            },
+            'event_type': {
+                'academic': ['lecture', 'seminar', 'workshop', 'conference'],
+                'social': ['meetup', 'social', 'gathering', 'networking'],
+                'career': ['career', 'job', 'employment', 'professional']
+            },
+            'location': {
+                'online': ['online', 'virtual', 'teams', 'zoom'],
+                'campus': ['room', 'hall', 'building', 'plaza'],
+                'library': ['library', 'learning commons', 'makerspace']
+            }
+        }
+    @staticmethod
+    def _split_field(value) -> List[str]:
+        """Turn a ';'-separated string (or a sequence) into clean items.
+
+        Whitespace around each item is removed and empty items are dropped,
+        so '' and None both yield [] rather than [''].
+        """
+        if not value:
+            return []
+        parts = value.split(';') if isinstance(value, str) else value
+        cleaned = (str(part).strip() for part in parts if part is not None)
+        return [part for part in cleaned if part]
+    def convert_dict_to_event(self, event_dict: Dict) -> "Event":
+        """Convert a dictionary to an Event object.
+
+        Required keys: 'title', 'start_time', 'location', 'link', 'guid'.
+        Optional keys: 'description', 'end_time', 'categories', 'hosts';
+        the last two are ';'-separated strings (a list is also accepted).
+
+        Raises:
+            Exception: whatever the conversion raised (e.g. KeyError for a
+                missing required key), re-raised after being printed.
+        """
+        try:
+            return Event(
+                title=event_dict['title'],
+                description=event_dict.get('description', ''),
+                start_time=event_dict['start_time'],
+                end_time=event_dict.get('end_time'),
+                location=event_dict['location'],
+                categories=self._split_field(event_dict.get('categories')),
+                hosts=self._split_field(event_dict.get('hosts')),
+                link=event_dict['link'],
+                guid=event_dict['guid']
+            )
+        except Exception as e:
+            print(f"Error converting dict to event: {e}")
+            raise
+    def learn_from_events(self, events: List["Event"]) -> None:
+        """Update the known sets and co-occurrence counts from events.
+
+        An event whose GUID has already been learned is skipped, so calling
+        this repeatedly with overlapping lists does not inflate the counts.
+        Events with an empty GUID are always processed.
+
+        Raises:
+            Exception: any error hit while learning, re-raised after being
+                printed.
+        """
+        try:
+            for event in events:
+                guid = getattr(event, 'guid', None)
+                if guid and guid in self._learned_guids:
+                    continue
+                # Update known sets
+                self.known_categories.update(event.categories)
+                self.known_hosts.update(event.hosts)
+                self.known_locations.add(event.location)
+                # Learn faculty associations (only hosts naming a faculty)
+                faculty_hosts = [h for h in event.hosts if 'faculty' in h.lower()]
+                for host in faculty_hosts:
+                    for category in event.categories:
+                        key = (host, category)
+                        self.faculty_patterns[key] = self.faculty_patterns.get(key, 0) + 1
+                # Learn category associations (ordered pairs of distinct names)
+                for cat1 in event.categories:
+                    for cat2 in event.categories:
+                        if cat1 != cat2:
+                            key = (cat1, cat2)
+                            self.category_patterns[key] = self.category_patterns.get(key, 0) + 1
+                # Mark as learned only after the event was fully processed
+                if guid:
+                    self._learned_guids.add(guid)
+        except Exception as e:
+            print(f"Error learning from events: {e}")
+            raise
+    def get_faculty_score(self, event: "Event", query: str) -> float:
+        """Score faculty relevance using learned patterns.
+
+        Adds up to 2.0 per faculty host that fuzzily matches the query
+        (partial ratio above 80), plus a count-weighted bonus for every
+        learned (host, category) pair whose category the event carries and
+        whose host matches the query. Returns 0.0 on an empty query or on
+        any error.
+        """
+        try:
+            if not query:
+                return 0.0
+            score = 0.0
+            query_lower = query.lower()
+            # Direct faculty mention check
+            for host in event.hosts:
+                host_lower = host.lower()
+                if 'faculty' in host_lower:
+                    ratio = fuzz.partial_ratio(query_lower, host_lower)
+                    if ratio > 80:
+                        score += 2.0 * (ratio / 100)
+            # Category association check; the normaliser is loop-invariant
+            max_count = max(self.faculty_patterns.values(), default=1)
+            for category in event.categories:
+                for (host, cat), count in self.faculty_patterns.items():
+                    if category == cat and fuzz.partial_ratio(query_lower, host.lower()) > 80:
+                        score += 1.0 * (count / max_count)
+            return score
+        except Exception as e:
+            print(f"Error calculating faculty score: {e}")
+            return 0.0
+    def get_category_score(self, event: "Event", query_type: str) -> float:
+        """Score category relevance using learned patterns.
+
+        Adds up to 1.5 per event category that fuzzily matches query_type
+        (partial ratio above 80), plus a count-weighted bonus of up to 0.5
+        for each learned category pair linking an event category to a
+        category that matches query_type. Returns 0.0 on an empty
+        query_type or on any error.
+        """
+        try:
+            if not query_type:
+                return 0.0
+            score = 0.0
+            wanted = query_type.lower()
+            # The normaliser is loop-invariant
+            max_count = max(self.category_patterns.values(), default=1)
+            for category in event.categories:
+                # Direct category match
+                ratio = fuzz.partial_ratio(wanted, category.lower())
+                if ratio > 80:
+                    score += 1.5 * (ratio / 100)
+                # Associated categories
+                for (cat1, cat2), count in self.category_patterns.items():
+                    if category == cat1 and fuzz.partial_ratio(wanted, cat2.lower()) > 80:
+                        score += 0.5 * (count / max_count)
+            return score
+        except Exception as e:
+            print(f"Error calculating category score: {e}")
+            return 0.0
+    def get_location_score(self, event: "Event", query: str) -> float:
+        """Score location relevance.
+
+        Each of the three location groups (online, campus, library) adds 1.5
+        when the query contains one of the group's query keywords and the
+        event location contains one of the group's location keywords.
+        Returns 0.0 on any error.
+        """
+        try:
+            location_lower = event.location.lower()
+            query_lower = query.lower()
+            location_patterns = self.patterns['location']
+            # (keywords looked for in the query, keywords looked for in the location)
+            checks = (
+                (location_patterns['online'], location_patterns['online']),
+                (['in-person', 'campus', 'building'], location_patterns['campus']),
+                (location_patterns['library'], location_patterns['library']),
+            )
+            score = 0.0
+            for query_terms, location_terms in checks:
+                asked_for = any(term in query_lower for term in query_terms)
+                if asked_for and any(term in location_lower for term in location_terms):
+                    score += 1.5
+            return score
+        except Exception as e:
+            print(f"Error calculating location score: {e}")
+            return 0.0
+    def generate_llm_response(self, query: str, events_text: str) -> str:
+        """Generate an introduction for the matched events using T5.
+
+        Falls back to a fixed sentence when generation fails or produces an
+        empty string.
+        """
+        fallback = "Here are some relevant events I found:"
+        try:
+            # Create prompt for T5
+            prompt = f"summarize: Query: {query}\nAvailable Events:\n{events_text}"
+            # Generate response
+            inputs = self.tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
+            # Beam search without sampling: a temperature would be ignored
+            # here (it only applies when do_sample=True), so none is passed.
+            outputs = self.t5_model.generate(
+                inputs,
+                max_length=300,
+                num_beams=4,
+                early_stopping=True
+            )
+            text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+            return text or fallback
+        except Exception as e:
+            print(f"Error generating LLM response: {e}")
+            return fallback
+    def format_response(self, events: List[Tuple["Event", float]], llm_response: str) -> str:
+        """Format the final response with event details.
+
+        The result is llm_response, a blank line, then one block per
+        (event, score) pair. Each event gets one star per whole point of
+        score, capped at five. Returns a fixed error sentence if formatting
+        fails.
+        """
+        try:
+            online_terms = self.patterns['location']['online']
+            blocks = [f"{llm_response}\n\n"]
+            for event, score in events:
+                # Determine location icon
+                location_lower = event.location.lower()
+                is_online = any(term in location_lower for term in online_terms)
+                location_icon = "📱" if is_online else "📍"
+                stars = '🌟' * int(min(score, 5))
+                # Format event details; the empty first/last items give the
+                # leading and trailing newline of each block.
+                blocks.append("\n".join([
+                    "",
+                    f"**{event.title}** {stars}",
+                    f"📅 {event.start_time.strftime('%A, %B %d, %Y')} at {event.start_time.strftime('%I:%M %p')}",
+                    f"{location_icon} {event.location}",
+                    f"👥 Hosted by: {', '.join(event.hosts)}",
+                    f"🏷️ Categories: {', '.join(event.categories)}",
+                    f"🔗 {event.link}",
+                    "",
+                ]))
+            return "".join(blocks)
+        except Exception as e:
+            print(f"Error formatting response: {e}")
+            return "Error formatting the response. Please try again."
+    def match_and_respond(self, events: List[Dict], query: str, top_k: int = 3) -> str:
+        """Main method to match events and generate response.
+
+        Args:
+            events: Event dictionaries accepted by convert_dict_to_event().
+            query: The user's free-text query.
+            top_k: Maximum number of matches to include (default 3).
+
+        Returns:
+            The formatted response, a "couldn't find" message when no event
+            scores above zero, or a generic error message if anything fails.
+        """
+        try:
+            # Convert dictionary events to Event objects
+            event_objects = [self.convert_dict_to_event(event) for event in events]
+            # Learn patterns from events
+            self.learn_from_events(event_objects)
+            # Process query
+            query_lower = query.lower()
+            matched_events = []
+            # Score and match events
+            for event in event_objects:
+                faculty_score = self.get_faculty_score(event, query)
+                category_score = self.get_category_score(event, query_lower)
+                location_score = self.get_location_score(event, query)
+                total_score = (
+                    faculty_score * 1.5 +
+                    category_score * 1.2 +
+                    location_score * 1.0
+                )
+                if total_score > 0:
+                    matched_events.append((event, total_score))
+            # Sort and get top matches (stable: ties keep input order)
+            matched_events.sort(key=lambda pair: pair[1], reverse=True)
+            top_matches = matched_events[:max(top_k, 0)]
+            if not top_matches:
+                return "I couldn't find any events matching your query. Try asking in a different way!"
+            # Format events for LLM
+            events_text = "".join(
+                "\n".join([
+                    "",
+                    f"Event: {event.title}",
+                    f"Date: {event.start_time.strftime('%A, %B %d, %Y')}",
+                    f"Time: {event.start_time.strftime('%I:%M %p')}",
+                    f"Location: {event.location}",
+                    f"Categories: {', '.join(event.categories)}",
+                    f"Score: {score:.2f}",
+                    "",
+                ])
+                for event, score in top_matches
+            )
+            # Generate LLM response and format final response
+            llm_response = self.generate_llm_response(query, events_text)
+            return self.format_response(top_matches, llm_response)
+        except Exception as e:
+            print(f"Error in match_and_respond: {e}")
+            return "I encountered an error processing your query. Please try again!"