Spaces:

markobinario
/

flaskbot

Running

App Files Community

markobinario committed on 23 days ago

Commit

d157fd9

verified ·

1 Parent(s): a124dcf

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -36

app.py CHANGED Viewed

@@ -108,50 +108,103 @@ Just type your question or start a conversation, and I'll do my best to help you
         return random.choice(responses)
     def save_unanswered_question(self, question: str) -> bool:
-        """Save unanswered question to the database"""
         try:
             # Try different possible endpoints for saving unanswered questions
             endpoints = [
                 f"{self.database_url}/unanswered_questions",
                 f"{self.database_url}/api/unanswered_questions",
                 f"{self.database_url}/save_question",
-                f"{self.database_url}/api/save_question"
             ]
             for endpoint in endpoints:
                 try:
-                    # Try POST request with JSON body - matching your table structure
                     response = requests.post(
                         endpoint,
-                        json={
-                            "question": question,
-                            "created_at": self._get_timestamp()
-                        },
                         headers={"Content-Type": "application/json"},
                         timeout=10
                     )
                     if response.status_code in [200, 201]:
                         return True
-                except:
-                    try:
-                        # Try GET request with query parameters
-                        response = requests.get(
-                            endpoint,
-                            params={
-                                "question": question,
-                                "created_at": self._get_timestamp()
-                            },
-                            timeout=10
-                        )
-                        if response.status_code in [200, 201]:
-                            return True
-                    except:
-                        continue
             return False
         except Exception as e:
-            print(f"Error saving unanswered question: {e}")
             return False
     def _get_timestamp(self) -> str:
@@ -167,6 +220,30 @@ Just type your question or start a conversation, and I'll do my best to help you
         text = text.translate(str.maketrans('', '', string.punctuation))
         # Remove extra whitespace
         text = ' '.join(text.split())
         return text
     def _extract_keywords(self, text: str) -> List[str]:
@@ -227,18 +304,35 @@ Just type your question or start a conversation, and I'll do my best to help you
         if norm2 in norm1:
             contains_similarity = max(contains_similarity, 0.9 * (len(norm2) / len(norm1)))
-        # Method 4: Word order similarity
         words1 = norm1.split()
         words2 = norm2.split()
         word_order_similarity = 0.0
         if words1 and words2:
-            # Check for common word sequences
             common_sequences = 0
             max_len = min(len(words1), len(words2))
             for i in range(max_len):
                 if words1[i] == words2[i]:
                     common_sequences += 1
-            word_order_similarity = common_sequences / max_len if max_len > 0 else 0.0
         # Method 5: Semantic similarity using word relationships
         semantic_similarity = self._calculate_semantic_similarity(keywords1, keywords2)
@@ -277,8 +371,9 @@ Just type your question or start a conversation, and I'll do my best to help you
             'contact': {'contact', 'phone', 'email', 'address', 'office', 'reach', 'call'},
             'requirements': {'requirement', 'need', 'required', 'must', 'prerequisite', 'condition'},
             'application': {'apply', 'application', 'submit', 'process', 'procedure'},
-            'programs': {'program', 'course', 'major', 'degree', 'study', 'academic'},
-            'admission': {'admission', 'admit', 'accept', 'enroll', 'entry', 'enter'}
         }
         # Check if keywords belong to the same semantic group
@@ -313,6 +408,50 @@ Just type your question or start a conversation, and I'll do my best to help you
         return min(phrase_score, 1.0)
     def _find_best_match(self, user_question: str, database_questions: List[str], threshold: float = 0.25) -> Optional[str]:
         """Find the best matching question from database with improved logic"""
         if not database_questions:
@@ -422,21 +561,33 @@ Just type your question or start a conversation, and I'll do my best to help you
                         continue
             # If no answer found, save the question as unanswered
-            self.save_unanswered_question(question)
-            return "I'm sorry, I couldn't find a specific answer to your question in our database. I've saved your question for review, and we'll work on providing a better answer in the future. Could you try rephrasing your question or ask me something else?"
         except requests.exceptions.Timeout:
             # Save the question even if there's a timeout
-            self.save_unanswered_question(question)
-            return "I'm sorry, the database is taking too long to respond. I've saved your question for review. Please try again in a moment."
         except requests.exceptions.ConnectionError:
             # Save the question even if there's a connection error
-            self.save_unanswered_question(question)
-            return "I'm sorry, I'm having trouble connecting to our database right now. I've saved your question for review. Please try again later."
         except Exception as e:
             # Save the question even if there's an unexpected error
-            self.save_unanswered_question(question)
-            return f"I encountered an error while searching our database: {str(e)}. I've saved your question for review. Please try again."
     def _get_all_questions(self) -> List[str]:
         """Get all available questions from the database for smart matching"""

         return random.choice(responses)
     def save_unanswered_question(self, question: str) -> bool:
         """Best-effort save of an unanswered question to the backing database.

         Every candidate endpoint under ``self.database_url`` is tried in
         order, and for each endpoint three request styles are attempted in
         turn: a JSON POST, a GET with query parameters, and a form-encoded
         POST. Progress is logged with ``print`` at every step.

         Args:
             question: The user's question text to store.

         Returns:
             True as soon as any attempt answers with HTTP 200 or 201.
             False when every attempt fails or an unexpected error occurs;
             this method never raises.
         """
         print(f"Attempting to save unanswered question: '{question}'")
         try:
             # Try different possible endpoints for saving unanswered questions
             endpoints = [
                 f"{self.database_url}/unanswered_questions",
                 f"{self.database_url}/api/unanswered_questions",
                 f"{self.database_url}/save_question",
                 f"{self.database_url}/api/save_question",
                 f"{self.database_url}/questions",
                 f"{self.database_url}/api/questions",
                 f"{self.database_url}/faq/unanswered",
                 f"{self.database_url}/api/faq/unanswered",
             ]
             timestamp = self._get_timestamp()
             print(f"Using timestamp: {timestamp}")

             # The same two fields are sent by every request style, so the
             # payload is built once instead of once per attempt.
             # Presumably mirrors the columns of the unanswered-questions
             # table -- confirm against the backend schema.
             payload = {"question": question, "created_at": timestamp}

             # One row per request style:
             # (log label, payload log label, label used in the success line, sender)
             # NOTE(review): a 200 from the GET fallback only proves the route
             # answered; a read-only listing route would also return 200 and be
             # reported as a successful save. Confirm the backend really
             # persists on GET before relying on it.
             attempts = [
                 (
                     "POST",
                     "POST data",
                     "POST",
                     lambda url: requests.post(
                         url,
                         json=payload,
                         headers={"Content-Type": "application/json"},
                         timeout=10,
                     ),
                 ),
                 (
                     "GET",
                     "GET params",
                     "GET",
                     lambda url: requests.get(url, params=payload, timeout=10),
                 ),
                 (
                     "Form POST",
                     "Form data",
                     "form POST",
                     lambda url: requests.post(url, data=payload, timeout=10),
                 ),
             ]

             # NOTE(review): worst case is 8 endpoints x 3 attempts, each with a
             # 10 s timeout, performed sequentially while the caller waits.
             for endpoint in endpoints:
                 print(f"Trying endpoint: {endpoint}")
                 for label, payload_label, via, send in attempts:
                     try:
                         print(f"{payload_label}: {payload}")
                         response = send(endpoint)
                         print(f"{label} response status: {response.status_code}")
                         print(f"{label} response text: {response.text[:200]}")
                         if response.status_code in (200, 201):
                             print(f"Successfully saved question via {via} to {endpoint}")
                             return True
                     except requests.exceptions.RequestException as e:
                         # A failed attempt is not fatal: fall through to the
                         # next request style / endpoint.
                         print(f"{label} request failed for {endpoint}: {e}")

             print("All endpoints failed to save the question")
             return False
         except Exception as e:
             # Saving is best-effort; callers branch on the boolean result, so
             # nothing may propagate from here.
             print(f"Unexpected error saving unanswered question: {e}")
             return False
     def _get_timestamp(self) -> str:
         text = text.translate(str.maketrans('', '', string.punctuation))
         # Remove extra whitespace
         text = ' '.join(text.split())
+        # Additional normalization for better matching
+        # Replace common variations
+        replacements = {
+            'what are the': 'what',
+            'what is the': 'what',
+            'what are': 'what',
+            'what is': 'what',
+            'how do i': 'how',
+            'how can i': 'how',
+            'how to': 'how',
+            'when is the': 'when',
+            'when are the': 'when',
+            'where is the': 'where',
+            'where are the': 'where',
+            'who is the': 'who',
+            'who are the': 'who'
+        }
+        for old, new in replacements.items():
+            if text.startswith(old):
+                text = text.replace(old, new, 1)
+                break
         return text
     def _extract_keywords(self, text: str) -> List[str]:
         if norm2 in norm1:
             contains_similarity = max(contains_similarity, 0.9 * (len(norm2) / len(norm1)))
+        # Method 4: Enhanced word order similarity
         words1 = norm1.split()
         words2 = norm2.split()
         word_order_similarity = 0.0
         if words1 and words2:
+            # Check for common word sequences (exact order)
             common_sequences = 0
             max_len = min(len(words1), len(words2))
             for i in range(max_len):
                 if words1[i] == words2[i]:
                     common_sequences += 1
+            exact_order_similarity = common_sequences / max_len if max_len > 0 else 0.0
+            # Check for word order flexibility (any order)
+            set1 = set(words1)
+            set2 = set(words2)
+            common_words = set1.intersection(set2)
+            total_words = set1.union(set2)
+            flexible_order_similarity = len(common_words) / len(total_words) if total_words else 0.0
+            # Check for phrase patterns (like "available courses" vs "courses available")
+            phrase_similarity = self._calculate_phrase_order_similarity(words1, words2)
+            # Combine different word order methods
+            word_order_similarity = (
+                exact_order_similarity * 0.3 +
+                flexible_order_similarity * 0.5 +
+                phrase_similarity * 0.2
+            )
         # Method 5: Semantic similarity using word relationships
         semantic_similarity = self._calculate_semantic_similarity(keywords1, keywords2)
             'contact': {'contact', 'phone', 'email', 'address', 'office', 'reach', 'call'},
             'requirements': {'requirement', 'need', 'required', 'must', 'prerequisite', 'condition'},
             'application': {'apply', 'application', 'submit', 'process', 'procedure'},
+            'programs': {'program', 'course', 'major', 'degree', 'study', 'academic', 'available', 'offered', 'listings'},
+            'admission': {'admission', 'admit', 'accept', 'enroll', 'entry', 'enter'},
+            'courses': {'course', 'courses', 'program', 'programs', 'major', 'majors', 'degree', 'degrees', 'available', 'offered', 'listings', 'what', 'which'}
         }
         # Check if keywords belong to the same semantic group
         return min(phrase_score, 1.0)
+    def _calculate_phrase_order_similarity(self, words1: List[str], words2: List[str]) -> float:
+        """Calculate similarity based on phrase order flexibility"""
+        if not words1 or not words2:
+            return 0.0
+        # Common phrase patterns that should match regardless of order
+        phrase_patterns = [
+            (['available', 'courses'], ['courses', 'available']),
+            (['admission', 'requirements'], ['requirements', 'admission']),
+            (['financial', 'aid'], ['aid', 'financial']),
+            (['tuition', 'cost'], ['cost', 'tuition']),
+            (['application', 'deadline'], ['deadline', 'application']),
+            (['contact', 'admissions'], ['admissions', 'contact']),
+            (['gpa', 'requirement'], ['requirement', 'gpa']),
+            (['academic', 'requirements'], ['requirements', 'academic']),
+            (['programs', 'available'], ['available', 'programs']),
+            (['what', 'programs'], ['programs', 'what']),
+            (['what', 'courses'], ['courses', 'what']),
+            (['what', 'available'], ['available', 'what'])
+        ]
+        # Check for phrase pattern matches
+        for pattern1, pattern2 in phrase_patterns:
+            # Check if words1 contains pattern1 and words2 contains pattern2
+            if (all(word in words1 for word in pattern1) and
+                all(word in words2 for word in pattern2)):
+                return 0.8
+            # Check if words1 contains pattern2 and words2 contains pattern1
+            if (all(word in words1 for word in pattern2) and
+                all(word in words2 for word in pattern1)):
+                return 0.8
+        # Check for partial phrase matches
+        for pattern1, pattern2 in phrase_patterns:
+            # Check if at least 2 words from each pattern are present
+            words1_matches = sum(1 for word in pattern1 if word in words1)
+            words2_matches = sum(1 for word in pattern2 if word in words2)
+            if words1_matches >= 2 and words2_matches >= 2:
+                return 0.6
+        return 0.0
     def _find_best_match(self, user_question: str, database_questions: List[str], threshold: float = 0.25) -> Optional[str]:
         """Find the best matching question from database with improved logic"""
         if not database_questions:
                         continue
             # If no answer found, save the question as unanswered
+            saved = self.save_unanswered_question(question)
+            if saved:
+                return "I'm sorry, I couldn't find a specific answer to your question in our database. I've saved your question for review, and we'll work on providing a better answer in the future. Could you try rephrasing your question or ask me something else?"
+            else:
+                return "I'm sorry, I couldn't find a specific answer to your question in our database. I tried to save your question for review, but there was an issue with our database connection. Could you try rephrasing your question or ask me something else?"
         except requests.exceptions.Timeout:
             # Save the question even if there's a timeout
+            saved = self.save_unanswered_question(question)
+            if saved:
+                return "I'm sorry, the database is taking too long to respond. I've saved your question for review. Please try again in a moment."
+            else:
+                return "I'm sorry, the database is taking too long to respond. Please try again in a moment."
         except requests.exceptions.ConnectionError:
             # Save the question even if there's a connection error
+            saved = self.save_unanswered_question(question)
+            if saved:
+                return "I'm sorry, I'm having trouble connecting to our database right now. I've saved your question for review. Please try again later."
+            else:
+                return "I'm sorry, I'm having trouble connecting to our database right now. Please try again later."
         except Exception as e:
             # Save the question even if there's an unexpected error
+            saved = self.save_unanswered_question(question)
+            if saved:
+                return f"I encountered an error while searching our database: {str(e)}. I've saved your question for review. Please try again."
+            else:
+                return f"I encountered an error while searching our database: {str(e)}. Please try again."
     def _get_all_questions(self) -> List[str]:
         """Get all available questions from the database for smart matching"""