markobinario committed on
Commit
d157fd9
·
verified ·
1 Parent(s): a124dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -36
app.py CHANGED
@@ -108,50 +108,103 @@ Just type your question or start a conversation, and I'll do my best to help you
108
  return random.choice(responses)
109
 
110
  def save_unanswered_question(self, question: str) -> bool:
111
- """Save unanswered question to the database"""
 
 
112
  try:
113
  # Try different possible endpoints for saving unanswered questions
114
  endpoints = [
115
  f"{self.database_url}/unanswered_questions",
116
  f"{self.database_url}/api/unanswered_questions",
117
  f"{self.database_url}/save_question",
118
- f"{self.database_url}/api/save_question"
 
 
 
 
119
  ]
120
 
 
 
 
121
  for endpoint in endpoints:
 
 
 
122
  try:
123
- # Try POST request with JSON body - matching your table structure
 
 
 
 
 
124
  response = requests.post(
125
  endpoint,
126
- json={
127
- "question": question,
128
- "created_at": self._get_timestamp()
129
- },
130
  headers={"Content-Type": "application/json"},
131
  timeout=10
132
  )
 
 
 
133
  if response.status_code in [200, 201]:
 
134
  return True
135
- except:
136
- try:
137
- # Try GET request with query parameters
138
- response = requests.get(
139
- endpoint,
140
- params={
141
- "question": question,
142
- "created_at": self._get_timestamp()
143
- },
144
- timeout=10
145
- )
146
- if response.status_code in [200, 201]:
147
- return True
148
- except:
149
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
 
151
  return False
152
 
153
  except Exception as e:
154
- print(f"Error saving unanswered question: {e}")
155
  return False
156
 
157
  def _get_timestamp(self) -> str:
@@ -167,6 +220,30 @@ Just type your question or start a conversation, and I'll do my best to help you
167
  text = text.translate(str.maketrans('', '', string.punctuation))
168
  # Remove extra whitespace
169
  text = ' '.join(text.split())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  return text
171
 
172
  def _extract_keywords(self, text: str) -> List[str]:
@@ -227,18 +304,35 @@ Just type your question or start a conversation, and I'll do my best to help you
227
  if norm2 in norm1:
228
  contains_similarity = max(contains_similarity, 0.9 * (len(norm2) / len(norm1)))
229
 
230
- # Method 4: Word order similarity
231
  words1 = norm1.split()
232
  words2 = norm2.split()
233
  word_order_similarity = 0.0
234
  if words1 and words2:
235
- # Check for common word sequences
236
  common_sequences = 0
237
  max_len = min(len(words1), len(words2))
238
  for i in range(max_len):
239
  if words1[i] == words2[i]:
240
  common_sequences += 1
241
- word_order_similarity = common_sequences / max_len if max_len > 0 else 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # Method 5: Semantic similarity using word relationships
244
  semantic_similarity = self._calculate_semantic_similarity(keywords1, keywords2)
@@ -277,8 +371,9 @@ Just type your question or start a conversation, and I'll do my best to help you
277
  'contact': {'contact', 'phone', 'email', 'address', 'office', 'reach', 'call'},
278
  'requirements': {'requirement', 'need', 'required', 'must', 'prerequisite', 'condition'},
279
  'application': {'apply', 'application', 'submit', 'process', 'procedure'},
280
- 'programs': {'program', 'course', 'major', 'degree', 'study', 'academic'},
281
- 'admission': {'admission', 'admit', 'accept', 'enroll', 'entry', 'enter'}
 
282
  }
283
 
284
  # Check if keywords belong to the same semantic group
@@ -313,6 +408,50 @@ Just type your question or start a conversation, and I'll do my best to help you
313
 
314
  return min(phrase_score, 1.0)
315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  def _find_best_match(self, user_question: str, database_questions: List[str], threshold: float = 0.25) -> Optional[str]:
317
  """Find the best matching question from database with improved logic"""
318
  if not database_questions:
@@ -422,21 +561,33 @@ Just type your question or start a conversation, and I'll do my best to help you
422
  continue
423
 
424
  # If no answer found, save the question as unanswered
425
- self.save_unanswered_question(question)
426
- return "I'm sorry, I couldn't find a specific answer to your question in our database. I've saved your question for review, and we'll work on providing a better answer in the future. Could you try rephrasing your question or ask me something else?"
 
 
 
427
 
428
  except requests.exceptions.Timeout:
429
  # Save the question even if there's a timeout
430
- self.save_unanswered_question(question)
431
- return "I'm sorry, the database is taking too long to respond. I've saved your question for review. Please try again in a moment."
 
 
 
432
  except requests.exceptions.ConnectionError:
433
  # Save the question even if there's a connection error
434
- self.save_unanswered_question(question)
435
- return "I'm sorry, I'm having trouble connecting to our database right now. I've saved your question for review. Please try again later."
 
 
 
436
  except Exception as e:
437
  # Save the question even if there's an unexpected error
438
- self.save_unanswered_question(question)
439
- return f"I encountered an error while searching our database: {str(e)}. I've saved your question for review. Please try again."
 
 
 
440
 
441
  def _get_all_questions(self) -> List[str]:
442
  """Get all available questions from the database for smart matching"""
 
108
  return random.choice(responses)
109
 
110
  def save_unanswered_question(self, question: str) -> bool:
111
+ """Save unanswered question to the database with detailed logging"""
112
+ print(f"Attempting to save unanswered question: '{question}'")
113
+
114
  try:
115
  # Try different possible endpoints for saving unanswered questions
116
  endpoints = [
117
  f"{self.database_url}/unanswered_questions",
118
  f"{self.database_url}/api/unanswered_questions",
119
  f"{self.database_url}/save_question",
120
+ f"{self.database_url}/api/save_question",
121
+ f"{self.database_url}/questions",
122
+ f"{self.database_url}/api/questions",
123
+ f"{self.database_url}/faq/unanswered",
124
+ f"{self.database_url}/api/faq/unanswered"
125
  ]
126
 
127
+ timestamp = self._get_timestamp()
128
+ print(f"Using timestamp: {timestamp}")
129
+
130
  for endpoint in endpoints:
131
+ print(f"Trying endpoint: {endpoint}")
132
+
133
+ # Try POST request with JSON body - matching your table structure
134
  try:
135
+ post_data = {
136
+ "question": question,
137
+ "created_at": timestamp
138
+ }
139
+ print(f"POST data: {post_data}")
140
+
141
  response = requests.post(
142
  endpoint,
143
+ json=post_data,
 
 
 
144
  headers={"Content-Type": "application/json"},
145
  timeout=10
146
  )
147
+ print(f"POST response status: {response.status_code}")
148
+ print(f"POST response text: {response.text[:200]}")
149
+
150
  if response.status_code in [200, 201]:
151
+ print(f"Successfully saved question via POST to {endpoint}")
152
  return True
153
+
154
+ except requests.exceptions.RequestException as e:
155
+ print(f"POST request failed for {endpoint}: {e}")
156
+
157
+ # Try GET request with query parameters
158
+ try:
159
+ get_params = {
160
+ "question": question,
161
+ "created_at": timestamp
162
+ }
163
+ print(f"GET params: {get_params}")
164
+
165
+ response = requests.get(
166
+ endpoint,
167
+ params=get_params,
168
+ timeout=10
169
+ )
170
+ print(f"GET response status: {response.status_code}")
171
+ print(f"GET response text: {response.text[:200]}")
172
+
173
+ if response.status_code in [200, 201]:
174
+ print(f"Successfully saved question via GET to {endpoint}")
175
+ return True
176
+
177
+ except requests.exceptions.RequestException as e:
178
+ print(f"GET request failed for {endpoint}: {e}")
179
+
180
+ # Try POST with form data
181
+ try:
182
+ form_data = {
183
+ "question": question,
184
+ "created_at": timestamp
185
+ }
186
+ print(f"Form data: {form_data}")
187
+
188
+ response = requests.post(
189
+ endpoint,
190
+ data=form_data,
191
+ timeout=10
192
+ )
193
+ print(f"Form POST response status: {response.status_code}")
194
+ print(f"Form POST response text: {response.text[:200]}")
195
+
196
+ if response.status_code in [200, 201]:
197
+ print(f"Successfully saved question via form POST to {endpoint}")
198
+ return True
199
+
200
+ except requests.exceptions.RequestException as e:
201
+ print(f"Form POST request failed for {endpoint}: {e}")
202
 
203
+ print("All endpoints failed to save the question")
204
  return False
205
 
206
  except Exception as e:
207
+ print(f"Unexpected error saving unanswered question: {e}")
208
  return False
209
 
210
  def _get_timestamp(self) -> str:
 
220
  text = text.translate(str.maketrans('', '', string.punctuation))
221
  # Remove extra whitespace
222
  text = ' '.join(text.split())
223
+
224
+ # Additional normalization for better matching
225
+ # Replace common variations
226
+ replacements = {
227
+ 'what are the': 'what',
228
+ 'what is the': 'what',
229
+ 'what are': 'what',
230
+ 'what is': 'what',
231
+ 'how do i': 'how',
232
+ 'how can i': 'how',
233
+ 'how to': 'how',
234
+ 'when is the': 'when',
235
+ 'when are the': 'when',
236
+ 'where is the': 'where',
237
+ 'where are the': 'where',
238
+ 'who is the': 'who',
239
+ 'who are the': 'who'
240
+ }
241
+
242
+ for old, new in replacements.items():
243
+ if text.startswith(old):
244
+ text = text.replace(old, new, 1)
245
+ break
246
+
247
  return text
248
 
249
  def _extract_keywords(self, text: str) -> List[str]:
 
304
  if norm2 in norm1:
305
  contains_similarity = max(contains_similarity, 0.9 * (len(norm2) / len(norm1)))
306
 
307
+ # Method 4: Enhanced word order similarity
308
  words1 = norm1.split()
309
  words2 = norm2.split()
310
  word_order_similarity = 0.0
311
  if words1 and words2:
312
+ # Check for common word sequences (exact order)
313
  common_sequences = 0
314
  max_len = min(len(words1), len(words2))
315
  for i in range(max_len):
316
  if words1[i] == words2[i]:
317
  common_sequences += 1
318
+ exact_order_similarity = common_sequences / max_len if max_len > 0 else 0.0
319
+
320
+ # Check for word order flexibility (any order)
321
+ set1 = set(words1)
322
+ set2 = set(words2)
323
+ common_words = set1.intersection(set2)
324
+ total_words = set1.union(set2)
325
+ flexible_order_similarity = len(common_words) / len(total_words) if total_words else 0.0
326
+
327
+ # Check for phrase patterns (like "available courses" vs "courses available")
328
+ phrase_similarity = self._calculate_phrase_order_similarity(words1, words2)
329
+
330
+ # Combine different word order methods
331
+ word_order_similarity = (
332
+ exact_order_similarity * 0.3 +
333
+ flexible_order_similarity * 0.5 +
334
+ phrase_similarity * 0.2
335
+ )
336
 
337
  # Method 5: Semantic similarity using word relationships
338
  semantic_similarity = self._calculate_semantic_similarity(keywords1, keywords2)
 
371
  'contact': {'contact', 'phone', 'email', 'address', 'office', 'reach', 'call'},
372
  'requirements': {'requirement', 'need', 'required', 'must', 'prerequisite', 'condition'},
373
  'application': {'apply', 'application', 'submit', 'process', 'procedure'},
374
+ 'programs': {'program', 'course', 'major', 'degree', 'study', 'academic', 'available', 'offered', 'listings'},
375
+ 'admission': {'admission', 'admit', 'accept', 'enroll', 'entry', 'enter'},
376
+ 'courses': {'course', 'courses', 'program', 'programs', 'major', 'majors', 'degree', 'degrees', 'available', 'offered', 'listings', 'what', 'which'}
377
  }
378
 
379
  # Check if keywords belong to the same semantic group
 
408
 
409
  return min(phrase_score, 1.0)
410
 
411
+ def _calculate_phrase_order_similarity(self, words1: List[str], words2: List[str]) -> float:
412
+ """Calculate similarity based on phrase order flexibility"""
413
+ if not words1 or not words2:
414
+ return 0.0
415
+
416
+ # Common phrase patterns that should match regardless of order
417
+ phrase_patterns = [
418
+ (['available', 'courses'], ['courses', 'available']),
419
+ (['admission', 'requirements'], ['requirements', 'admission']),
420
+ (['financial', 'aid'], ['aid', 'financial']),
421
+ (['tuition', 'cost'], ['cost', 'tuition']),
422
+ (['application', 'deadline'], ['deadline', 'application']),
423
+ (['contact', 'admissions'], ['admissions', 'contact']),
424
+ (['gpa', 'requirement'], ['requirement', 'gpa']),
425
+ (['academic', 'requirements'], ['requirements', 'academic']),
426
+ (['programs', 'available'], ['available', 'programs']),
427
+ (['what', 'programs'], ['programs', 'what']),
428
+ (['what', 'courses'], ['courses', 'what']),
429
+ (['what', 'available'], ['available', 'what'])
430
+ ]
431
+
432
+ # Check for phrase pattern matches
433
+ for pattern1, pattern2 in phrase_patterns:
434
+ # Check if words1 contains pattern1 and words2 contains pattern2
435
+ if (all(word in words1 for word in pattern1) and
436
+ all(word in words2 for word in pattern2)):
437
+ return 0.8
438
+
439
+ # Check if words1 contains pattern2 and words2 contains pattern1
440
+ if (all(word in words1 for word in pattern2) and
441
+ all(word in words2 for word in pattern1)):
442
+ return 0.8
443
+
444
+ # Check for partial phrase matches
445
+ for pattern1, pattern2 in phrase_patterns:
446
+ # Check if at least 2 words from each pattern are present
447
+ words1_matches = sum(1 for word in pattern1 if word in words1)
448
+ words2_matches = sum(1 for word in pattern2 if word in words2)
449
+
450
+ if words1_matches >= 2 and words2_matches >= 2:
451
+ return 0.6
452
+
453
+ return 0.0
454
+
455
  def _find_best_match(self, user_question: str, database_questions: List[str], threshold: float = 0.25) -> Optional[str]:
456
  """Find the best matching question from database with improved logic"""
457
  if not database_questions:
 
561
  continue
562
 
563
  # If no answer found, save the question as unanswered
564
+ saved = self.save_unanswered_question(question)
565
+ if saved:
566
+ return "I'm sorry, I couldn't find a specific answer to your question in our database. I've saved your question for review, and we'll work on providing a better answer in the future. Could you try rephrasing your question or ask me something else?"
567
+ else:
568
+ return "I'm sorry, I couldn't find a specific answer to your question in our database. I tried to save your question for review, but there was an issue with our database connection. Could you try rephrasing your question or ask me something else?"
569
 
570
  except requests.exceptions.Timeout:
571
  # Save the question even if there's a timeout
572
+ saved = self.save_unanswered_question(question)
573
+ if saved:
574
+ return "I'm sorry, the database is taking too long to respond. I've saved your question for review. Please try again in a moment."
575
+ else:
576
+ return "I'm sorry, the database is taking too long to respond. Please try again in a moment."
577
  except requests.exceptions.ConnectionError:
578
  # Save the question even if there's a connection error
579
+ saved = self.save_unanswered_question(question)
580
+ if saved:
581
+ return "I'm sorry, I'm having trouble connecting to our database right now. I've saved your question for review. Please try again later."
582
+ else:
583
+ return "I'm sorry, I'm having trouble connecting to our database right now. Please try again later."
584
  except Exception as e:
585
  # Save the question even if there's an unexpected error
586
+ saved = self.save_unanswered_question(question)
587
+ if saved:
588
+ return f"I encountered an error while searching our database: {str(e)}. I've saved your question for review. Please try again."
589
+ else:
590
+ return f"I encountered an error while searching our database: {str(e)}. Please try again."
591
 
592
  def _get_all_questions(self) -> List[str]:
593
  """Get all available questions from the database for smart matching"""