markobinario committed on
Commit
2fc19f4
·
verified ·
1 Parent(s): 4b7f251

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py CHANGED
@@ -11,6 +11,76 @@ class AIChatbot:
11
  self.database_url = database_url
12
  self.conversation_history = []
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Simple conversation patterns
15
  self.greeting_patterns = [
16
  r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
@@ -62,6 +132,88 @@ class AIChatbot:
62
  return True
63
  return False
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def get_greeting_response(self) -> str:
66
  """Generate a greeting response"""
67
  responses = [
@@ -695,6 +847,14 @@ Just type your question or start a conversation, and I'll do my best to help you
695
  if not message.strip():
696
  return "Please enter a message so I can help you!"
697
 
 
 
 
 
 
 
 
 
698
  # Store conversation history
699
  self.conversation_history.append(("user", message))
700
 
 
11
  self.database_url = database_url
12
  self.conversation_history = []
13
 
14
+ # Profanity filter - list of bad words to filter (English and Tagalog)
15
+ self.bad_words = {
16
+ # English bad words
17
+ 'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
18
+ 'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
19
+ 'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
20
+ 'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
21
+ 'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
22
+ 'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
23
+ 'asshole', 'dick', 'douche', 'scumbag', 'slimeball', 'douchebag', 'knobhead',
24
+ 'numskull', 'halfwit', 'nincompoop', 'blockhead', 'dimwit', 'nitwit', 'simpleton',
25
+ 'dunce', 'buffoon', 'doofus', 'clod', 'goober', 'jerkface', 'schmuck', 'scoundrel',
26
+ 'miscreant', 'rat', 'git', 'wazzock', 'pillock', 'prat', 'plonker', 'div', 'bellend',
27
+ 'tosserhead', 'twitbrain', 'sapbrain', 'knucklehead', 'dopey', 'boob', 'dingbat', 'oaf',
28
+ 'ninnyhammer', 'chucklehead', 'saphead', 'pukehead', 'fuckface', 'assface', 'dickhead',
29
+ 'cockhead', 'shithead', 'twatface', 'doucheface', 'bastardface', 'motherfucker', 'shitbag',
30
+ 'cocksucker', 'jackass', 'wankerface', 'tosserface', 'arsehole', 'shitstain', 'assholeface',
31
+ 'prickface', 'dumbfuck', 'fucknut', 'twatwaffle', 'shitbagger', 'dickweed', 'cumdump',
32
+ 'asswipe', 'cockwomble', 'bollocks', 'twat', 'dick', 'fucking',
33
+ # Tagalog bad words
34
+ 'gago', 'putangina', 'putang', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
35
+ 'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
36
+ 'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina', 'bastos', 'maldita', 'loko',
37
+ 'asar', 'pekpek', 'burat', 'kantot', 'puke', 'kantotin', 'tarantadoin', 'ulolan',
38
+ 'bading', 'bakla', 'unggoy', 'asarin', 'bastusin', 'malditahin', 'buratin', 'pekpekin',
39
+ 'pukein', 'tangain', 'gagoan', 'tarantadohin', 'ina'
40
+ }
41
+
42
+ # Bad phrases (multi-word profanity - English and Tagalog)
43
+ self.bad_phrases = {
44
+ # English phrases
45
+ 'fuck you', 'shit you', 'damn you', 'hell you',
46
+ 'you bastard', 'you bitch', 'you dick', 'you prick', 'you cunt', 'you slut', 'you whore',
47
+ 'you jerk', 'you idiot', 'you fool', 'you moron', 'you dumbass', 'you douche', 'you twat',
48
+ 'you bugger', 'you wanker', 'you tosser', 'you poophead', 'you scumbag', 'you slimeball',
49
+ 'you douchebag', 'you knobhead', 'you bozo', 'you twit', 'you dope', 'you numskull',
50
+ 'you halfwit', 'you nincompoop', 'you blockhead', 'you dimwit', 'you nitwit', 'you simpleton',
51
+ 'you dunce', 'you buffoon', 'you doofus', 'you clod', 'you goober', 'you jerkface',
52
+ 'you schmuck', 'you scoundrel', 'you miscreant', 'you rat', 'you puke', 'you vomit',
53
+ 'you dung', 'you ass', 'you tits', 'you pussy', 'you cock', 'you fuckface', 'you assface',
54
+ 'you dickhead', 'you cockhead', 'you shithead', 'you twatface', 'you knobhead', 'you doucheface',
55
+ 'you loser', 'you bastardface', 'you motherfucker', 'you shitbag', 'you cocksucker',
56
+ 'you jackass', 'you wankerface', 'you tosserface', 'you arsehole', 'you asshole', 'you freak', 'you nut',
57
+ 'you scum', 'you creep', 'you brat', 'you dweeb', 'you goon', 'you pukehead', 'you shitstain',
58
+ 'you assholeface', 'you prickface', 'you dumbfuck', 'you fucknut', 'you twatwaffle',
59
+ 'you shitbagger', 'you dickweed', 'you cumdump', 'you asswipe', 'you cockwomble',
60
+ 'you bollocks', 'you wazzock', 'you pillock', 'you plonker', 'you div', 'you bellend',
61
+ 'you twitbrain', 'you motherfucking idiot', 'fuckig stupid',
62
+ # Tagalog phrases
63
+ 'walang hiya', 'sira ulo', 'walang kwenta', 'walang silbe',
64
+ 'putang ina', 'putang ina ka', 'putang ina mo',
65
+ 'gago ka', 'gago mo', 'gago-gago', 'gago-gago ka', 'gago-gago mo', 'gagoan ka', 'gagoan mo',
66
+ 'tanga ka', 'tanga mo', 'tanga-tanga', 'tanga-tanga ka', 'tanga-tanga mo', 'tangain ka', 'tangain mo', 'tanga-in ka', 'tanga-in mo',
67
+ 'bobo ka', 'bobo mo', 'bobo-bobo', 'bobo-bobo ka', 'bobo-bobo mo', 'bobo-in ka', 'bobo-in mo',
68
+ 'ulol ka', 'ulol mo', 'ulol-ulol', 'ulol-ulol ka', 'ulol-ulol mo', 'ulolan ka', 'ulolan mo', 'ulol-in ka', 'ulol-in mo',
69
+ 'tarantado ka', 'tarantado mo', 'tarantado-tarantado', 'tarantado-tarantado ka', 'tarantado-tarantado mo',
70
+ 'tarantadoin ka', 'tarantadoin mo', 'tarantado-in ka', 'tarantado-in mo', 'tarantadohin ka', 'tarantadohin mo',
71
+ 'bastos ka', 'bastos mo', 'bastusin ka', 'bastusin mo',
72
+ 'maldita ka', 'maldita mo', 'malditahin ka', 'malditahin mo',
73
+ 'loko ka', 'loko mo', 'loko-loko', 'loko-loko ka', 'loko-loko mo',
74
+ 'asar ka', 'asar mo', 'asarin ka', 'asarin mo',
75
+ 'pekpek ka', 'pekpek mo', 'pekpekin ka', 'pekpekin mo',
76
+ 'burat ka', 'burat mo', 'buratin ka', 'buratin mo',
77
+ 'kantot ka', 'kantot mo', 'kantotin ka', 'kantotin mo',
78
+ 'puke ka', 'puke mo', 'pukein ka', 'pukein mo',
79
+ 'bading ka', 'bading mo',
80
+ 'bakla ka', 'bakla mo',
81
+ 'unggoy ka', 'unggoy mo'
82
+ }
83
+
84
  # Simple conversation patterns
85
  self.greeting_patterns = [
86
  r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
 
132
  return True
133
  return False
134
 
135
def contains_profanity(self, message: str) -> bool:
    """Return True when *message* contains a configured bad word or phrase.

    The message is lower-cased, hyphens are treated as spaces, and it is
    split on whitespace.  Every token is then examined in two spellings:

    * plain   - all non-word characters removed ("damn!" -> "damn");
    * decoded - trailing sentence punctuation trimmed, look-alike
      characters mapped back to letters ("sh1t" -> "shit",
      "a$$" -> "ass"), then remaining symbols removed.

    A token is profane when it equals an entry of ``self.bad_words`` or is
    such an entry (of at least four letters) followed by a common
    inflectional ending ("fucking", "shitty", "damned").  Arbitrary
    prefix matches are deliberately NOT accepted: they made harmless
    words such as "hello", "assist" or "rate" trip the filter.

    Entries of ``self.bad_phrases`` match when their words appear as a run
    of consecutive whole words in either spelling of the message.

    Args:
        message: Raw user text.

    Returns:
        True if profanity was detected, otherwise False.  An empty
        message is never profane.
    """
    if not message:
        return False

    # Look-alike characters commonly used to disguise letters.
    leet_table = str.maketrans({
        '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
        '7': 't', '@': 'a', '!': 'i', '$': 's', '&': 'a',
    })
    # Endings tolerated after a listed word.  Longer endings come first only
    # for readability; every ending is tried.
    inflections = ('ers', 'ing', 'es', 'ed', 'er', 'in', 's', 'y')
    # Very short entries ("ass", "rat", "div") are matched exactly only,
    # otherwise "rating" or "diving" would be flagged.
    min_stem_length = 4

    def strip_symbols(token):
        return re.sub(r'\W', '', token)

    def decode(token):
        # Trim sentence punctuation first so that "damn!" is read as "damn"
        # rather than "damni" once '!' is mapped to 'i'.
        return strip_symbols(token.rstrip('!?.,;:').translate(leet_table))

    def is_bad_word(word):
        if word in self.bad_words:
            return True
        for ending in inflections:
            if not word.endswith(ending):
                continue
            stem = word[:-len(ending)]
            stems = [stem]
            # Undo consonant doubling: "shitty" -> "shitt" -> "shit".
            if len(stem) >= 2 and stem[-1] == stem[-2]:
                stems.append(stem[:-1])
            for candidate in stems:
                if len(candidate) >= min_stem_length and candidate in self.bad_words:
                    return True
        return False

    raw_tokens = message.lower().replace('-', ' ').split()
    plain_words = [word for word in map(strip_symbols, raw_tokens) if word]
    decoded_words = [word for word in map(decode, raw_tokens) if word]

    if any(is_bad_word(word) for word in set(plain_words + decoded_words)):
        return True

    # Pad with spaces so that a substring test only succeeds on whole-word
    # boundaries (the texts contain nothing but word characters and single
    # spaces at this point).
    haystacks = {
        ' ' + ' '.join(words) + ' '
        for words in (plain_words, decoded_words)
        if words
    }
    for phrase in self.bad_phrases:
        # Normalise the phrase the same way as the message so that
        # "gago-gago" matches "gago gago".
        phrase_words = re.sub(r'[^\w\s]', '', phrase.lower().replace('-', ' ')).split()
        if not phrase_words:
            continue
        needle = ' ' + ' '.join(phrase_words) + ' '
        if any(needle in haystack for haystack in haystacks):
            return True

    return False
205
+
206
def get_profanity_warning(self) -> str:
    """Pick one of the canned, polite replies used when profanity is detected.

    Returns:
        A randomly selected reminder asking the user to keep the
        conversation respectful.
    """
    import random

    polite_replies = (
        "I understand you might be frustrated, but please keep our conversation respectful. I'm here to help you with any questions or concerns you might have.",
        "I appreciate your message, but let's keep our conversation friendly and professional. How can I assist you today?",
        "I'm here to help, but I'd prefer we keep our conversation appropriate. Is there something specific you'd like to ask me?",
        "Let's maintain a respectful conversation. I'm happy to help you with any questions or information you need.",
    )
    chosen = random.choice(polite_replies)
    return chosen
216
+
217
  def get_greeting_response(self) -> str:
218
  """Generate a greeting response"""
219
  responses = [
 
847
  if not message.strip():
848
  return "Please enter a message so I can help you!"
849
 
850
+ # Check for profanity first
851
+ if self.contains_profanity(message):
852
+ response = self.get_profanity_warning()
853
+ # Store conversation history (but don't process the message)
854
+ self.conversation_history.append(("user", "[Filtered]"))
855
+ self.conversation_history.append(("bot", response))
856
+ return response
857
+
858
  # Store conversation history
859
  self.conversation_history.append(("user", message))
860