Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,76 @@ class AIChatbot:
|
|
| 11 |
self.database_url = database_url
|
| 12 |
self.conversation_history = []
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Simple conversation patterns
|
| 15 |
self.greeting_patterns = [
|
| 16 |
r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
|
|
@@ -62,6 +132,88 @@ class AIChatbot:
|
|
| 62 |
return True
|
| 63 |
return False
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def get_greeting_response(self) -> str:
|
| 66 |
"""Generate a greeting response"""
|
| 67 |
responses = [
|
|
@@ -695,6 +847,14 @@ Just type your question or start a conversation, and I'll do my best to help you
|
|
| 695 |
if not message.strip():
|
| 696 |
return "Please enter a message so I can help you!"
|
| 697 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
# Store conversation history
|
| 699 |
self.conversation_history.append(("user", message))
|
| 700 |
|
|
|
|
| 11 |
self.database_url = database_url
|
| 12 |
self.conversation_history = []
|
| 13 |
|
| 14 |
+
# Profanity filter - list of bad words to filter (English and Tagalog)
|
| 15 |
+
self.bad_words = {
|
| 16 |
+
# English bad words
|
| 17 |
+
'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
|
| 18 |
+
'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
|
| 19 |
+
'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
|
| 20 |
+
'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
|
| 21 |
+
'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
|
| 22 |
+
'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
|
| 23 |
+
'asshole', 'dick', 'douche', 'scumbag', 'slimeball', 'douchebag', 'knobhead',
|
| 24 |
+
'numskull', 'halfwit', 'nincompoop', 'blockhead', 'dimwit', 'nitwit', 'simpleton',
|
| 25 |
+
'dunce', 'buffoon', 'doofus', 'clod', 'goober', 'jerkface', 'schmuck', 'scoundrel',
|
| 26 |
+
'miscreant', 'rat', 'git', 'wazzock', 'pillock', 'prat', 'plonker', 'div', 'bellend',
|
| 27 |
+
'tosserhead', 'twitbrain', 'sapbrain', 'knucklehead', 'dopey', 'boob', 'dingbat', 'oaf',
|
| 28 |
+
'ninnyhammer', 'chucklehead', 'saphead', 'pukehead', 'fuckface', 'assface', 'dickhead',
|
| 29 |
+
'cockhead', 'shithead', 'twatface', 'doucheface', 'bastardface', 'motherfucker', 'shitbag',
|
| 30 |
+
'cocksucker', 'jackass', 'wankerface', 'tosserface', 'arsehole', 'shitstain', 'assholeface',
|
| 31 |
+
'prickface', 'dumbfuck', 'fucknut', 'twatwaffle', 'shitbagger', 'dickweed', 'cumdump',
|
| 32 |
+
'asswipe', 'cockwomble', 'bollocks', 'twat', 'dick', 'fucking',
|
| 33 |
+
# Tagalog bad words
|
| 34 |
+
'gago', 'putangina', 'putang', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
|
| 35 |
+
'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
|
| 36 |
+
'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina', 'bastos', 'maldita', 'loko',
|
| 37 |
+
'asar', 'pekpek', 'burat', 'kantot', 'puke', 'kantotin', 'tarantadoin', 'ulolan',
|
| 38 |
+
'bading', 'bakla', 'unggoy', 'asarin', 'bastusin', 'malditahin', 'buratin', 'pekpekin',
|
| 39 |
+
'pukein', 'tangain', 'gagoan', 'tarantadohin', 'ina'
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
# Bad phrases (multi-word profanity - English and Tagalog)
|
| 43 |
+
self.bad_phrases = {
|
| 44 |
+
# English phrases
|
| 45 |
+
'fuck you', 'shit you', 'damn you', 'hell you',
|
| 46 |
+
'you bastard', 'you bitch', 'you dick', 'you prick', 'you cunt', 'you slut', 'you whore',
|
| 47 |
+
'you jerk', 'you idiot', 'you fool', 'you moron', 'you dumbass', 'you douche', 'you twat',
|
| 48 |
+
'you bugger', 'you wanker', 'you tosser', 'you poophead', 'you scumbag', 'you slimeball',
|
| 49 |
+
'you douchebag', 'you knobhead', 'you bozo', 'you twit', 'you dope', 'you numskull',
|
| 50 |
+
'you halfwit', 'you nincompoop', 'you blockhead', 'you dimwit', 'you nitwit', 'you simpleton',
|
| 51 |
+
'you dunce', 'you buffoon', 'you doofus', 'you clod', 'you goober', 'you jerkface',
|
| 52 |
+
'you schmuck', 'you scoundrel', 'you miscreant', 'you rat', 'you puke', 'you vomit',
|
| 53 |
+
'you dung', 'you ass', 'you tits', 'you pussy', 'you cock', 'you fuckface', 'you assface',
|
| 54 |
+
'you dickhead', 'you cockhead', 'you shithead', 'you twatface', 'you knobhead', 'you doucheface',
|
| 55 |
+
'you loser', 'you bastardface', 'you motherfucker', 'you shitbag', 'you cocksucker',
|
| 56 |
+
'you jackass', 'you wankerface', 'you tosserface', 'you arsehole', 'you asshole', 'you freak', 'you nut',
|
| 57 |
+
'you scum', 'you creep', 'you brat', 'you dweeb', 'you goon', 'you pukehead', 'you shitstain',
|
| 58 |
+
'you assholeface', 'you prickface', 'you dumbfuck', 'you fucknut', 'you twatwaffle',
|
| 59 |
+
'you shitbagger', 'you dickweed', 'you cumdump', 'you asswipe', 'you cockwomble',
|
| 60 |
+
'you bollocks', 'you wazzock', 'you pillock', 'you plonker', 'you div', 'you bellend',
|
| 61 |
+
'you twitbrain', 'you motherfucking idiot', 'fuckig stupid',
|
| 62 |
+
# Tagalog phrases
|
| 63 |
+
'walang hiya', 'sira ulo', 'walang kwenta', 'walang silbe',
|
| 64 |
+
'putang ina', 'putang ina ka', 'putang ina mo',
|
| 65 |
+
'gago ka', 'gago mo', 'gago-gago', 'gago-gago ka', 'gago-gago mo', 'gagoan ka', 'gagoan mo',
|
| 66 |
+
'tanga ka', 'tanga mo', 'tanga-tanga', 'tanga-tanga ka', 'tanga-tanga mo', 'tangain ka', 'tangain mo', 'tanga-in ka', 'tanga-in mo',
|
| 67 |
+
'bobo ka', 'bobo mo', 'bobo-bobo', 'bobo-bobo ka', 'bobo-bobo mo', 'bobo-in ka', 'bobo-in mo',
|
| 68 |
+
'ulol ka', 'ulol mo', 'ulol-ulol', 'ulol-ulol ka', 'ulol-ulol mo', 'ulolan ka', 'ulolan mo', 'ulol-in ka', 'ulol-in mo',
|
| 69 |
+
'tarantado ka', 'tarantado mo', 'tarantado-tarantado', 'tarantado-tarantado ka', 'tarantado-tarantado mo',
|
| 70 |
+
'tarantadoin ka', 'tarantadoin mo', 'tarantado-in ka', 'tarantado-in mo', 'tarantadohin ka', 'tarantadohin mo',
|
| 71 |
+
'bastos ka', 'bastos mo', 'bastusin ka', 'bastusin mo',
|
| 72 |
+
'maldita ka', 'maldita mo', 'malditahin ka', 'malditahin mo',
|
| 73 |
+
'loko ka', 'loko mo', 'loko-loko', 'loko-loko ka', 'loko-loko mo',
|
| 74 |
+
'asar ka', 'asar mo', 'asarin ka', 'asarin mo',
|
| 75 |
+
'pekpek ka', 'pekpek mo', 'pekpekin ka', 'pekpekin mo',
|
| 76 |
+
'burat ka', 'burat mo', 'buratin ka', 'buratin mo',
|
| 77 |
+
'kantot ka', 'kantot mo', 'kantotin ka', 'kantotin mo',
|
| 78 |
+
'puke ka', 'puke mo', 'pukein ka', 'pukein mo',
|
| 79 |
+
'bading ka', 'bading mo',
|
| 80 |
+
'bakla ka', 'bakla mo',
|
| 81 |
+
'unggoy ka', 'unggoy mo'
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
# Simple conversation patterns
|
| 85 |
self.greeting_patterns = [
|
| 86 |
r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
|
|
|
|
| 132 |
return True
|
| 133 |
return False
|
| 134 |
|
| 135 |
+
def contains_profanity(self, message: str) -> bool:
    """Check whether ``message`` contains a blocked word or phrase.

    Matching is case-insensitive and hyphens are treated as spaces, so
    "gago-gago" and "gago gago" are equivalent. Two passes are made:

    1. Phrases: every entry of ``self.bad_phrases`` is looked up as a run
       of consecutive whole words in the punctuation-stripped message.
    2. Words: every token is compared against ``self.bad_words`` both as
       plain text (punctuation removed) and after undoing common
       character substitutions such as ``@`` -> ``a`` or ``1`` -> ``i``.
       A token also matches when it is a listed word of at least four
       letters followed by a common inflection ("damned", "idiots").

    Only whole words (plus those inflections) match. Words that merely
    begin with a listed entry, such as "hello" or "assistance", are not
    treated as profanity.

    Args:
        message: The raw user message.

    Returns:
        True if a blocked word or phrase is found, otherwise False.
    """
    # Character substitutions commonly used to disguise a word.
    obfuscation_table = str.maketrans({
        '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
        '7': 't', '@': 'a', '!': 'i', '$': 's', '&': 'a',
    })
    # Sentence punctuation that may trail a word; '!' doubles as an
    # obfuscation character, so tokens are tried with and without it.
    trailing_punctuation = '.,!?;:'
    # Inflections accepted after a listed word. Short stems ("ass", "rat")
    # are excluded so that "rating" or "rates" are not flagged.
    inflection_suffixes = ('s', 'es', 'ed', 'er', 'ers', 'ing', 'in')
    min_stem_length = 4

    raw_tokens = message.lower().replace('-', ' ').split()
    if not raw_tokens:
        return False

    # Tokens with every non-word character removed; empty results dropped.
    plain_words = [
        word
        for word in (re.sub(r'\W', '', token) for token in raw_tokens)
        if word
    ]

    # Pass 1: multi-word phrases. Padding both sides with a space makes a
    # plain substring test equivalent to a whole-word sequence match.
    padded_message = ' ' + ' '.join(plain_words) + ' '
    for phrase in self.bad_phrases:
        phrase_words = re.sub(
            r'[^\w\s]', '', phrase.lower().replace('-', ' ')
        ).split()
        if phrase_words and ' ' + ' '.join(phrase_words) + ' ' in padded_message:
            return True

    # Pass 2: single words. Collect every spelling worth checking.
    candidates = set(plain_words)
    for token in raw_tokens:
        for form in (token, token.rstrip(trailing_punctuation)):
            # Plain numbers such as "455" are not disguised words.
            if not form or form.isdigit():
                continue
            decoded = re.sub(r'\W', '', form.translate(obfuscation_table))
            if decoded:
                candidates.add(decoded)

    for word in candidates:
        if word in self.bad_words:
            return True
        for suffix in inflection_suffixes:
            if not word.endswith(suffix):
                continue
            stem = word[:-len(suffix)]
            if len(stem) >= min_stem_length and stem in self.bad_words:
                return True

    return False
|
| 205 |
+
|
| 206 |
+
def get_profanity_warning(self) -> str:
    """Return one randomly chosen polite reply for a filtered message."""
    import random

    # Canned replies; one is picked uniformly at random on each call.
    polite_replies = (
        "I understand you might be frustrated, but please keep our conversation respectful. I'm here to help you with any questions or concerns you might have.",
        "I appreciate your message, but let's keep our conversation friendly and professional. How can I assist you today?",
        "I'm here to help, but I'd prefer we keep our conversation appropriate. Is there something specific you'd like to ask me?",
        "Let's maintain a respectful conversation. I'm happy to help you with any questions or information you need.",
    )
    return random.choice(polite_replies)
|
| 216 |
+
|
| 217 |
def get_greeting_response(self) -> str:
|
| 218 |
"""Generate a greeting response"""
|
| 219 |
responses = [
|
|
|
|
| 847 |
if not message.strip():
|
| 848 |
return "Please enter a message so I can help you!"
|
| 849 |
|
| 850 |
+
# Check for profanity first
|
| 851 |
+
if self.contains_profanity(message):
|
| 852 |
+
response = self.get_profanity_warning()
|
| 853 |
+
# Store conversation history (but don't process the message)
|
| 854 |
+
self.conversation_history.append(("user", "[Filtered]"))
|
| 855 |
+
self.conversation_history.append(("bot", response))
|
| 856 |
+
return response
|
| 857 |
+
|
| 858 |
# Store conversation history
|
| 859 |
self.conversation_history.append(("user", message))
|
| 860 |
|