Spaces:

markobinario
/

flaskbot

Running

App Files Files Community

markobinario committed 3 days ago

Commit

2fc19f4

verified ·

1 Parent(s): 4b7f251

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -0

app.py CHANGED Viewed

@@ -11,6 +11,76 @@ class AIChatbot:
         self.database_url = database_url
         self.conversation_history = []
         # Simple conversation patterns
         self.greeting_patterns = [
             r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
@@ -62,6 +132,88 @@ class AIChatbot:
                 return True
         return False
     def get_greeting_response(self) -> str:
         """Generate a greeting response"""
         responses = [
@@ -695,6 +847,14 @@ Just type your question or start a conversation, and I'll do my best to help you
         if not message.strip():
             return "Please enter a message so I can help you!"
         # Store conversation history
         self.conversation_history.append(("user", message))

         self.database_url = database_url
         self.conversation_history = []
+        # Profanity filter - list of bad words to filter (English and Tagalog)
+        self.bad_words = {
+            # English bad words
+            'damn', 'hell', 'crap', 'suck', 'idiot', 'fool', 'jerk', 'loser', 'dumb', 'trash',
+            'butt', 'freak', 'nut', 'moron', 'dummy', 'bozo', 'twit', 'dope', 'dumbass',
+            'poophead', 'jerkoff', 'bugger', 'wanker', 'tosser', 'bastard', 'scum', 'slime',
+            'creep', 'brat', 'dweeb', 'goon', 'booby', 'puke', 'vomit', 'dung', 'sap',
+            'clutz', 'knob', 'prick', 'ass', 'shit', 'fuck', 'cock', 'tits', 'pussy',
+            'cunt', 'slut', 'bitch', 'whore', 'skank', 'stupid',
+            'asshole', 'dick', 'douche', 'scumbag', 'slimeball', 'douchebag', 'knobhead',
+            'numskull', 'halfwit', 'nincompoop', 'blockhead', 'dimwit', 'nitwit', 'simpleton',
+            'dunce', 'buffoon', 'doofus', 'clod', 'goober', 'jerkface', 'schmuck', 'scoundrel',
+            'miscreant', 'rat', 'git', 'wazzock', 'pillock', 'prat', 'plonker', 'div', 'bellend',
+            'tosserhead', 'twitbrain', 'sapbrain', 'knucklehead', 'dopey', 'boob', 'dingbat', 'oaf',
+            'ninnyhammer', 'chucklehead', 'saphead', 'pukehead', 'fuckface', 'assface', 'dickhead',
+            'cockhead', 'shithead', 'twatface', 'doucheface', 'bastardface', 'motherfucker', 'shitbag',
+            'cocksucker', 'jackass', 'wankerface', 'tosserface', 'arsehole', 'shitstain', 'assholeface',
+            'prickface', 'dumbfuck', 'fucknut', 'twatwaffle', 'shitbagger', 'dickweed', 'cumdump',
+            'asswipe', 'cockwomble', 'bollocks', 'twat', 'dick', 'fucking',
+            # Tagalog bad words
+            'gago', 'putangina', 'putang', 'hayop', 'lintik', 'walang', 'hiya', 'bobo', 'leche',
+            'punyeta', 'sira', 'ulo', 'bwisit', 'pakshet', 'tarantado', 'ulol', 'buwisit',
+            'hudas', 'kupal', 'shet', 'tae', 'tanga', 'tangina', 'bastos', 'maldita', 'loko',
+            'asar', 'pekpek', 'burat', 'kantot', 'puke', 'kantotin', 'tarantadoin', 'ulolan',
+            'bading', 'bakla', 'unggoy', 'asarin', 'bastusin', 'malditahin', 'buratin', 'pekpekin',
+            'pukein', 'tangain', 'gagoan', 'tarantadohin', 'ina'
+        }
+        # Bad phrases (multi-word profanity - English and Tagalog)
+        self.bad_phrases = {
+            # English phrases
+            'fuck you', 'shit you', 'damn you', 'hell you',
+            'you bastard', 'you bitch', 'you dick', 'you prick', 'you cunt', 'you slut', 'you whore',
+            'you jerk', 'you idiot', 'you fool', 'you moron', 'you dumbass', 'you douche', 'you twat',
+            'you bugger', 'you wanker', 'you tosser', 'you poophead', 'you scumbag', 'you slimeball',
+            'you douchebag', 'you knobhead', 'you bozo', 'you twit', 'you dope', 'you numskull',
+            'you halfwit', 'you nincompoop', 'you blockhead', 'you dimwit', 'you nitwit', 'you simpleton',
+            'you dunce', 'you buffoon', 'you doofus', 'you clod', 'you goober', 'you jerkface',
+            'you schmuck', 'you scoundrel', 'you miscreant', 'you rat', 'you puke', 'you vomit',
+            'you dung', 'you ass', 'you tits', 'you pussy', 'you cock', 'you fuckface', 'you assface',
+            'you dickhead', 'you cockhead', 'you shithead', 'you twatface', 'you knobhead', 'you doucheface',
+            'you loser', 'you bastardface', 'you motherfucker', 'you shitbag', 'you cocksucker',
+            'you jackass', 'you wankerface', 'you tosserface', 'you arsehole', 'you asshole', 'you freak', 'you nut',
+            'you scum', 'you creep', 'you brat', 'you dweeb', 'you goon', 'you pukehead', 'you shitstain',
+            'you assholeface', 'you prickface', 'you dumbfuck', 'you fucknut', 'you twatwaffle',
+            'you shitbagger', 'you dickweed', 'you cumdump', 'you asswipe', 'you cockwomble',
+            'you bollocks', 'you wazzock', 'you pillock', 'you plonker', 'you div', 'you bellend',
+            'you twitbrain', 'you motherfucking idiot', 'fuckig stupid',
+            # Tagalog phrases
+            'walang hiya', 'sira ulo', 'walang kwenta', 'walang silbe',
+            'putang ina', 'putang ina ka', 'putang ina mo',
+            'gago ka', 'gago mo', 'gago-gago', 'gago-gago ka', 'gago-gago mo', 'gagoan ka', 'gagoan mo',
+            'tanga ka', 'tanga mo', 'tanga-tanga', 'tanga-tanga ka', 'tanga-tanga mo', 'tangain ka', 'tangain mo', 'tanga-in ka', 'tanga-in mo',
+            'bobo ka', 'bobo mo', 'bobo-bobo', 'bobo-bobo ka', 'bobo-bobo mo', 'bobo-in ka', 'bobo-in mo',
+            'ulol ka', 'ulol mo', 'ulol-ulol', 'ulol-ulol ka', 'ulol-ulol mo', 'ulolan ka', 'ulolan mo', 'ulol-in ka', 'ulol-in mo',
+            'tarantado ka', 'tarantado mo', 'tarantado-tarantado', 'tarantado-tarantado ka', 'tarantado-tarantado mo',
+            'tarantadoin ka', 'tarantadoin mo', 'tarantado-in ka', 'tarantado-in mo', 'tarantadohin ka', 'tarantadohin mo',
+            'bastos ka', 'bastos mo', 'bastusin ka', 'bastusin mo',
+            'maldita ka', 'maldita mo', 'malditahin ka', 'malditahin mo',
+            'loko ka', 'loko mo', 'loko-loko', 'loko-loko ka', 'loko-loko mo',
+            'asar ka', 'asar mo', 'asarin ka', 'asarin mo',
+            'pekpek ka', 'pekpek mo', 'pekpekin ka', 'pekpekin mo',
+            'burat ka', 'burat mo', 'buratin ka', 'buratin mo',
+            'kantot ka', 'kantot mo', 'kantotin ka', 'kantotin mo',
+            'puke ka', 'puke mo', 'pukein ka', 'pukein mo',
+            'bading ka', 'bading mo',
+            'bakla ka', 'bakla mo',
+            'unggoy ka', 'unggoy mo'
+        }
         # Simple conversation patterns
         self.greeting_patterns = [
             r'\b(hi|hello|hey|good morning|good afternoon|good evening)\b',
                 return True
         return False
+    def contains_profanity(self, message: str) -> bool:
+        """Return True if ``message`` contains a listed bad phrase or bad word.
+
+        Matching is case-insensitive and runs in two passes:
+
+        1. Phrases - hyphens become spaces, punctuation is dropped and
+           whitespace is collapsed in both the message and every entry of
+           ``self.bad_phrases``; a phrase only matches on whole-word
+           boundaries ("gago-gago" therefore also matches "gago gago").
+        2. Words - common character substitutions (``0`` -> ``o``,
+           ``@`` -> ``a``, ...) are undone, then each token is compared with
+           ``self.bad_words`` either exactly or after removing one common
+           English inflection suffix, so "fucked" still matches "fuck".
+
+        Plain prefix matching is deliberately avoided: it flags harmless
+        input such as "hello" ("hell"), "assist" ("ass") or "rate" ("rat").
+
+        Args:
+            message: Raw user text.
+
+        Returns:
+            True when profanity is detected, otherwise False.
+        """
+        def _squash(text: str) -> str:
+            """Turn hyphens into spaces, drop punctuation, collapse whitespace."""
+            text = re.sub(r'[^\w\s]', '', text.replace('-', ' '))
+            return re.sub(r'\s+', ' ', text).strip()
+        message_lower = message.lower()
+        # --- Pass 1: multi-word phrases ("walang hiya", "sira ulo", ...) ---
+        # The message is normalised once; only the phrase changes per iteration.
+        phrase_text = _squash(message_lower)
+        for phrase in self.bad_phrases:
+            phrase_words = _squash(phrase).split()
+            if not phrase_words:
+                continue
+            # Whole words, separated by any amount of whitespace.
+            phrase_pattern = (
+                r'\b' + r'\s+'.join(re.escape(word) for word in phrase_words) + r'\b'
+            )
+            if re.search(phrase_pattern, phrase_text, re.IGNORECASE):
+                return True
+        # --- Pass 2: single words ---
+        # Undo common obfuscation (digits/symbols standing in for letters).
+        obfuscation_table = str.maketrans({
+            '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's',
+            '7': 't', '@': 'a', '!': 'i', '$': 's', '&': 'a',
+        })
+        normalized = message_lower.translate(obfuscation_table).replace('-', ' ')
+        # Two tokenisations are checked:
+        #   * split on punctuation  -> catches "bad.ass" as "bad" + "ass"
+        #   * punctuation removed   -> catches "f.u.c.k" as "fuck"
+        tokens = set(re.findall(r'\w+', normalized))
+        tokens.update(_squash(normalized).split())
+        # Inflection endings tolerated after a listed word. Anything longer
+        # (e.g. the "o" in "hello") means the token is a different word.
+        suffixes = ('s', 'es', 'ed', 'er', 'ers', 'ing')
+        for token in tokens:
+            if token in self.bad_words:
+                return True
+            for suffix in suffixes:
+                if token.endswith(suffix) and token[:-len(suffix)] in self.bad_words:
+                    return True
+        return False
+    def get_profanity_warning(self) -> str:
+        """Pick one of the canned reminders sent when profanity is detected.
+
+        Returns:
+            A randomly chosen polite message asking the user to keep the
+            conversation respectful.
+        """
+        import random
+        polite_replies = (
+            "I understand you might be frustrated, but please keep our conversation respectful. I'm here to help you with any questions or concerns you might have.",
+            "I appreciate your message, but let's keep our conversation friendly and professional. How can I assist you today?",
+            "I'm here to help, but I'd prefer we keep our conversation appropriate. Is there something specific you'd like to ask me?",
+            "Let's maintain a respectful conversation. I'm happy to help you with any questions or information you need.",
+        )
+        return random.choice(polite_replies)
     def get_greeting_response(self) -> str:
         """Generate a greeting response"""
         responses = [
         if not message.strip():
             return "Please enter a message so I can help you!"
+        # Check for profanity first
+        if self.contains_profanity(message):
+            response = self.get_profanity_warning()
+            # Store conversation history (but don't process the message)
+            self.conversation_history.append(("user", "[Filtered]"))
+            self.conversation_history.append(("bot", response))
+            return response
         # Store conversation history
         self.conversation_history.append(("user", message))