Spaces:

Bindumiryala
/

smart-hate-speech-classifier

Sleeping

App Files Community

Bindumiryala committed on Sep 29

Commit

9442257

verified ·

1 Parent(s): 5d0abc1

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -11

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import whisper
@@ -7,9 +6,9 @@ import os
 import torch
 import sqlite3
 import bcrypt
-import imageio_ffmpeg
-import subprocess
 from moviepy.editor import VideoFileClip
 # ------------------------------- DB Setup -------------------------------
 conn = sqlite3.connect('users.db', check_same_thread=False)
@@ -97,12 +96,13 @@ whisper.audio.run = custom_run
 # ------------------------------- Load Models -------------------------------
 @st.cache_resource
 def load_whisper_model():
-    return whisper.load_model("tiny")
 @st.cache_resource
 def load_bert_model():
-    tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
-    model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
     model.eval()
     return tokenizer, model
@@ -151,11 +151,11 @@ if page == "overview":
 This AI-based project detects hate speech in:
 - ✍️ Text Input
 - 🔊 Audio Files
-- 🎥 Video Uploads or URLs
 ### 🔧 Models Used:
 - `OpenAI Whisper` for Speech-to-Text
-- `Toxic-BERT` for Hate Speech Classification
     """)
 elif page == "detector":
@@ -184,6 +184,7 @@ elif page == "detector":
             st.info(transcribed)
             label, score = classify_text(transcribed)
             show_result(label, score)
     elif input_mode == "Video Upload":
         video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
@@ -193,7 +194,7 @@ elif page == "detector":
                 video_path = temp_video.name
             st.video(video_path)
             clip = VideoFileClip(video_path)
-            audio_path = "temp_video_audio.wav"
             clip.audio.write_audiofile(audio_path)
             result = whisper_model.transcribe(audio_path)
             transcribed = result["text"]
@@ -201,7 +202,8 @@ elif page == "detector":
             st.info(transcribed)
             label, score = classify_text(transcribed)
             show_result(label, score)
 st.markdown("---")
-st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")

 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import whisper
 import torch
 import sqlite3
 import bcrypt
 from moviepy.editor import VideoFileClip
+import subprocess
+import imageio_ffmpeg
 # ------------------------------- DB Setup -------------------------------
 conn = sqlite3.connect('users.db', check_same_thread=False)
 # ------------------------------- Load Models -------------------------------
 @st.cache_resource
 def load_whisper_model():
+    return whisper.load_model("tiny")  # smaller model for less storage
 @st.cache_resource
 def load_bert_model():
+    model_name = "Hate-speech-CNERG/bert-base-uncased-hatexplain"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
     model.eval()
     return tokenizer, model
 This AI-based project detects hate speech in:
 - ✍️ Text Input
 - 🔊 Audio Files
+- 🎥 Video Uploads
 ### 🔧 Models Used:
 - `OpenAI Whisper` for Speech-to-Text
+- `HateXplain BERT` for Hate Speech Classification
     """)
 elif page == "detector":
             st.info(transcribed)
             label, score = classify_text(transcribed)
             show_result(label, score)
+            os.remove(audio_path)  # remove temp file to save space
     elif input_mode == "Video Upload":
         video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
                 video_path = temp_video.name
             st.video(video_path)
             clip = VideoFileClip(video_path)
+            audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
             clip.audio.write_audiofile(audio_path)
             result = whisper_model.transcribe(audio_path)
             transcribed = result["text"]
             st.info(transcribed)
             label, score = classify_text(transcribed)
             show_result(label, score)
+            os.remove(video_path)
+            os.remove(audio_path)
 st.markdown("---")
+st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")