"""Streamlit app: login-gated hate speech classifier for text, audio and video.

Users sign up / log in against a local SQLite table (bcrypt-hashed passwords).
Once logged in they can classify typed text, or upload audio/video which is
transcribed with Whisper and then classified with a HateXplain BERT model.
"""
import contextlib
import os
import re
import sqlite3
import subprocess
import tempfile

import bcrypt
import imageio_ffmpeg
import streamlit as st
import torch
import whisper
import whisper.audio
from moviepy.editor import VideoFileClip
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# ------------------------------- DB Setup -------------------------------
# check_same_thread=False lets the connection be used from a thread other
# than the one that created it.
conn = sqlite3.connect('users.db', check_same_thread=False)
cursor = conn.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS users (
    username TEXT PRIMARY KEY,
    password TEXT NOT NULL
)''')
conn.commit()


# ------------------------------- Auth Helpers -------------------------------
def hash_password(password: str) -> bytes:
    """Return a salted bcrypt hash of *password*."""
    return bcrypt.hashpw(password.encode(), bcrypt.gensalt())


def verify_password(password: str, hashed) -> bool:
    """Return True if *password* matches the stored bcrypt hash *hashed*.

    *hashed* may be ``bytes`` or ``str`` (it is encoded before checking).
    A stored value that is not a valid bcrypt hash counts as a failed check
    instead of raising.
    """
    if isinstance(hashed, str):
        hashed = hashed.encode()
    try:
        return bcrypt.checkpw(password.encode(), hashed)
    except ValueError:
        # Malformed hash in the database: treat as "does not match".
        return False


def add_user(username: str, password: str) -> bool:
    """Insert a new user and return True on success.

    Returns False when the username is already taken (primary-key conflict)
    or when the username/password is blank. Other database errors propagate
    so they are not misreported as "username exists".
    """
    if not username or not username.strip() or not password:
        return False
    hashed_pwd = hash_password(password)
    try:
        # "with conn" commits on success and rolls back on error.
        with conn:
            conn.execute(
                "INSERT INTO users (username, password) VALUES (?, ?)",
                (username, hashed_pwd),
            )
    except sqlite3.IntegrityError:
        return False
    return True


def authenticate_user(username: str, password: str) -> bool:
    """Return True if *username* exists and *password* matches its hash."""
    # conn.execute creates a fresh cursor, so the result set is not shared
    # through the module-level cursor.
    row = conn.execute(
        "SELECT password FROM users WHERE username = ?", (username,)
    ).fetchone()
    return bool(row and verify_password(password, row[0]))


# ------------------------------- Login / Signup UI -------------------------------
def login_signup_page():
    """Render the login or registration form, depending on session state.

    On successful login sets ``logged_in``, ``username`` and ``page`` in
    ``st.session_state`` and reruns the script.
    """
    st.set_page_config(page_title="Login | Hate Speech Classifier", layout="centered")
    if 'page' not in st.session_state:
        st.session_state.page = 'login'

    # NOTE(review): only this plain text is present in the source as
    # received; if HTML markup originally wrapped it, restore it here.
    st.markdown(
        "\nPlease log in or create a new account to continue.\n",
        unsafe_allow_html=True,
    )

    if st.session_state.page == 'login':
        username = st.text_input("👤 Username")
        password = st.text_input("🔑 Password", type="password")
        if st.button("Login"):
            if authenticate_user(username, password):
                st.session_state.logged_in = True
                st.session_state.username = username
                st.session_state.page = "overview"
                st.success("Login successful!")
                st.rerun()
            else:
                st.error("Invalid credentials")
        if st.button("New user? Create an account"):
            st.session_state.page = 'register'
            # Rerun so the register form shows immediately; without it the
            # already-rendered login form stays until the next interaction.
            st.rerun()
    elif st.session_state.page == 'register':
        new_user = st.text_input("👤 New Username")
        new_pass = st.text_input("🔑 New Password", type="password")
        if st.button("Create Account"):
            if not new_user.strip() or not new_pass:
                st.warning("⚠️ Please enter both a username and a password.")
            elif add_user(new_user, new_pass):
                st.success("Account created! You can now log in.")
            else:
                st.error("Username already exists!")
        if st.button("Already have an account? Login"):
            st.session_state.page = 'login'
            st.rerun()


# ------------------------------- Access Control -------------------------------
if "logged_in" not in st.session_state:
    st.session_state.logged_in = False
if not st.session_state.logged_in:
    login_signup_page()
    st.stop()

# ------------------------------- FFmpeg Fix -------------------------------
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()


def custom_run(cmd, *args, **kwargs):
    """Run *cmd* like ``subprocess.run``, swapping "ffmpeg" for the bundled binary.

    Only a list/tuple command whose first element is exactly "ffmpeg" is
    rewritten; a new list is built so the caller's argv is not mutated.
    """
    if isinstance(cmd, (list, tuple)) and cmd and cmd[0] == "ffmpeg":
        cmd = [ffmpeg_path, *cmd[1:]]
    return subprocess.run(cmd, *args, **kwargs)


# Make whisper's audio loader call the imageio-ffmpeg executable.
whisper.audio.run = custom_run


# ------------------------------- Load Models -------------------------------
@st.cache_resource
def load_whisper_model():
    """Load the Whisper speech-to-text model (cached by Streamlit)."""
    return whisper.load_model("tiny")  # smaller model for less storage


@st.cache_resource
def load_bert_model():
    """Load the HateXplain tokenizer and classifier in eval mode (cached)."""
    model_name = "Hate-speech-CNERG/bert-base-uncased-hatexplain"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()
    return tokenizer, model


whisper_model = load_whisper_model()
tokenizer, classifier_model = load_bert_model()

# ------------------------------- Classifier -------------------------------
HATE_KEYWORDS = ["ugly", "stupid", "idiot", "hate", "kill", "trash", "fuck you", "bitch"]

# Whole-word / whole-phrase match. A plain substring test would flag
# "whatever" (contains "hate") or "skill" (contains "kill").
_HATE_KEYWORD_RE = re.compile(
    r"\b(?:"
    + "|".join(r"\s+".join(map(re.escape, kw.split())) for kw in HATE_KEYWORDS)
    + r")\b",
    re.IGNORECASE,
)

# Label names (lower-cased) that count as the hate-speech class.
_HATE_LABEL_NAMES = frozenset({"hate", "hate speech", "hatespeech", "hate_speech"})


def _is_hate_label(label_id: int) -> bool:
    """Return True if the model's class *label_id* denotes hate speech.

    The class name is read from ``classifier_model.config.id2label`` rather
    than assuming a fixed index.
    NOTE(review): the published HateXplain config is expected to map
    0 -> "hate speech", 1 -> "normal", 2 -> "offensive" -- confirm against
    the model card.
    """
    id2label = getattr(classifier_model.config, "id2label", None) or {}
    name = str(id2label.get(label_id, "")).strip().lower()
    if not name or name.startswith("label_"):
        # No informative label names: keep the previous index-based rule.
        return label_id == 1
    return name in _HATE_LABEL_NAMES


def classify_text(text):
    """Classify *text* and return ``(label, confidence)``.

    ``label`` is "Hate Speech" or "Not Hate Speech". A keyword hit returns
    confidence 1.0 without running the model; otherwise confidence is the
    softmax probability of the model's predicted class.
    """
    if _HATE_KEYWORD_RE.search(text):
        return "Hate Speech", 1.0

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = classifier_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    confidence, pred = torch.max(probs, dim=1)
    label = "Hate Speech" if _is_hate_label(pred.item()) else "Not Hate Speech"
    return label, confidence.item()


def show_result(label, score):
    """Display the prediction: red box for hate speech, green otherwise."""
    st.markdown("### 🔍 Prediction Result:")
    score_percent = f"{score * 100:.2f}%"
    if label == "Hate Speech":
        st.error(f"{label} ({score_percent} confident)")
    else:
        st.success(f"{label} ({score_percent} confident)")


# ------------------------------- Media Helpers -------------------------------
def _save_upload(uploaded_file, default_suffix):
    """Write an uploaded file to a temp file and return its path.

    The upload's own extension is kept; *default_suffix* is used only when
    the upload has none.
    """
    suffix = os.path.splitext(uploaded_file.name)[1].lower() or default_suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        # getvalue() returns the full content regardless of read position.
        tmp.write(uploaded_file.getvalue())
    return tmp.name


def _remove_quietly(path):
    """Delete *path* if set, ignoring filesystem errors."""
    if path:
        with contextlib.suppress(OSError):
            os.remove(path)


def _transcribe_and_classify(audio_path):
    """Transcribe *audio_path* with Whisper, then show text and prediction."""
    result = whisper_model.transcribe(audio_path)
    transcribed = result["text"]
    if not transcribed.strip():
        st.warning("⚠️ No speech could be transcribed from this file.")
        return
    st.success("📝 Transcribed Text:")
    st.info(transcribed)
    label, score = classify_text(transcribed)
    show_result(label, score)


def _handle_audio_upload(audio_file):
    """Play, transcribe and classify an uploaded audio file."""
    audio_path = _save_upload(audio_file, ".wav")
    try:
        st.audio(audio_path)
        _transcribe_and_classify(audio_path)
    finally:
        _remove_quietly(audio_path)  # remove temp file to save space


def _handle_video_upload(video_file):
    """Play an uploaded video, extract its audio, transcribe and classify."""
    video_path = _save_upload(video_file, ".mp4")
    audio_path = None
    try:
        st.video(video_path)
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                st.warning("⚠️ This video has no audio track to analyse.")
            else:
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                    audio_path = tmp.name
                clip.audio.write_audiofile(audio_path)
        finally:
            # Close the clip before the cleanup below removes the file.
            clip.close()
        if audio_path:
            _transcribe_and_classify(audio_path)
    finally:
        _remove_quietly(video_path)
        _remove_quietly(audio_path)


# ------------------------------- Sidebar -------------------------------
with st.sidebar:
    st.title("📋 Navigation")
    if st.button("📘 Project Overview"):
        st.session_state.page = "overview"
    if st.button("🎙️ Hate Speech Detector"):
        st.session_state.page = "detector"

if 'page' not in st.session_state:
    st.session_state.page = "overview"

# ------------------------------- Main Page -------------------------------
page = st.session_state.get("page", "overview")

if page == "overview":
    st.title("🗣️ Smart Hate Speech Classifier Using BERT & Whisper")
    st.markdown("""
    This AI-based project detects hate speech in:
    - ✍️ Text Input
    - 🔊 Audio Files
    - 🎥 Video Uploads

    ### 🔧 Models Used:
    - `OpenAI Whisper` for Speech-to-Text
    - `HateXplain BERT` for Hate Speech Classification
    """)
elif page == "detector":
    st.title("🎙️ Hate Speech Detection")
    input_mode = st.radio("Choose Input Type", ["Text", "Audio", "Video Upload"])

    if input_mode == "Text":
        text_input = st.text_area("📝 Enter your message:")
        if st.button("Classify Text"):
            if text_input.strip():
                label, score = classify_text(text_input)
                show_result(label, score)
            else:
                st.warning("⚠️ Please enter some text.")
    elif input_mode == "Audio":
        audio_file = st.file_uploader("📤 Upload Audio File:", type=["wav", "mp3"])
        if audio_file:
            _handle_audio_upload(audio_file)
    elif input_mode == "Video Upload":
        video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
        if video_file:
            _handle_video_upload(video_file)

st.markdown("---")
st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")