|
|
import streamlit as st |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import whisper |
|
|
import tempfile |
|
|
import os |
|
|
import torch |
|
|
import sqlite3 |
|
|
import bcrypt |
|
|
from moviepy.editor import VideoFileClip |
|
|
import subprocess |
|
|
import imageio_ffmpeg |
|
|
|
|
|
|
|
|
# Account store: one module-level SQLite connection and cursor shared by the
# helper functions below. check_same_thread=False disables sqlite3's check
# that the connection is only used from the thread that created it.
conn = sqlite3.connect('users.db', check_same_thread=False)
cursor = conn.cursor()

# Create the users table on first run; a no-op when it already exists.
_CREATE_USERS_TABLE = '''CREATE TABLE IF NOT EXISTS users (
    username TEXT PRIMARY KEY,
    password TEXT NOT NULL
)'''
cursor.execute(_CREATE_USERS_TABLE)
conn.commit()
|
|
|
|
|
|
|
|
def hash_password(password):
    """Return the bcrypt hash of ``password`` using a freshly generated salt.

    Args:
        password: Plaintext password as a ``str``; it is encoded to bytes
            before hashing.

    Returns:
        Whatever ``bcrypt.hashpw`` returns for the encoded password.
    """
    encoded = password.encode()
    salt = bcrypt.gensalt()
    return bcrypt.hashpw(encoded, salt)
|
|
|
|
|
def verify_password(password, hashed):
    """Check a plaintext password against a previously stored bcrypt hash.

    Args:
        password: Plaintext password as a ``str``; encoded before checking.
        hashed: The stored hash, passed to ``bcrypt.checkpw`` unchanged.

    Returns:
        The result of ``bcrypt.checkpw`` for the pair.
    """
    candidate = password.encode()
    return bcrypt.checkpw(candidate, hashed)
|
|
|
|
|
def add_user(username, password):
    """Create a new account with a bcrypt-hashed password.

    Args:
        username: Desired username; it is the table's primary key.
        password: Plaintext password; only its hash is stored.

    Returns:
        True when the row was inserted and committed, False when the insert
        violated a table constraint (in practice: the username is taken).

    Raises:
        sqlite3.Error: For any database failure other than a constraint
            violation, so real faults are not misreported as a duplicate
            username.
    """
    hashed_pwd = hash_password(password)
    try:
        cursor.execute(
            "INSERT INTO users (username, password) VALUES (?, ?)",
            (username, hashed_pwd),
        )
        conn.commit()
    except sqlite3.IntegrityError:
        # Close the transaction opened for the failed INSERT so it does not
        # linger on the shared connection.
        conn.rollback()
        return False
    return True
|
|
|
|
|
def authenticate_user(username, password):
    """Return True when ``username`` exists and ``password`` matches its hash.

    Looks up the stored hash for the username with a parameterized query and
    delegates the comparison to ``verify_password``. An unknown username and
    a wrong password both yield False.
    """
    cursor.execute("SELECT password FROM users WHERE username = ?", (username,))
    row = cursor.fetchone()
    if not row:
        # No such user.
        return False
    stored_hash = row[0]
    return True if verify_password(password, stored_hash) else False
|
|
|
|
|
|
|
|
def login_signup_page():
    """Render the login / registration screen.

    Which form is shown is driven by ``st.session_state.page`` ('login' or
    'register', defaulting to 'login'). A successful login stores
    ``logged_in``, ``username`` and ``page = "overview"`` in the session
    state and reruns the script. The two toggle buttons switch between the
    forms and rerun immediately, because the form for the current run has
    already been drawn by the time the button value is read.
    """
    st.set_page_config(page_title="Login | Hate Speech Classifier", layout="centered")
    if 'page' not in st.session_state:
        st.session_state.page = 'login'

    st.markdown('<h1 style="text-align:center; color:#005f73;">π£οΈ Smart Hate Speech Classifier</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align:center; color:#0a9396;">Please log in or create a new account to continue.</p>', unsafe_allow_html=True)

    if st.session_state.page == 'login':
        username = st.text_input("π€ Username")
        password = st.text_input("π Password", type="password")
        if st.button("Login"):
            if authenticate_user(username, password):
                st.session_state.logged_in = True
                st.session_state.username = username
                st.session_state.page = "overview"
                st.success("Login successful!")
                st.rerun()
            else:
                st.error("Invalid credentials")
        if st.button("New user? Create an account"):
            st.session_state.page = 'register'
            # Without a rerun the login form stays on screen until the next
            # interaction, so the switch would need a second click.
            st.rerun()

    elif st.session_state.page == 'register':
        new_user = st.text_input("π€ New Username")
        new_pass = st.text_input("π New Password", type="password")
        if st.button("Create Account"):
            if not new_user.strip() or not new_pass:
                # Reject blank credentials instead of creating an empty account.
                st.error("Username and password are required.")
            elif add_user(new_user, new_pass):
                st.success("Account created! You can now log in.")
            else:
                st.error("Username already exists!")
        if st.button("Already have an account? Login"):
            st.session_state.page = 'login'
            st.rerun()
|
|
|
|
|
|
|
|
# Gate the rest of the app behind authentication: anonymous sessions only
# ever see the login/registration screen, and the script stops there.
if "logged_in" not in st.session_state:
    st.session_state["logged_in"] = False

_is_authenticated = st.session_state["logged_in"]
if not _is_authenticated:
    login_signup_page()
    st.stop()
|
|
|
|
|
|
|
|
# Path of the ffmpeg executable provided by imageio-ffmpeg.
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()


def custom_run(cmd, *args, **kwargs):
    """Run ``cmd`` via ``subprocess.run``, substituting the ffmpeg binary.

    When ``cmd`` is a non-empty list/tuple whose first element is the bare
    name "ffmpeg", that element is replaced by ``ffmpeg_path``. The caller's
    sequence is never modified; a new list is built instead. Any other
    command is passed through unchanged.

    Args:
        cmd: Command as accepted by ``subprocess.run``.
        *args, **kwargs: Forwarded to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` from ``subprocess.run``.
    """
    if isinstance(cmd, (list, tuple)) and cmd and cmd[0] == "ffmpeg":
        cmd = [ffmpeg_path, *cmd[1:]]
    return subprocess.run(cmd, *args, **kwargs)


import whisper.audio  # noqa: E402

# Replace the `run` name inside whisper.audio with the wrapper above.
# NOTE(review): this only has an effect if the installed whisper version
# invokes ffmpeg through a module-level `run` in whisper.audio - confirm.
whisper.audio.run = custom_run
|
|
|
|
|
|
|
|
@st.cache_resource
def load_whisper_model():
    """Load the Whisper "tiny" model; cached through ``st.cache_resource``."""
    speech_model = whisper.load_model("tiny")
    return speech_model
|
|
|
|
|
@st.cache_resource
def load_bert_model():
    """Load the HateXplain tokenizer and sequence classifier.

    Returns:
        A ``(tokenizer, model)`` tuple; the model has been switched to
        evaluation mode. The result is cached through ``st.cache_resource``.
    """
    checkpoint = "Hate-speech-CNERG/bert-base-uncased-hatexplain"
    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    bert_classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    bert_classifier.eval()
    return bert_tokenizer, bert_classifier
|
|
|
|
|
# Module-level model handles used by the detector page.
whisper_model = load_whisper_model()
_bert_components = load_bert_model()
tokenizer, classifier_model = _bert_components
|
|
|
|
|
|
|
|
# Words/phrases that short-circuit the model: text containing a word that
# starts with one of these is reported as hate speech with confidence 1.0.
HATE_KEYWORDS = ["ugly", "stupid", "idiot", "hate", "kill", "trash", "fuck you", "bitch"]


def classify_text(text):
    """Classify ``text`` as "Hate Speech" or "Not Hate Speech".

    A keyword pre-filter runs first; otherwise the text is scored by the
    BERT classifier and the arg-max class decides the label.

    Args:
        text: The message to classify.

    Returns:
        ``(label, confidence)`` where ``label`` is "Hate Speech" or
        "Not Hate Speech" and ``confidence`` is a float: 1.0 for a keyword
        hit, else the softmax probability of the predicted class.
    """
    import re  # local import: keeps this block self-contained

    lowered = text.lower()
    # Anchor each keyword at a word start so inflections ("killing",
    # "idiots") still match while unrelated words that merely contain a
    # keyword ("skill", "whatever") do not.
    if any(re.search(r"\b" + re.escape(keyword), lowered) for keyword in HATE_KEYWORDS):
        return "Hate Speech", 1.0

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = classifier_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    confidence, pred = torch.max(probs, dim=1)

    # Resolve which class index means "hate" from the checkpoint's own label
    # map instead of assuming it is index 1.
    # NOTE(review): the published HateXplain config is believed to map
    # 0 -> "hate speech", 1 -> "normal", 2 -> "offensive" - confirm.
    id2label = getattr(classifier_model.config, "id2label", None) or {}
    hate_ids = {idx for idx, name in id2label.items() if "hate" in str(name).lower()}
    if not hate_ids:
        # No descriptive class names available: keep the previous convention.
        hate_ids = {1}

    label = "Hate Speech" if pred.item() in hate_ids else "Not Hate Speech"
    return label, confidence.item()
|
|
|
|
|
def show_result(label, score):
    """Display a prediction and its confidence.

    Args:
        label: Predicted label; "Hate Speech" is rendered as an error box,
            anything else as a success box.
        score: Confidence in the range used by the classifier (multiplied by
            100 and shown with two decimals).
    """
    st.markdown("### π Prediction Result:")
    score_percent = f"{score * 100:.2f}%"
    render = st.error if label == "Hate Speech" else st.success
    render(f"{label} ({score_percent} confident)")
|
|
|
|
|
|
|
|
# Sidebar navigation: each button stores the page to show in session state.
with st.sidebar:
    st.title("π Navigation")
    _go_overview = st.button("π Project Overview")
    if _go_overview:
        st.session_state["page"] = "overview"
    _go_detector = st.button("ποΈ Hate Speech Detector")
    if _go_detector:
        st.session_state["page"] = "detector"

# Default to the overview when no page has been chosen yet.
if 'page' not in st.session_state:
    st.session_state["page"] = "overview"

page = st.session_state.get("page", "overview")
|
|
|
|
|
def _save_upload(uploaded_file, default_suffix):
    """Copy an uploaded file to a named temp file and return its path."""
    # Keep the upload's real extension when it has one.
    suffix = os.path.splitext(uploaded_file.name)[1] or default_suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
        return tmp.name


def _remove_if_exists(path):
    """Delete a temp file, tolerating that it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _transcribe_and_classify(audio_path):
    """Transcribe an audio file with Whisper, then classify and show the text."""
    result = whisper_model.transcribe(audio_path)
    transcribed = result["text"]
    if not transcribed.strip():
        # Nothing to classify; mirror the text mode's empty-input warning.
        st.warning("No speech could be transcribed from this file.")
        return
    st.success("π Transcribed Text:")
    st.info(transcribed)
    label, score = classify_text(transcribed)
    show_result(label, score)


# Page body: a static overview, or the detector with three input modes.
if page == "overview":
    st.title("π£οΈ Smart Hate Speech Classifier Using BERT & Whisper")
    st.markdown("""
    This AI-based project detects hate speech in:
    - βοΈ Text Input
    - π Audio Files
    - π₯ Video Uploads

    ### π§ Models Used:
    - `OpenAI Whisper` for Speech-to-Text
    - `HateXplain BERT` for Hate Speech Classification
    """)

elif page == "detector":
    st.title("ποΈ Hate Speech Detection")
    input_mode = st.radio("Choose Input Type", ["Text", "Audio", "Video Upload"])

    if input_mode == "Text":
        text_input = st.text_area("π Enter your message:")
        if st.button("Classify Text"):
            if text_input.strip():
                label, score = classify_text(text_input)
                show_result(label, score)
            else:
                st.warning("β οΈ Please enter some text.")

    elif input_mode == "Audio":
        audio_file = st.file_uploader("π€ Upload Audio File:", type=["wav", "mp3"])
        if audio_file:
            audio_path = _save_upload(audio_file, ".wav")
            try:
                st.audio(audio_path)
                _transcribe_and_classify(audio_path)
            finally:
                # Always clean up, even when transcription fails.
                _remove_if_exists(audio_path)

    elif input_mode == "Video Upload":
        video_file = st.file_uploader("π€ Upload Video File:", type=["mp4", "mov", "avi"])
        if video_file:
            video_path = _save_upload(video_file, ".mp4")
            audio_path = None
            try:
                st.video(video_path)
                clip = VideoFileClip(video_path)
                try:
                    if clip.audio is None:
                        # Videos without an audio track have nothing to transcribe.
                        st.warning("This video has no audio track to transcribe.")
                    else:
                        # Reserve a temp path and close the handle before
                        # the extracted audio is written to it.
                        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                            audio_path = temp_audio.name
                        clip.audio.write_audiofile(audio_path)
                        _transcribe_and_classify(audio_path)
                finally:
                    # Close the clip so the video file can be deleted.
                    clip.close()
            finally:
                _remove_if_exists(video_path)
                if audio_path is not None:
                    _remove_if_exists(audio_path)
|
|
|
|
|
# Footer: horizontal rule followed by a caption line.
_FOOTER_CAPTION = "Built with β€οΈ using Streamlit, Whisper, and BERT."
st.markdown("---")
st.caption(_FOOTER_CAPTION)
|
|
|