Bindumiryala committed on
Commit
9442257
·
verified ·
1 Parent(s): 5d0abc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -11
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import streamlit as st
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  import whisper
@@ -7,9 +6,9 @@ import os
7
  import torch
8
  import sqlite3
9
  import bcrypt
10
- import imageio_ffmpeg
11
- import subprocess
12
  from moviepy.editor import VideoFileClip
 
 
13
 
14
  # ------------------------------- DB Setup -------------------------------
15
  conn = sqlite3.connect('users.db', check_same_thread=False)
@@ -97,12 +96,13 @@ whisper.audio.run = custom_run
97
  # ------------------------------- Load Models -------------------------------
98
  @st.cache_resource
99
  def load_whisper_model():
100
- return whisper.load_model("tiny")
101
 
102
  @st.cache_resource
103
  def load_bert_model():
104
- tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
105
- model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")
 
106
  model.eval()
107
  return tokenizer, model
108
 
@@ -151,11 +151,11 @@ if page == "overview":
151
  This AI-based project detects hate speech in:
152
  - ✍️ Text Input
153
  - 🔊 Audio Files
154
- - 🎥 Video Uploads or URLs
155
 
156
  ### 🔧 Models Used:
157
  - `OpenAI Whisper` for Speech-to-Text
158
- - `Toxic-BERT` for Hate Speech Classification
159
  """)
160
 
161
  elif page == "detector":
@@ -184,6 +184,7 @@ elif page == "detector":
184
  st.info(transcribed)
185
  label, score = classify_text(transcribed)
186
  show_result(label, score)
 
187
 
188
  elif input_mode == "Video Upload":
189
  video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
@@ -193,7 +194,7 @@ elif page == "detector":
193
  video_path = temp_video.name
194
  st.video(video_path)
195
  clip = VideoFileClip(video_path)
196
- audio_path = "temp_video_audio.wav"
197
  clip.audio.write_audiofile(audio_path)
198
  result = whisper_model.transcribe(audio_path)
199
  transcribed = result["text"]
@@ -201,7 +202,8 @@ elif page == "detector":
201
  st.info(transcribed)
202
  label, score = classify_text(transcribed)
203
  show_result(label, score)
204
-
 
205
 
206
  st.markdown("---")
207
- st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")
 
 
1
  import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import whisper
 
6
  import torch
7
  import sqlite3
8
  import bcrypt
 
 
9
  from moviepy.editor import VideoFileClip
10
+ import subprocess
11
+ import imageio_ffmpeg
12
 
13
  # ------------------------------- DB Setup -------------------------------
14
  conn = sqlite3.connect('users.db', check_same_thread=False)
 
96
  # ------------------------------- Load Models -------------------------------
97
  @st.cache_resource
98
  def load_whisper_model():
99
+ return whisper.load_model("tiny") # smaller model for less storage
100
 
101
  @st.cache_resource
102
  def load_bert_model():
103
+ model_name = "Hate-speech-CNERG/bert-base-uncased-hatexplain"
104
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
105
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
106
  model.eval()
107
  return tokenizer, model
108
 
 
151
  This AI-based project detects hate speech in:
152
  - ✍️ Text Input
153
  - 🔊 Audio Files
154
+ - 🎥 Video Uploads
155
 
156
  ### 🔧 Models Used:
157
  - `OpenAI Whisper` for Speech-to-Text
158
+ - `HateXplain BERT` for Hate Speech Classification
159
  """)
160
 
161
  elif page == "detector":
 
184
  st.info(transcribed)
185
  label, score = classify_text(transcribed)
186
  show_result(label, score)
187
+ os.remove(audio_path) # remove temp file to save space
188
 
189
  elif input_mode == "Video Upload":
190
  video_file = st.file_uploader("📤 Upload Video File:", type=["mp4", "mov", "avi"])
 
194
  video_path = temp_video.name
195
  st.video(video_path)
196
  clip = VideoFileClip(video_path)
197
+ audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
198
  clip.audio.write_audiofile(audio_path)
199
  result = whisper_model.transcribe(audio_path)
200
  transcribed = result["text"]
 
202
  st.info(transcribed)
203
  label, score = classify_text(transcribed)
204
  show_result(label, score)
205
+ os.remove(video_path)
206
+ os.remove(audio_path)
207
 
208
  st.markdown("---")
209
+ st.caption("Built with ❤️ using Streamlit, Whisper, and BERT.")