Spaces: Runtime error
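The pasted app imports streamlit, tensorflow, nltk, and numpy, so if the Space dies at startup with a ModuleNotFoundError, the first thing to check is that the Space's requirements.txt lists all of them; a minimal sketch, assuming a Streamlit SDK Space:

```text
streamlit
tensorflow
nltk
numpy
```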
```python
import re

import nltk
import numpy as np
import streamlit as st
from nltk.tokenize import TweetTokenizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Download the NLTK stopwords corpus if not already present
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

from nltk.corpus import stopwords

# Download the NLTK punkt tokenizer if not already present
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

from nltk.tokenize import word_tokenize

# Path to the trained LSTM model
model_path = "./my_model.h5"


@st.cache_resource
def load_lstm_model(path):
    # Cache the model so it is loaded once, not on every button press
    return load_model(path)


def clean_text(text):
    # Remove English stopwords
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    filtered_words = [word for word in words if word not in stop_words]
    # Remove Twitter usernames
    text = re.sub(r'@\w+', '', ' '.join(filtered_words))
    # Remove URLs
    text = re.sub(r'http\S+', '', text)
    # Re-tokenize with TweetTokenizer, which handles Twitter-style text
    tokenizer = TweetTokenizer(preserve_case=True)
    tokens = tokenizer.tokenize(text)
    # Strip hashtag symbols
    tokens = [word.replace('#', '') for word in tokens]
    # Lowercase and drop words shorter than three characters
    text = ' '.join([word.lower() for word in tokens if len(word) > 2])
    # Remove digits
    text = re.sub(r'\d+', '', text)
    # Remove anything that is not a letter or whitespace
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text


def preprocess_text(text):
    # Clean the text
    cleaned_text = clean_text(text)
    # NOTE: fitting a fresh Tokenizer on one input cannot reproduce the
    # vocabulary the model was trained with; see the fix sketched below.
    token = Tokenizer()
    token.fit_on_texts([cleaned_text])
    text_sequences = token.texts_to_sequences([cleaned_text])
    padded_sequences = pad_sequences(text_sequences, maxlen=100)
    return padded_sequences


def predict_hate_speech(text, lstm_model):
    # Preprocess the text and run the model on it
    padded_sequences = preprocess_text(text)
    prediction = lstm_model.predict(padded_sequences)
    return prediction


def main():
    # Set up the Streamlit UI
    st.title("Hate Speech Detection")
    st.write("Enter text below to detect hate speech:")
    input_text = st.text_area("Input Text", "")

    if st.button("Detect Hate Speech"):
        if input_text:
            lstm_model = load_lstm_model(model_path)
            prediction = predict_hate_speech(input_text, lstm_model)
            # Index of the highest-scoring class: 1 = hate speech
            max_index = np.argmax(np.array(prediction[0]))
            if max_index == 1:
                st.error("Hate Speech Detected")
            else:
                st.success("No Hate Speech Detected")
        else:
            st.warning("Please enter some text")


if __name__ == "__main__":
    main()
```
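Separately from the crash, predictions from this code would be unreliable even when it runs: `preprocess_text` fits a brand-new `Tokenizer` on the single input string, so the word indices it produces do not match the vocabulary the LSTM was trained on. The usual fix is to pickle the tokenizer fitted during training and load it in the app; a minimal sketch, where `tokenizer.pickle` is a hypothetical filename and `token` is the Tokenizer from the training script:

```python
import pickle

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Training side (run once): persist the tokenizer fitted on the full corpus.
# with open("tokenizer.pickle", "wb") as f:
#     pickle.dump(token, f)

# App side: load the saved tokenizer instead of fitting a fresh one
with open("tokenizer.pickle", "rb") as f:
    trained_tokenizer = pickle.load(f)

def preprocess_text(text):
    cleaned_text = clean_text(text)  # same cleaning function as above
    # Words now map to the indices the model's embedding layer expects
    sequences = trained_tokenizer.texts_to_sequences([cleaned_text])
    return pad_sequences(sequences, maxlen=100)
```

With both files in the Space repo, the app can also be run locally with `streamlit run app.py` (assuming the entry file is named app.py) to see the full traceback behind the runtime error.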