import os

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
# Load the trained classifier from the Keras HDF5 file next to this script.
modelbaru = tf.keras.models.load_model('toxicity.h5')

# Upper bound on the vocabulary size handed to the text vectorizer.
MAX_FEATURES = 200000

# Read the training CSV; the path is <dataset dir>/train.csv/train.csv,
# i.e. the file sits inside a directory that is itself named "train.csv".
data = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv'))
x = data['comment_text']
# Every column from index 2 onward, as a plain array.
y = data[data.columns[2:]].values

# The vectorizer's vocabulary is rebuilt from the training comments on each
# start-up; it is not restored from disk.
# NOTE(review): presumably these settings must match the ones used when
# 'toxicity.h5' was trained -- confirm against the training script.
vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')
vectorizer.adapt(x.values)

# NOTE(review): this vectorizes every comment in the CSV at start-up, and the
# result is not read anywhere in the visible code. It is kept because the name
# is module-level; consider dropping it if nothing else uses it.
vectorized_text = vectorizer(x.values)

# One-off prediction on a fixed sample string, run once at start-up.
input_str = vectorizer('yo i fuckin hate you')
res = modelbaru.predict(np.expand_dims(input_str, 0))
def score_comment(comment):
    """Score a single comment and return a per-label text report.

    Args:
        comment: The raw comment text to classify.

    Returns:
        A string containing one newline-terminated ``<label>: <True|False>``
        line for each column in ``data.columns[2:-1]``. The flag is whether
        the model output at that position is greater than 0.5.
    """
    # The vectorizer is given a one-element list, so the model sees a batch
    # holding just this comment.
    vectorized_comment = vectorizer([comment])
    results = modelbaru.predict(vectorized_comment)
    scores = results[0]

    # NOTE(review): this slice leaves out the last column of `data`, while the
    # training targets elsewhere in this file are taken from columns[2:].
    # Confirm whether omitting the final label from the report is intended.
    labels = data.columns[2:-1]

    return ''.join(
        f'{label}: {scores[idx] > 0.5}\n' for idx, label in enumerate(labels)
    )
# The `gr.inputs` namespace was deprecated in Gradio 3 and removed in Gradio 4.
# Use the top-level Textbox component when it exists and fall back to the
# legacy location only on older releases that lack it.
_textbox_cls = getattr(gr, 'Textbox', None) or gr.inputs.Textbox

# Web UI: a two-line text box feeds score_comment, and its report is shown as
# plain text.
interface = gr.Interface(
    fn=score_comment,
    inputs=_textbox_cls(lines=2, placeholder='Toxic Detector by: AezersX'),
    outputs='text',
)
interface.launch()