Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer | |
# Checkpoint identifier handed to both from_pretrained() calls below.
model_name = "j-hartmann/emotion-english-distilroberta-base"

# Tokenizer and sequence-classification model are loaded from the same checkpoint
# so they stay in sync.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# The Trainer is built without any training arguments or datasets; in this file
# it is only used through trainer.predict().
trainer = Trainer(model=model)
| # summary function - test for single gradio function interface | |
def bulk_function(filename):
    """Classify every text row of an uploaded CSV and save the scores to a new CSV.

    The input is parsed as CSV. The first row is treated as a header and
    skipped; for each remaining non-empty row the last field is used as the
    text, so a leading index column is ignored. Each text is run through the
    module-level ``tokenizer`` / ``trainer`` and the result is written next
    to the input file.

    Args:
        filename: Either a path (``str`` / ``os.PathLike``) or an object that
            exposes the path as ``.name`` (what the ``type="file"`` upload
            component hands over).

    Returns:
        Path of the written CSV: the input path with its extension replaced
        by ``_emotion_predictions.csv``. The file has the columns ``text``,
        ``pred``, ``label``, ``score`` plus one probability column per
        emotion, and is written with the DataFrame index.
    """
    # Local imports keep this block self-contained; both are standard library.
    import csv
    import os

    class SimpleDataset:
        """Minimal indexable wrapper around the tokenizer output for trainer.predict()."""

        def __init__(self, tokenized_texts):
            self.tokenized_texts = tokenized_texts

        def __len__(self):
            return len(self.tokenized_texts["input_ids"])

        def __getitem__(self, idx):
            return {k: v[idx] for k, v in self.tokenized_texts.items()}

    # Class index -> output column, in the order the original code hard-coded.
    # NOTE(review): assumed to match model.config.id2label - confirm.
    emotions = ("anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise")
    columns = ["text", "pred", "label", "score", *emotions]

    # Accept a plain path as well as an upload wrapper. pathlib.Path also has a
    # ``.name`` (the basename only), so path-likes must be checked first.
    if isinstance(filename, (str, os.PathLike)):
        path = os.fspath(filename)
    else:
        path = filename.name

    # A real CSV parser keeps quoted fields containing commas or newlines
    # intact; newline="" is what the csv module requires for that.
    with open(path, "r", encoding="utf-8", newline="") as f:
        rows = list(csv.reader(f))
    # Drop the header row and blank lines; the text is the last field.
    lines_s = [row[-1] for row in rows[1:] if row]

    # splitext only strips the final extension, so dots in directory names or
    # elsewhere in the file name no longer truncate the output path.
    YOUR_FILENAME = os.path.splitext(path)[0] + "_emotion_predictions" + ".csv"

    if not lines_s:
        # Nothing to classify: still produce a well-formed (header-only) file
        # instead of failing inside the tokenizer.
        pd.DataFrame(columns=columns).to_csv(YOUR_FILENAME)
        return YOUR_FILENAME

    # Tokenize texts and create the prediction data set
    tokenized_texts = tokenizer(lines_s, truncation=True, padding=True)
    pred_dataset = SimpleDataset(tokenized_texts)

    # Run predictions over all rows at once
    predictions = trainer.predict(pred_dataset)
    logits = np.asarray(predictions.predictions, dtype=np.float64)

    # Transform predictions to labels
    preds = logits.argmax(-1)
    labels = pd.Series(preds).map(model.config.id2label)

    # Softmax computed once; subtracting the row maximum first keeps exp()
    # from overflowing on large logits without changing the result.
    exp_shifted = np.exp(logits - logits.max(-1, keepdims=True))
    probs = exp_shifted / exp_shifted.sum(-1, keepdims=True)
    scores = probs.max(1)

    data = {"text": lines_s, "pred": preds, "label": labels, "score": scores}
    for class_idx, emotion in enumerate(emotions):
        data[emotion] = probs[:, class_idx]
    df = pd.DataFrame(data, columns=columns)

    # save results to csv and hand the path back for the file output
    df.to_csv(YOUR_FILENAME)
    return YOUR_FILENAME
# Single-file upload widget; with type="file" the handler receives an object
# whose .name attribute is read by bulk_function.
# NOTE(review): gr.inputs is the legacy component namespace - confirm it is
# still available in the Gradio version this app is deployed with.
csv_upload = gr.inputs.File(file_count="single", type="file", label="csv", optional=False)

# One input (the uploaded csv), one output (the generated predictions file).
demo = gr.Interface(
    bulk_function,
    [csv_upload],
    ["file"],
    examples=[["emotion_examples.csv"]],
)
demo.launch(debug=True)