Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer | |
| # summary function - test for single gradio function interface | |
_EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Tokenizer, model and trainer are expensive to build, so one set is kept per
# process and reused across requests instead of being reloaded on every call.
_EMOTION_MODEL_CACHE = {}


class _SimpleDataset:
    """Minimal indexable dataset over a tokenizer's batch output."""

    def __init__(self, tokenized_texts):
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        return len(self.tokenized_texts["input_ids"])

    def __getitem__(self, idx):
        return {k: v[idx] for k, v in self.tokenized_texts.items()}


def _load_emotion_model():
    """Return the cached (tokenizer, model, trainer) triple, loading it on first use."""
    if "bundle" not in _EMOTION_MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(_EMOTION_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(_EMOTION_MODEL_NAME)
        _EMOTION_MODEL_CACHE["bundle"] = (tokenizer, model, Trainer(model=model))
    return _EMOTION_MODEL_CACHE["bundle"]


def _read_input_table(path):
    """Read a ``.csv`` or ``.xlsx`` file into a DataFrame; return None for other types."""
    import os

    # splitext looks only at the final suffix of the final path component, so
    # dots in directory names or in the file stem do not confuse the check.
    extension = os.path.splitext(path)[1].lower()
    if extension == ".csv":
        table = pd.read_csv(path, index_col=False)
    elif extension == ".xlsx":
        table = pd.read_excel(path, index_col=False)
    else:
        return None

    # A leading "Unnamed: 0" column is a previously exported index; drop it.
    if len(table.columns) > 0 and table.columns[0] == "Unnamed: 0":
        table = table.drop("Unnamed: 0", axis=1)
    return table


def _softmax(logits):
    """Return the row-wise softmax of *logits* as a float64 array."""
    logits = np.asarray(logits, dtype=np.float64)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    exponentials = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return exponentials / exponentials.sum(axis=-1, keepdims=True)


def bulk_function(filename):
    """Classify the emotion of every text in an uploaded table and save the result.

    The input is expected to hold an ID column followed by a text column.
    The output CSV repeats those two columns and adds ``max_label``,
    ``max_score`` and one probability column per model label, all rounded
    to three decimals.

    Args:
        filename: Uploaded file object; only its ``.name`` attribute (the path
            on disk) is used.

    Returns:
        Path of the written ``<input stem>_emotion_predictions.csv`` file, or
        ``None`` when the upload is neither ``.csv`` nor ``.xlsx``.

    Raises:
        ValueError: If the table has fewer than two columns.
    """
    import os

    source_path = filename.name

    # Validate the file before touching the model so unsupported uploads are
    # rejected without paying for a model load.
    df_input = _read_input_table(source_path)
    if df_input is None:
        return None
    if df_input.shape[1] < 2:
        raise ValueError(
            "Input file must contain an ID column followed by a text column."
        )

    id_column, text_column = df_input.columns[0], df_input.columns[1]
    ids = df_input.iloc[:, 0].to_list()
    lines_s = df_input.iloc[:, 1].to_list()
    # The tokenizer needs strings; missing or numeric cells are converted only
    # for prediction, while the output keeps the original cell values.
    texts = ["" if pd.isna(text) else str(text) for text in lines_s]

    tokenizer, model, trainer = _load_emotion_model()
    id2label = model.config.id2label
    # NOTE(review): assumes the sorted label ids index the logit columns in
    # order (0..n-1) - confirm if a different model is substituted.
    label_names = [id2label[label_id] for label_id in sorted(id2label)]

    if texts:
        tokenized_texts = tokenizer(texts, truncation=True, padding=True)
        predictions = trainer.predict(_SimpleDataset(tokenized_texts))
        probabilities = _softmax(predictions.predictions)
    else:
        # Nothing to classify: still emit a header-only result file.
        probabilities = np.empty((0, len(label_names)))

    best_indices = probabilities.argmax(axis=-1)
    # Round in float64 and hand pandas plain Python floats; rounding float32
    # values first lets extra digits reappear once they are widened.
    rounded_rows = np.round(probabilities, 3).tolist()

    rows = [
        (row_id, text, id2label[int(best)], scores[int(best)], *scores)
        for row_id, text, best, scores in zip(ids, lines_s, best_indices, rounded_rows)
    ]
    columns = [id_column, text_column, "max_label", "max_score", *label_names]
    df = pd.DataFrame(rows, columns=columns)

    # Save next to the upload, replacing only the final extension.
    output_path = os.path.splitext(source_path)[0] + "_emotion_predictions" + ".csv"
    df.to_csv(output_path, index=False)
    return output_path
# Wire bulk_function into a one-file-in, one-file-out Gradio interface and start it.
upload_component = gr.inputs.File(
    file_count="single",
    type="file",
    label="Upload file",
    optional=False,
)
download_component = gr.outputs.File(label="Output file")

demo = gr.Interface(
    bulk_function,
    inputs=[upload_component],
    outputs=[download_component],
    # examples=[["YOUR_FILENAME.csv"]], # computes, doesn't export df so far
    theme="huggingface",
    title="Emotion Classification from CSV",
    description="Upload csv file with 2 columns (in order): (a) ID column, (b) text column. The script returns a new file that includes both the ID column and text column together with the emotion predictions using this model: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base.",
    allow_flagging=False,
)
demo.launch(debug=True)