Spaces:

AbstractQbit
/

imdb_classification

Sleeping

App Files Files Community

AbstractQbit committed on Sep 14, 2023

Commit

2058f83

1 Parent(s): f28d6ee

Add regression trained electra

Browse files

Files changed (2) hide show

app.py +15 -5
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import gradio as gr
 import pickle
@@ -9,7 +9,10 @@ sklearn_model = pickle.load(open('classic_pipeline.pickle', 'rb'))
 model_name = "AbstractQbit/electra_large_imdb_htsplice"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
 def tokenize_with_splicing(text):
@@ -21,20 +24,27 @@ def tokenize_with_splicing(text):
         tokens['attention_mask'] = [1]*512
     return tokens
-def make_stars(prob):
     stars = round(1 + prob*9)
     return '★'*stars + '☆'*(10-stars)
 def run_models(review):
     prob_sklearn = float(sklearn_model.predict_proba([review])[0][1])
     label_sklearn = 'positive' if prob_sklearn > 0.5 else 'negative'
-    res = f"TF-IDF SVC thinks the review is {label_sklearn} ({100*prob_sklearn:.2f}% positive).\n{make_stars(prob_sklearn):s}\n\n"
     input = tokenize_with_splicing(review).convert_to_tensors('pt', True)
     output = torch.nn.functional.softmax(model(**input).logits, dim=1)
     prob_electra = float(output[0][1])
     label_electra = 'positive' if prob_electra > 0.5 else 'negative'
-    res += f"ELECTRA thinks the review is {label_electra} ({100*prob_electra:.2f}% positive).\n{make_stars(prob_electra):s}"
     return res

+from transformers import AutoTokenizer, ElectraForSequenceClassification
 import torch
 import gradio as gr
 import pickle
 model_name = "AbstractQbit/electra_large_imdb_htsplice"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = ElectraForSequenceClassification.from_pretrained(model_name)
+model_reg_name = "AbstractQbit/electra_large_imdb_regression_htsplice"
+model_reg = ElectraForSequenceClassification.from_pretrained(model_reg_name)
 def tokenize_with_splicing(text):
         tokens['attention_mask'] = [1]*512
     return tokens
+def make_stars_from_confidence(prob):
     stars = round(1 + prob*9)
     return '★'*stars + '☆'*(10-stars)
+def make_stars_from_rating(rating):
+    stars = round(float(torch.clamp(rating, 1, 10)))
+    return '★'*stars + '☆'*(10-stars)
 def run_models(review):
     prob_sklearn = float(sklearn_model.predict_proba([review])[0][1])
     label_sklearn = 'positive' if prob_sklearn > 0.5 else 'negative'
+    res = f"TF-IDF SVC trained with polarity classification thinks the review is {label_sklearn} ({100*prob_sklearn:.2f}% positive confidence).\n{make_stars_from_confidence(prob_sklearn):s}\n\n"
     input = tokenize_with_splicing(review).convert_to_tensors('pt', True)
     output = torch.nn.functional.softmax(model(**input).logits, dim=1)
     prob_electra = float(output[0][1])
     label_electra = 'positive' if prob_electra > 0.5 else 'negative'
+    res += f"ELECTRA trained with polarity classification thinks the review is {label_electra} ({100*prob_electra:.2f}% positive confidence).\n{make_stars_from_confidence(prob_electra):s}\n\n"
+    rating_electra_reg = model_reg(**input).logits[0,0]
+    res += f"ELECTRA trained with rating regression thinks the review is rated {rating_electra_reg:.2f}★.\n{make_stars_from_rating(rating_electra_reg):s}"
     return res

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ scikit-learn
 torch
 transformers
 tokenizers

 torch
 transformers
 tokenizers
+accelerate