Spaces:
Sleeping
Sleeping
import functools
import pickle
import time

import streamlit as st
import torch
import transformers
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModel
# Hugging Face model id of the sentence encoder used to embed the input text.
_BERT_MODEL_NAME = "cointegrated/LaBSE-en-ru"
# Pickled classifier that is applied to the sentence embeddings.
_BERT_CLASSIFIER_PATH = 'pages/models/linmodel_min.pkl'
# Display names for the integer class ids returned by the classifier.
_BERT_LABEL_NAMES = {0: 'Bad', 1: 'Neutral', 2: 'Good'}


@functools.lru_cache(maxsize=1)
def _load_bert_encoder():
    """Load the tokenizer and encoder once and reuse them on later calls."""
    tokenizer = AutoTokenizer.from_pretrained(_BERT_MODEL_NAME)
    model = AutoModel.from_pretrained(_BERT_MODEL_NAME)
    return tokenizer, model


@functools.lru_cache(maxsize=1)
def _load_bert_classifier():
    """Unpickle the classifier once and reuse it on later calls."""
    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # This is only safe while the .pkl is a trusted artifact shipped with
    # the app -- never point this path at user-supplied data.
    with open(_BERT_CLASSIFIER_PATH, 'rb') as f:
        return pickle.load(f)


def preprocess_bert(text):
    """Classify ``text`` and report the label together with the elapsed time.

    The text is tokenized (truncated to 500 tokens), embedded with the
    encoder's pooled output, L2-normalized and passed to the pickled
    classifier. Only the first prediction is reported, so when several
    sentences are passed at once the result describes the first one.

    The tokenizer, encoder and classifier are loaded on the first call and
    cached afterwards; the reported time therefore includes loading only on
    that first call.

    Args:
        text: Input forwarded unchanged to the tokenizer.

    Returns:
        A string of the form ``'BERT <label> <seconds> секунд'``.

    Raises:
        KeyError: If the classifier predicts a class id other than 0, 1 or 2.
    """
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    tokenizer, model = _load_bert_encoder()
    classifier = _load_bert_classifier()

    encoded_input = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=500,
        return_tensors='pt',
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        model_output = model(**encoded_input)

    embeddings = torch.nn.functional.normalize(model_output.pooler_output)
    embeddings = embeddings.detach().cpu().numpy()

    predicted_label = classifier.predict(embeddings)
    predicted_label_text = _BERT_LABEL_NAMES[predicted_label[0]]

    inference_time = time.perf_counter() - start_time
    return f'BERT {predicted_label_text} {inference_time} секунд'