| import streamlit as st | |
| import pandas as pd | |
| import catboost | |
| from catboost import CatBoostClassifier | |
| import re | |
| import string | |
| from nltk.corpus import stopwords | |
| from pymystem3 import Mystem | |
| from joblib import load | |
| import nltk | |
# Fetch the NLTK stop-word corpus only when it is not already installed
# locally, so that a normal start-up does not need to contact the NLTK
# download server.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
def data_preprocessing(text):
    """Normalize a raw review into a space-separated string of tokens.

    Steps, in order: lowercase the text; delete ``<...>`` spans; replace
    ``http...`` URLs, ``@word`` mentions, ``#word`` hashtags and digit runs
    with a single space; delete every character listed in
    ``string.punctuation``; drop tokens found in NLTK's Russian stop-word
    list.

    Args:
        text: The raw review text.

    Returns:
        The remaining whitespace-delimited tokens joined by single spaces.
    """
    russian_stop_words = set(stopwords.words('russian'))

    # Angle-bracket spans are removed outright (not replaced by a space), so
    # words separated only by a tag end up glued together.
    # NOTE(review): confirm this matches the preprocessing used when the
    # vectorizer was fitted before changing it.
    cleaned = re.sub("<.*?>", "", text.lower())

    # URLs, mentions, hashtags and digit runs each become a single space.
    for pattern in (r'http\S+', r'@\w+', r'#\w+', r'\d+'):
        cleaned = re.sub(pattern, " ", cleaned)

    # string.punctuation only covers ASCII punctuation characters.
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))

    tokens = cleaned.split()
    kept = [token for token in tokens if token not in russian_stop_words]
    return " ".join(kept)
def lemmatize_text(text):
    """Lemmatize *text* with Mystem and join the results with single spaces.

    Args:
        text: The text to lemmatize.

    Returns:
        Every item returned by ``Mystem.lemmatize`` joined with ``' '``.
    """
    # NOTE(review): a new Mystem object is built on every call; presumably
    # this is costly -- confirm whether a shared instance would be safe here.
    return ' '.join(Mystem().lemmatize(text))
# Artifacts used by classic_ml_page, loaded once when this module is executed.
# Both paths are relative to the current working directory.

# joblib.load unpickles the file, so it must only be pointed at trusted files.
tfidf_vectorizer = load('tfidf_vectorizer.joblib')

# CatBoost classifier restored from its saved model file.
model = CatBoostClassifier()
model.load_model('cat_model4.cbm')
def classic_ml_page():
    """Render the review-classification page.

    Draws a title, a text area and a button. When the button is pressed and
    the review is not blank, the text is passed through
    ``data_preprocessing``, ``lemmatize_text`` and ``tfidf_vectorizer`` and
    then to ``model.predict``; a predicted label of ``1`` is shown as a
    positive review, anything else as a negative one. A blank review (empty
    or whitespace only) shows a prompt asking the user to enter text.
    """
    st.title("Классификация отзывов")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    if not st.button("Классифицировать"):
        return

    # Treat whitespace-only input like empty input: there is nothing to
    # classify, so prompt the user instead of running the model.
    if not user_review.strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    # The vectorizer expects an iterable of documents, hence the one-item list.
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")