import os

import joblib
import numpy as np
import pandas as pd
import torch
from sklearn.impute import SimpleImputer

from NN_classifier.simple_binary_classifier import Medium_Binary_Network
from NN_classifier.neural_net_t import Neural_Network
from feature_extraction import extract_features

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_model(model_dir='models/medium_binary_classifier'):
    """Load the binary classifier and its preprocessing artifacts (scaler, label encoder, imputer)."""
    model_path = os.path.join(model_dir, 'nn_model.pt')
    scaler_path = os.path.join(model_dir, 'scaler.joblib')
    encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
    imputer_path = os.path.join(model_dir, 'imputer.joblib')

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found at: {model_path}")

    label_encoder = joblib.load(encoder_path)
    scaler = joblib.load(scaler_path)

    # The imputer is optional; if missing, a new one is fitted at classification time.
    imputer = None
    if os.path.exists(imputer_path):
        imputer = joblib.load(imputer_path)
    else:
        print("Warning: Imputer not found, will create a new one during classification")

    # The scaler records the number of features the network was trained on.
    input_size = scaler.n_features_in_
    model = Medium_Binary_Network(input_size, hidden_sizes=[256, 192, 128, 64], dropout=0.3).to(DEVICE)
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    # Sanity-check that the imputer exposes the feature names it was fitted on.
    if imputer is not None:
        try:
            if hasattr(imputer, 'feature_names_in_'):
                print(f"Imputer has {len(imputer.feature_names_in_)} features")
                print(f"First few feature names: {imputer.feature_names_in_[:5]}")
            else:
                print("Warning: Imputer does not have feature_names_in_ attribute")
        except Exception as e:
            print(f"Error checking imputer: {str(e)}")

    return model, scaler, label_encoder, imputer
def load_ternary_model(model_dir='models/neural_network'):
    """Load the ternary classifier and its preprocessing artifacts (scaler, label encoder, imputer)."""
    model_path = os.path.join(model_dir, 'nn_model.pt')
    scaler_path = os.path.join(model_dir, 'scaler.joblib')
    encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
    imputer_path = os.path.join(model_dir, 'imputer.joblib')

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found at: {model_path}")

    label_encoder = joblib.load(encoder_path)
    scaler = joblib.load(scaler_path)

    # The imputer is optional; if missing, a new one is fitted at classification time.
    imputer = None
    if os.path.exists(imputer_path):
        imputer = joblib.load(imputer_path)
    else:
        print("Warning: Imputer not found, will create a new one during classification")

    # Input size comes from the scaler, output size from the label encoder.
    input_size = scaler.n_features_in_
    num_classes = len(label_encoder.classes_)
    model = Neural_Network(
        input_size,
        hidden_layers=[128, 96, 64, 32],
        num_classes=num_classes,
        dropout_rate=0.1,
    ).to(DEVICE)
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()
    print(f"Loaded ternary classifier model with {num_classes} classes: {label_encoder.classes_}")

    # Sanity-check that the imputer exposes the feature names it was fitted on.
    if imputer is not None:
        try:
            if hasattr(imputer, 'feature_names_in_'):
                print(f"Imputer has {len(imputer.feature_names_in_)} features")
                print(f"First few feature names: {imputer.feature_names_in_[:5]}")
            else:
                print("Warning: Imputer does not have feature_names_in_ attribute")
        except Exception as e:
            print(f"Error checking imputer: {str(e)}")

    return model, scaler, label_encoder, imputer
def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
    """Classify a single text: extract features, align/impute/scale them, then run the model."""
    features_df, text_analysis = extract_features(text, scores=scores)

    # Align the extracted features with the feature names the imputer was fitted on.
    if imputer is not None:
        expected_feature_names = imputer.feature_names_in_
    else:
        expected_feature_names = None

    if expected_feature_names is not None:
        aligned_features = pd.DataFrame(columns=expected_feature_names)
        for col in features_df.columns:
            if col in expected_feature_names:
                aligned_features[col] = features_df[col]
        # Any expected feature that was not extracted is filled with zeros.
        for col in expected_feature_names:
            if col not in aligned_features.columns or aligned_features[col].isnull().all():
                aligned_features[col] = 0
                print(f"Added missing feature: {col}")
        features_df = aligned_features

    if imputer is None:
        print("Warning: No imputer provided, creating a new one")
        imputer = SimpleImputer(strategy='mean')
        features = imputer.fit_transform(features_df)
    else:
        features = imputer.transform(features_df)

    features_scaled = scaler.transform(features)
    features_tensor = torch.FloatTensor(features_scaled).to(DEVICE)

    with torch.no_grad():
        outputs = model(features_tensor)
        probabilities = torch.softmax(outputs, dim=1)
        pred_class = torch.argmax(probabilities, dim=1).item()

    predicted_label = label_encoder.classes_[pred_class]
    probs_dict = {label_encoder.classes_[i]: probabilities[0][i].item()
                  for i in range(len(label_encoder.classes_))}

    return {
        'predicted_class': predicted_label,
        'probabilities': probs_dict,
        'features': features_df,
        'text_analysis': text_analysis,
        'scores': scores,
    }
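

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): it assumes the
# binary-classifier artifacts exist under models/medium_binary_classifier and
# that extract_features() accepts a plain string. The sample text is made up.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    model, scaler, label_encoder, imputer = load_model()
    sample_text = "This is a short example sentence to classify."
    result = classify_text(sample_text, model, scaler, label_encoder, imputer)
    print(f"Predicted class: {result['predicted_class']}")
    for label, prob in result['probabilities'].items():
        print(f"  {label}: {prob:.3f}")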