import os

import joblib
import numpy as np
import pandas as pd
import torch
from sklearn.impute import SimpleImputer

from NN_classifier.simple_binary_classifier import Medium_Binary_Network
from NN_classifier.neural_net_t import Neural_Network
from feature_extraction import extract_features

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_model(model_dir='models/medium_binary_classifier'):
    """Load the binary classifier and its preprocessing artifacts (scaler, label encoder, imputer)."""
    model_path = os.path.join(model_dir, 'nn_model.pt')
    scaler_path = os.path.join(model_dir, 'scaler.joblib')
    encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
    imputer_path = os.path.join(model_dir, 'imputer.joblib')

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found at: {model_path}")

    label_encoder = joblib.load(encoder_path)
    scaler = joblib.load(scaler_path)

    # The imputer is optional; if missing, a new one is fitted at classification time.
    imputer = None
    if os.path.exists(imputer_path):
        imputer = joblib.load(imputer_path)
    else:
        print("Warning: Imputer not found, will create a new one during classification")

    # The scaler records the number of features the network was trained on.
    input_size = scaler.n_features_in_
    model = Medium_Binary_Network(input_size, hidden_sizes=[256, 192, 128, 64], dropout=0.3).to(DEVICE)
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    # Sanity-check that the imputer exposes the feature names it was fitted on.
    if imputer is not None:
        try:
            if hasattr(imputer, 'feature_names_in_'):
                print(f"Imputer has {len(imputer.feature_names_in_)} features")
                print(f"First few feature names: {imputer.feature_names_in_[:5]}")
            else:
                print("Warning: Imputer does not have feature_names_in_ attribute")
        except Exception as e:
            print(f"Error checking imputer: {str(e)}")

    return model, scaler, label_encoder, imputer
def load_ternary_model(model_dir='models/neural_network'):
    """Load the ternary classifier and its preprocessing artifacts (scaler, label encoder, imputer)."""
    model_path = os.path.join(model_dir, 'nn_model.pt')
    scaler_path = os.path.join(model_dir, 'scaler.joblib')
    encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
    imputer_path = os.path.join(model_dir, 'imputer.joblib')

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found at: {model_path}")

    label_encoder = joblib.load(encoder_path)
    scaler = joblib.load(scaler_path)

    # The imputer is optional; if missing, a new one is fitted at classification time.
    imputer = None
    if os.path.exists(imputer_path):
        imputer = joblib.load(imputer_path)
    else:
        print("Warning: Imputer not found, will create a new one during classification")

    # Input size comes from the scaler, output size from the label encoder.
    input_size = scaler.n_features_in_
    num_classes = len(label_encoder.classes_)
    model = Neural_Network(
        input_size,
        hidden_layers=[128, 96, 64, 32],
        num_classes=num_classes,
        dropout_rate=0.1,
    ).to(DEVICE)
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()
    print(f"Loaded ternary classifier model with {num_classes} classes: {label_encoder.classes_}")

    # Sanity-check that the imputer exposes the feature names it was fitted on.
    if imputer is not None:
        try:
            if hasattr(imputer, 'feature_names_in_'):
                print(f"Imputer has {len(imputer.feature_names_in_)} features")
                print(f"First few feature names: {imputer.feature_names_in_[:5]}")
            else:
                print("Warning: Imputer does not have feature_names_in_ attribute")
        except Exception as e:
            print(f"Error checking imputer: {str(e)}")

    return model, scaler, label_encoder, imputer
def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
    """Classify a single text: extract features, align/impute/scale them, then run the model."""
    features_df, text_analysis = extract_features(text, scores=scores)

    # Align the extracted features with the feature names the imputer was fitted on.
    if imputer is not None:
        expected_feature_names = imputer.feature_names_in_
    else:
        expected_feature_names = None

    if expected_feature_names is not None:
        aligned_features = pd.DataFrame(columns=expected_feature_names)
        for col in features_df.columns:
            if col in expected_feature_names:
                aligned_features[col] = features_df[col]
        # Any expected feature that was not extracted is filled with zeros.
        for col in expected_feature_names:
            if col not in aligned_features.columns or aligned_features[col].isnull().all():
                aligned_features[col] = 0
                print(f"Added missing feature: {col}")
        features_df = aligned_features

    if imputer is None:
        print("Warning: No imputer provided, creating a new one")
        imputer = SimpleImputer(strategy='mean')
        features = imputer.fit_transform(features_df)
    else:
        features = imputer.transform(features_df)

    features_scaled = scaler.transform(features)
    features_tensor = torch.FloatTensor(features_scaled).to(DEVICE)

    with torch.no_grad():
        outputs = model(features_tensor)
        probabilities = torch.softmax(outputs, dim=1)
        pred_class = torch.argmax(probabilities, dim=1).item()

    predicted_label = label_encoder.classes_[pred_class]
    probs_dict = {label_encoder.classes_[i]: probabilities[0][i].item()
                  for i in range(len(label_encoder.classes_))}

    return {
        'predicted_class': predicted_label,
        'probabilities': probs_dict,
        'features': features_df,
        'text_analysis': text_analysis,
        'scores': scores,
    }
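

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): it assumes the
# binary-classifier artifacts exist under models/medium_binary_classifier and
# that extract_features() accepts a plain string. The sample text is made up.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    model, scaler, label_encoder, imputer = load_model()
    sample_text = "This is a short example sentence to classify."
    result = classify_text(sample_text, model, scaler, label_encoder, imputer)
    print(f"Predicted class: {result['predicted_class']}")
    for label, prob in result['probabilities'].items():
        print(f"  {label}: {prob:.3f}")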