| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib.cm as cm | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.metrics import r2_score | |
| from scipy.stats import pearsonr | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| from torch.utils.data import DataLoader, TensorDataset | |
| import os | |
# --- Data loading and preprocessing --------------------------------------
# Anchor the working directory to this script's folder so the relative
# paths ('data.csv', the output .png files) resolve the same way no matter
# where the script is launched from.
script_path = os.path.abspath(__file__)
script_dir = os.path.dirname(script_path)
os.chdir(script_dir)

# 'OS.time' is the regression target; every other column is a feature.
data = pd.read_csv('data.csv')
X = data.drop(columns=['OS.time']).values
y = data['OS.time'].values

# Quick sanity check: report NaN and inf counts before training.
print(f'{np.isnan(X).sum()} {np.isnan(y).sum()}')
print(f'{np.isinf(X).sum()} {np.isinf(y).sum()}')

# Standardize features, then hold out 20% as a fixed test split.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Wrap the splits as float32 tensors; targets become (n, 1) column vectors
# so they line up with the model's single output unit.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
class SimpleNN(nn.Module):
    """Two-hidden-layer MLP regressor: input_dim -> 100 -> 100 -> 1.

    ReLU activations with 50% dropout after each hidden layer; the final
    layer is purely linear, as expected for MSE regression.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Attribute names (fc1, dropout1, ...) are part of the public
        # surface: downstream code inspects model.fc1 and model.children().
        self.fc1 = nn.Linear(input_dim, 100)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(100, 100)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(100, 1)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, 1) predictions."""
        hidden = self.dropout1(torch.relu(self.fc1(x)))
        hidden = self.dropout2(torch.relu(self.fc2(hidden)))
        return self.fc3(hidden)
def weights_init(m):
    """Initialize Linear layers: Kaiming-uniform weights, zero biases.

    Intended for use via ``model.apply(weights_init)``; modules other
    than ``nn.Linear`` are left untouched.
    """
    if isinstance(m, nn.Linear):
        # FIX: pass nonlinearity='relu'. kaiming_uniform_ defaults to the
        # 'leaky_relu' gain, which mis-scales the init for this ReLU network.
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        nn.init.zeros_(m.bias)
# --- Model, loss, optimizer ----------------------------------------------
model = SimpleNN(X_train.shape[1])
model.apply(weights_init)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

best_test_loss = float('inf')
best_model_state = None
num_epochs = 10000

train_losses = []     # mean training MSE per epoch
test_losses = []      # mean test MSE per epoch
all_predictions = []  # per-epoch list of per-batch prediction arrays
gradients = []        # per-epoch mean-abs gradient per parameter per batch
r2_scores = []        # per-epoch R^2 on the test set

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    epoch_gradients = []
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        # Record pre-clipping gradient magnitudes for diagnostics.
        for param in model.parameters():
            epoch_gradients.append(param.grad.abs().mean().item())
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    gradients.append(epoch_gradients)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss}')

    # ---- evaluation pass ----
    model.eval()
    test_loss = 0.0
    predictions = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            predictions.append(outputs.numpy())
    test_loss /= len(test_loader)
    test_losses.append(test_loss)
    all_predictions.append(predictions)
    # test_loader is not shuffled, so the concatenated predictions line up
    # with y_test row-for-row.
    predictions_flat = np.concatenate(predictions).flatten()
    r2 = r2_score(y_test, predictions_flat)
    r2_scores.append(r2)
    print(f'Epoch {epoch+1}, R^2: {r2}')

    # ---- checkpointing ----
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        # BUG FIX: state_dict() returns live references to the model's
        # tensors, so the in-memory "best" snapshot would silently track
        # later optimizer updates. Clone each tensor to freeze it.
        best_model_state = {k: v.detach().clone()
                            for k, v in model.state_dict().items()}
        torch.save(best_model_state, 'best_model.pth')
        print(f'Saved new best model at epoch {epoch+1} with test loss {test_loss}')
# Plot raw and 50-epoch moving-average train/test loss curves.
window_size = 50
epochs_axis = range(1, num_epochs + 1)
train_losses_ma = pd.Series(train_losses).rolling(window=window_size).mean()
test_losses_ma = pd.Series(test_losses).rolling(window=window_size).mean()

plt.figure(figsize=(10, 5))
plt.plot(epochs_axis, train_losses, label='Train Loss')
plt.plot(epochs_axis, test_losses, label='Test Loss')
plt.plot(epochs_axis, train_losses_ma, label='Train Loss (MA)', linestyle='--')
plt.plot(epochs_axis, test_losses_ma, label='Test Loss (MA)', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train and Test Loss with Moving Average')
plt.legend()
plt.savefig('train_test_loss.png')
plt.close()
# --- Final-epoch evaluation ----------------------------------------------
# BUG FIX: each epoch's predictions are a list of per-batch arrays whose
# last batch is usually smaller than 32, so np.array() on the ragged list
# fails (or builds an object array). Concatenate along the batch axis,
# matching how predictions_flat is built inside the training loop.
final_predictions = np.concatenate(all_predictions[-1]).flatten()
actuals = y_test_tensor.numpy().flatten()

correlation, p_value = pearsonr(actuals, final_predictions)
print(f'Pearson Correlation: {correlation}')
print(f'P-value: {p_value}')

# Scatter predicted vs actual, with the identity line as the ideal fit.
plt.figure(figsize=(10, 5))
plt.scatter(actuals, final_predictions, color='blue',
            label=f'Predictions vs Actuals (r={correlation:.2f}, p={p_value:.2g})')
plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)],
         color='red', linestyle='--', label='Ideal Fit')
plt.xlabel('Actual OS.time')
plt.ylabel('Predicted OS.time')
plt.title('Predictions vs Actuals')
plt.legend()
plt.savefig('predictions_vs_actuals.png')
plt.close()

# Histogram of signed prediction errors (predicted - actual).
errors = final_predictions - actuals
plt.figure(figsize=(10, 5))
plt.hist(errors, bins=30, color='purple', alpha=0.7)
plt.xlabel('Prediction Error')
plt.ylabel('Frequency')
plt.title('Error Distribution')
plt.savefig('error_distribution.png')
plt.close()
# --- Prediction trajectories over training --------------------------------
actuals = y_test_tensor.numpy()
colors = cm.viridis(np.linspace(0, 1, num_epochs))

plt.figure(figsize=(10, 5))
plt.plot(actuals, label='Actual Values', color='b', marker='o', linestyle='-')
# Overlay predictions from ~100 evenly spaced epochs, colour-coded by epoch.
for i in range(0, num_epochs, max(1, num_epochs // 100)):
    # BUG FIX: np.concatenate handles the ragged list of per-batch arrays
    # (np.array would fail when the final batch is smaller than 32).
    predictions = np.concatenate(all_predictions[i]).flatten()
    plt.plot(predictions, label=f'Epoch {i+1}', color=colors[i], linestyle='--')
plt.xlabel('Sample Index')
plt.ylabel('OS.time')
plt.title('Actual vs Predicted Values Over Time')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.savefig('actual_vs_predicted_over_time.png')
plt.close()
# Histogram of the learned weights of every Linear layer.
# NOTE: enumerate(model.children()) counts the Dropout modules too, so the
# layer numbers in titles/filenames are child indices (1, 3, 5), matching
# the original output names.
for child_idx, child in enumerate(model.children()):
    if not isinstance(child, nn.Linear):
        continue
    weights_flat = child.weight.detach().numpy().flatten()
    plt.figure(figsize=(10, 5))
    plt.hist(weights_flat, bins=30, alpha=0.6, color='blue')
    plt.xlabel(f'Layer {child_idx+1} Weights')
    plt.ylabel('Frequency')
    plt.title(f'Weight Distribution of Layer {child_idx+1}')
    plt.savefig(f'layer_{child_idx+1}_weight_distribution.png')
    plt.close()
# Proxy feature importance: column-wise L1 norm of the first layer's
# weight matrix (how strongly each input feeds the hidden layer).
importances = np.abs(model.fc1.weight.detach().numpy()).sum(axis=0)
order = np.argsort(importances)  # ascending, so the largest bars sit on top

plt.figure(figsize=(10, 5))
plt.barh(range(X_train.shape[1]), importances[order], align='center')
plt.xlabel('Importance')
plt.ylabel('Feature Index')
plt.title('Feature Importances in the First Layer')
plt.savefig('feature_importances.png')
plt.close()
# Heatmap of each Linear layer's weight matrix (rows = neurons,
# columns = input features). Child indices mirror the histogram loop.
for child_idx, child in enumerate(model.children()):
    if not isinstance(child, nn.Linear):
        continue
    plt.figure(figsize=(10, 5))
    plt.imshow(child.weight.detach().numpy(), aspect='auto', cmap='viridis')
    plt.colorbar()
    plt.title(f'Weight Heatmap of Layer {child_idx+1}')
    plt.xlabel('Input Features')
    plt.ylabel('Neurons')
    plt.savefig(f'layer_{child_idx+1}_weight_heatmap.png')
    plt.close()
# Test-set R^2 per epoch, raw and smoothed with the same moving-average
# window as the loss curves.
r2_scores_ma = pd.Series(r2_scores).rolling(window=window_size).mean()
epochs_axis = range(1, num_epochs + 1)

plt.figure(figsize=(10, 5))
plt.plot(epochs_axis, r2_scores, label='R^2 Score')
plt.plot(epochs_axis, r2_scores_ma, label='R^2 Score (MA)', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('R^2 Score')
plt.title('R^2 Score over Epochs')
plt.legend()
plt.savefig('r2_over_epochs.png')
plt.close()