Spaces:
Running
Running
Update src/bin/function_predictor.py
Browse files
src/bin/function_predictor.py
CHANGED
|
@@ -83,8 +83,16 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
|
|
| 83 |
rc_we_cv.append(np.round(recall_score(y.iloc[fold_test_index, :], y_pred[fold_test_index], average="weighted"), decimals=5))
|
| 84 |
hamm_cv.append(np.round(hamming_loss(y.iloc[fold_test_index, :], y_pred[fold_test_index]), decimals=5))
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
return {
|
| 87 |
"cv_results": [representation_name + "_" + dataset, acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv],
|
|
|
|
|
|
|
| 88 |
"predictions": y_pred
|
| 89 |
}
|
| 90 |
|
|
@@ -101,6 +109,8 @@ def ProtDescModel():
|
|
| 101 |
filtered_datasets = [dataset for dataset in datasets if aspect_type in dataset and dataset_type in dataset]
|
| 102 |
|
| 103 |
cv_results = []
|
|
|
|
|
|
|
| 104 |
|
| 105 |
for dt in tqdm(filtered_datasets, total=len(filtered_datasets)):
|
| 106 |
print(f"Protein function prediction is started for the dataset: {dt.split('.')[0]}")
|
|
@@ -119,17 +129,26 @@ def ProtDescModel():
|
|
| 119 |
|
| 120 |
if model is not None:
|
| 121 |
cv_results.append(model["cv_results"])
|
|
|
|
|
|
|
| 122 |
|
| 123 |
return {
|
| 124 |
-
"cv_results": cv_results
|
|
|
|
|
|
|
| 125 |
}
|
| 126 |
|
| 127 |
def pred_output():
|
| 128 |
model = ProtDescModel()
|
| 129 |
cv_result = model["cv_results"]
|
| 130 |
|
|
|
|
|
|
|
|
|
|
| 131 |
return {
|
| 132 |
-
"cv_result": cv_result
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
# Example call to the function
|
|
|
|
| 83 |
rc_we_cv.append(np.round(recall_score(y.iloc[fold_test_index, :], y_pred[fold_test_index], average="weighted"), decimals=5))
|
| 84 |
hamm_cv.append(np.round(hamming_loss(y.iloc[fold_test_index, :], y_pred[fold_test_index]), decimals=5))
|
| 85 |
|
| 86 |
+
means = list(np.mean([acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv], axis=1))
|
| 87 |
+
means = [np.round(i, decimals=5) for i in means]
|
| 88 |
+
|
| 89 |
+
stds = list(np.std([acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv], axis=1))
|
| 90 |
+
stds = [np.round(i, decimals=5) for i in stds]
|
| 91 |
+
|
| 92 |
return {
|
| 93 |
"cv_results": [representation_name + "_" + dataset, acc_cv, f1_mi_cv, f1_ma_cv, f1_we_cv, pr_mi_cv, pr_ma_cv, pr_we_cv, rc_mi_cv, rc_ma_cv, rc_we_cv, hamm_cv],
|
| 94 |
+
"means": [representation_name + "_" + dataset] + means,
|
| 95 |
+
"stds": [representation_name + "_" + dataset] + stds,
|
| 96 |
"predictions": y_pred
|
| 97 |
}
|
| 98 |
|
|
|
|
| 109 |
filtered_datasets = [dataset for dataset in datasets if aspect_type in dataset and dataset_type in dataset]
|
| 110 |
|
| 111 |
cv_results = []
|
| 112 |
+
cv_mean_results = []
|
| 113 |
+
cv_std_results = []
|
| 114 |
|
| 115 |
for dt in tqdm(filtered_datasets, total=len(filtered_datasets)):
|
| 116 |
print(f"Protein function prediction is started for the dataset: {dt.split('.')[0]}")
|
|
|
|
| 129 |
|
| 130 |
if model is not None:
|
| 131 |
cv_results.append(model["cv_results"])
|
| 132 |
+
cv_mean_results.append(model["means"])
|
| 133 |
+
cv_std_results.append(model["stds"])
|
| 134 |
|
| 135 |
return {
|
| 136 |
+
"cv_results": cv_results,
|
| 137 |
+
"cv_mean_results": cv_mean_results,
|
| 138 |
+
"cv_std_results": cv_std_results
|
| 139 |
}
|
| 140 |
|
| 141 |
def pred_output():
|
| 142 |
model = ProtDescModel()
|
| 143 |
cv_result = model["cv_results"]
|
| 144 |
|
| 145 |
+
cv_mean_result = model["cv_mean_results"]
|
| 146 |
+
cv_std_result = model["cv_std_results"]
|
| 147 |
+
|
| 148 |
return {
|
| 149 |
+
"cv_result": cv_result,
|
| 150 |
+
"cv_mean_result": cv_mean_result,
|
| 151 |
+
"cv_std_result": cv_std_result
|
| 152 |
}
|
| 153 |
|
| 154 |
# Example call to the function
|