Spaces:
Sleeping
Sleeping
Update src/bin/target_family_classifier.py
Browse files
src/bin/target_family_classifier.py
CHANGED
|
@@ -4,6 +4,8 @@ Created on Mon Jun 8 09:32:26 2020
|
|
| 4 |
|
| 5 |
@author: Muammer
|
| 6 |
"""
|
|
|
|
|
|
|
| 7 |
|
| 8 |
import numpy as np
|
| 9 |
from sklearn.model_selection import cross_validate
|
|
@@ -88,7 +90,7 @@ def score_protein_rep(dataset):
|
|
| 88 |
|
| 89 |
vecsize = 0
|
| 90 |
#protein_list = pd.read_csv('../data/auxilary_input/entry_class.csv')
|
| 91 |
-
protein_list = pd.read_csv('../data/preprocess/entry_class_nn.csv')
|
| 92 |
dataframe = pd.read_csv(representation_path)
|
| 93 |
#dataframe = convert_dataframe_to_multi_col(dataframe)
|
| 94 |
#dataframe = pd.read_pickle(pkl_data_path)
|
|
@@ -129,8 +131,8 @@ def score_protein_rep(dataset):
|
|
| 129 |
mcc_perclass = []
|
| 130 |
sup_perclass = []
|
| 131 |
report_list = []
|
| 132 |
-
train_index = pd.read_csv('../data/preprocess/indexes/'+dataset+'_trainindex.csv')
|
| 133 |
-
test_index = pd.read_csv('../data/preprocess/indexes/testindex_family.csv')
|
| 134 |
train_index = train_index.dropna(axis=1)
|
| 135 |
test_index = test_index.dropna(axis=1)
|
| 136 |
#print(train_index)
|
|
@@ -187,7 +189,7 @@ def score_protein_rep(dataset):
|
|
| 187 |
report_list.append(class_report)
|
| 188 |
|
| 189 |
if detailed_output:
|
| 190 |
-
conf.to_csv('../results/Drug_target_protein_family_classification_confusion_'+dataset+'_'+representation_name+'.csv', index=None)
|
| 191 |
|
| 192 |
f1_perclass = pd.concat(f1_perclass, axis=1)
|
| 193 |
ac_perclass = pd.concat(ac_perclass, axis=1)
|
|
@@ -195,7 +197,7 @@ def score_protein_rep(dataset):
|
|
| 195 |
sup_perclass = pd.concat(sup_perclass, axis=1)
|
| 196 |
|
| 197 |
report_list = pd.concat(report_list, axis=1)
|
| 198 |
-
report_list.to_csv('../results/Drug_target_protein_family_classification_class_based_results_'+dataset+'_'+representation_name+'.csv')
|
| 199 |
|
| 200 |
report = pd.DataFrame()
|
| 201 |
f1mean = np.mean(f1, axis=0)
|
|
@@ -212,7 +214,7 @@ def score_protein_rep(dataset):
|
|
| 212 |
report['Accuracy'] = [acmean, acstd]
|
| 213 |
report['MCC'] = [mccmean, mccstd]
|
| 214 |
|
| 215 |
-
report.to_csv('../results/Drug_target_protein_family_classification_mean_results_'+dataset+'_'+representation_name+'.csv',index=False)
|
| 216 |
#report.to_csv('scores_general.csv')
|
| 217 |
#print(report)
|
| 218 |
if detailed_output:
|
|
|
|
| 4 |
|
| 5 |
@author: Muammer
|
| 6 |
"""
|
| 7 |
+
import os
|
| 8 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
from sklearn.model_selection import cross_validate
|
|
|
|
| 90 |
|
| 91 |
vecsize = 0
|
| 92 |
#protein_list = pd.read_csv('../data/auxilary_input/entry_class.csv')
|
| 93 |
+
protein_list = pd.read_csv(os.path.join(script_dir, '../data/preprocess/entry_class_nn.csv'))
|
| 94 |
dataframe = pd.read_csv(representation_path)
|
| 95 |
#dataframe = convert_dataframe_to_multi_col(dataframe)
|
| 96 |
#dataframe = pd.read_pickle(pkl_data_path)
|
|
|
|
| 131 |
mcc_perclass = []
|
| 132 |
sup_perclass = []
|
| 133 |
report_list = []
|
| 134 |
+
train_index = pd.read_csv(os.path.join(script_dir, '../data/preprocess/indexes/'+dataset+'_trainindex.csv'))
|
| 135 |
+
test_index = pd.read_csv(os.path.join(script_dir, '../data/preprocess/indexes/testindex_family.csv'))
|
| 136 |
train_index = train_index.dropna(axis=1)
|
| 137 |
test_index = test_index.dropna(axis=1)
|
| 138 |
#print(train_index)
|
|
|
|
| 189 |
report_list.append(class_report)
|
| 190 |
|
| 191 |
if detailed_output:
|
| 192 |
+
conf.to_csv(os.path.join(script_dir, '../results/Drug_target_protein_family_classification_confusion_'+dataset+'_'+representation_name+'.csv'), index=None)
|
| 193 |
|
| 194 |
f1_perclass = pd.concat(f1_perclass, axis=1)
|
| 195 |
ac_perclass = pd.concat(ac_perclass, axis=1)
|
|
|
|
| 197 |
sup_perclass = pd.concat(sup_perclass, axis=1)
|
| 198 |
|
| 199 |
report_list = pd.concat(report_list, axis=1)
|
| 200 |
+
report_list.to_csv(os.path.join(script_dir, '../results/Drug_target_protein_family_classification_class_based_results_'+dataset+'_'+representation_name+'.csv'))
|
| 201 |
|
| 202 |
report = pd.DataFrame()
|
| 203 |
f1mean = np.mean(f1, axis=0)
|
|
|
|
| 214 |
report['Accuracy'] = [acmean, acstd]
|
| 215 |
report['MCC'] = [mccmean, mccstd]
|
| 216 |
|
| 217 |
+
report.to_csv(os.path.join(script_dir, '../results/Drug_target_protein_family_classification_mean_results_'+dataset+'_'+representation_name+'.csv'), index=False)
|
| 218 |
#report.to_csv('scores_general.csv')
|
| 219 |
#print(report)
|
| 220 |
if detailed_output:
|