Spaces:
Sleeping
Sleeping
Update src/bin/PROBE.py
Browse files- src/bin/PROBE.py +0 -44
src/bin/PROBE.py
CHANGED
|
@@ -6,16 +6,6 @@ from . import target_family_classifier as tfc
|
|
| 6 |
from . import function_predictor as fp
|
| 7 |
from . import binding_affinity_estimator as bae
|
| 8 |
|
| 9 |
-
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
| 10 |
-
|
| 11 |
-
with open('probe_config.yaml') as f:
|
| 12 |
-
args = yaml.load(f, Loader=yaml.FullLoader)
|
| 13 |
-
|
| 14 |
-
if args["benchmark"] not in ["similarity","family","function","affinity","all"]:
|
| 15 |
-
parser.error('At least one benchmark type should be selected')
|
| 16 |
-
|
| 17 |
-
print(args)
|
| 18 |
-
|
| 19 |
def load_representation(multi_col_representation_vector_file_path):
|
| 20 |
multi_col_representation_vector = pd.read_csv(multi_col_representation_vector_file_path)
|
| 21 |
vals = multi_col_representation_vector.iloc[:,1:(len(multi_col_representation_vector.columns))]
|
|
@@ -25,40 +15,6 @@ def load_representation(multi_col_representation_vector_file_path):
|
|
| 25 |
original_values_as_df.loc[index] = [multi_col_representation_vector.iloc[index]['Entry']] + [list_of_floats]
|
| 26 |
return original_values_as_df
|
| 27 |
|
| 28 |
-
if args["benchmark"] in ["similarity","function","all"]:
|
| 29 |
-
print("\nRepresentation vectors are loading...\n")
|
| 30 |
-
representation_dataframe = load_representation(args["representation_file_human"])
|
| 31 |
-
|
| 32 |
-
if args["benchmark"] in ["similarity","all"]:
|
| 33 |
-
print("\nSemantic similarity Inference Benchmark is running...\n")
|
| 34 |
-
ssi.representation_dataframe = representation_dataframe
|
| 35 |
-
ssi.representation_name = args["representation_name"]
|
| 36 |
-
ssi.protein_names = ssi.representation_dataframe['Entry'].tolist()
|
| 37 |
-
ssi.similarity_tasks = args["similarity_tasks"]
|
| 38 |
-
ssi.detailed_output = args["detailed_output"]
|
| 39 |
-
ssi.calculate_all_correlations()
|
| 40 |
-
if args["benchmark"] in ["function","all"]:
|
| 41 |
-
print("\n\nOntology-based protein function prediction benchmark is running...\n")
|
| 42 |
-
fp.aspect_type = args["function_prediction_aspect"]
|
| 43 |
-
fp.dataset_type = args["function_prediction_dataset"]
|
| 44 |
-
fp.representation_dataframe = representation_dataframe
|
| 45 |
-
fp.representation_name = args["representation_name"]
|
| 46 |
-
fp.detailed_output = args["detailed_output"]
|
| 47 |
-
fp.pred_output()
|
| 48 |
-
if args["benchmark"] in ["family","all"]:
|
| 49 |
-
print("\n\nDrug target protein family classification benchmark is running...\n")
|
| 50 |
-
tfc.representation_path = args["representation_file_human"]
|
| 51 |
-
tfc.representation_name = args["representation_name"]
|
| 52 |
-
tfc.detailed_output = args["detailed_output"]
|
| 53 |
-
for dataset in args["family_prediction_dataset"]:
|
| 54 |
-
tfc.score_protein_rep(dataset)
|
| 55 |
-
if args["benchmark"] in ["affinity","all"]:
|
| 56 |
-
print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
|
| 57 |
-
bae.skempi_vectors_path = args["representation_file_affinity"]
|
| 58 |
-
bae.representation_name = args["representation_name"]
|
| 59 |
-
bae.predict_affinities_and_report_results()
|
| 60 |
-
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
|
| 61 |
-
|
| 62 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspec="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
| 63 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
| 64 |
|
|
|
|
| 6 |
from . import function_predictor as fp
|
| 7 |
from . import binding_affinity_estimator as bae
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def load_representation(multi_col_representation_vector_file_path):
|
| 10 |
multi_col_representation_vector = pd.read_csv(multi_col_representation_vector_file_path)
|
| 11 |
vals = multi_col_representation_vector.iloc[:,1:(len(multi_col_representation_vector.columns))]
|
|
|
|
| 15 |
original_values_as_df.loc[index] = [multi_col_representation_vector.iloc[index]['Entry']] + [list_of_floats]
|
| 16 |
return original_values_as_df
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspec="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
| 19 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
| 20 |
|