Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
c86e7b2
1
Parent(s):
8f00c3f
updated _create_prediction_df function
Browse files - run_prothgt_app.py +29 -18
run_prothgt_app.py
CHANGED
|
@@ -88,30 +88,41 @@ def _create_prediction_df(predictions, heterodata, protein_ids, go_category):
|
|
| 88 |
'GO_term_P': 'Biological Process',
|
| 89 |
'GO_term_C': 'Cellular Component'
|
| 90 |
}
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
# Number of GO terms for this category
|
| 95 |
n_go_terms = len(heterodata[go_category]['id_mapping'])
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Process predictions for each protein
|
| 98 |
for i, protein_id in enumerate(protein_ids):
|
| 99 |
-
# Get
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
})
|
| 108 |
-
all_predictions.append(prediction_df)
|
| 109 |
|
| 110 |
-
#
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def generate_prediction_df(protein_ids, model_paths, model_config_paths, go_category):
|
| 117 |
all_predictions = []
|
|
|
|
| 88 |
'GO_term_P': 'Biological Process',
|
| 89 |
'GO_term_C': 'Cellular Component'
|
| 90 |
}
|
| 91 |
+
|
| 92 |
+
# Get number of GO terms for this category
|
|
|
|
|
|
|
| 93 |
n_go_terms = len(heterodata[go_category]['id_mapping'])
|
| 94 |
|
| 95 |
+
# Create lists to store the data
|
| 96 |
+
all_proteins = []
|
| 97 |
+
all_go_terms = []
|
| 98 |
+
all_categories = []
|
| 99 |
+
all_probabilities = []
|
| 100 |
+
|
| 101 |
+
# Get list of GO terms once
|
| 102 |
+
go_terms = list(heterodata[go_category]['id_mapping'].keys())
|
| 103 |
+
|
| 104 |
# Process predictions for each protein
|
| 105 |
for i, protein_id in enumerate(protein_ids):
|
| 106 |
+
# Get predictions for this protein
|
| 107 |
+
start_idx = i * n_go_terms
|
| 108 |
+
end_idx = (i + 1) * n_go_terms
|
| 109 |
+
protein_predictions = predictions[start_idx:end_idx]
|
| 110 |
|
| 111 |
+
# Extend the lists
|
| 112 |
+
all_proteins.extend([protein_id] * n_go_terms)
|
| 113 |
+
all_go_terms.extend(go_terms)
|
| 114 |
+
all_categories.extend([go_category_dict[go_category]] * n_go_terms)
|
| 115 |
+
all_probabilities.extend(protein_predictions.tolist())
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
# Create DataFrame
|
| 118 |
+
prediction_df = pd.DataFrame({
|
| 119 |
+
'Protein': all_proteins,
|
| 120 |
+
'GO_term': all_go_terms,
|
| 121 |
+
'GO_category': all_categories,
|
| 122 |
+
'Probability': all_probabilities
|
| 123 |
+
})
|
| 124 |
+
|
| 125 |
+
return prediction_df
|
| 126 |
|
| 127 |
def generate_prediction_df(protein_ids, model_paths, model_config_paths, go_category):
|
| 128 |
all_predictions = []
|