Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
e0fbc94
1
Parent(s):
9fb2870
updated node titles to contain node name instead of id
Browse files- visualize_kg.py +68 -7
visualize_kg.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
from pyvis.network import Network
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
|
| 4 |
NODE_TYPE_COLORS = {
|
| 5 |
'Disease': '#079dbb',
|
|
@@ -48,6 +50,40 @@ GO_CATEGORY_MAPPING = {
|
|
| 48 |
'Cellular Component': 'GO_term_C'
|
| 49 |
}
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def _gather_protein_edges(data, protein_id):
|
| 52 |
|
| 53 |
protein_idx = data['Protein']['id_mapping'][protein_id]
|
|
@@ -134,6 +170,10 @@ def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
|
|
| 134 |
|
| 135 |
|
| 136 |
def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
protein_edges = _gather_protein_edges(data, protein_id)
|
| 138 |
visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
|
| 139 |
print(f'Edges to be visualized: {visualized_edges}')
|
|
@@ -148,7 +188,6 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
| 148 |
}
|
| 149 |
|
| 150 |
# Convert groups_config to a JSON-compatible string
|
| 151 |
-
import json
|
| 152 |
groups_json = json.dumps(groups_config)
|
| 153 |
|
| 154 |
# Configure physics options with settings for better clustering
|
|
@@ -196,8 +235,15 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
| 196 |
"groups": """ + groups_json + "}")
|
| 197 |
|
| 198 |
# Add the main protein node
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
net.add_node(protein_id,
|
| 200 |
-
label=
|
|
|
|
| 201 |
color={'background': 'white', 'border': '#c1121f'},
|
| 202 |
borderWidth=4,
|
| 203 |
shape="dot",
|
|
@@ -226,11 +272,19 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
| 226 |
|
| 227 |
# Add source node if not present
|
| 228 |
if source_str not in added_nodes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
net.add_node(source_str,
|
| 230 |
-
label=
|
| 231 |
shape="dot",
|
| 232 |
font={'color': '#000000', 'size': 12},
|
| 233 |
-
title=
|
| 234 |
group=source_type,
|
| 235 |
size=15,
|
| 236 |
mass=1.5)
|
|
@@ -238,16 +292,23 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
| 238 |
|
| 239 |
# Add target node if not present
|
| 240 |
if target_str not in added_nodes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
net.add_node(target_str,
|
| 242 |
-
label=
|
| 243 |
shape="dot",
|
| 244 |
font={'color': '#000000', 'size': 12},
|
| 245 |
-
title=
|
| 246 |
group=target_type,
|
| 247 |
size=15,
|
| 248 |
mass=1.5)
|
| 249 |
added_nodes.add(target_str)
|
| 250 |
-
|
| 251 |
# Add edge with relationship type and probability as label
|
| 252 |
edge_label = f"{relation_type}"
|
| 253 |
if probability is not None:
|
|
|
|
| 1 |
from pyvis.network import Network
|
| 2 |
import os
|
| 3 |
+
import json
|
| 4 |
+
import gzip
|
| 5 |
|
| 6 |
NODE_TYPE_COLORS = {
|
| 7 |
'Disease': '#079dbb',
|
|
|
|
| 50 |
'Cellular Component': 'GO_term_C'
|
| 51 |
}
|
| 52 |
|
| 53 |
+
# URL templates for node types whose link depends only on the node ID.
# "{}" is replaced by the node ID.
_NODE_URL_TEMPLATES = {
    'Protein': "https://www.uniprot.org/uniprotkb/{}/entry",
    'HPO': "https://hpo.jax.org/browse/term/{}",
    'Drug': "https://go.drugbank.com/drugs/{}",
    'Compound': "https://www.ebi.ac.uk/chembl/explore/compound/{}",
    'Domain': "https://www.ebi.ac.uk/interpro/entry/InterPro/{}",
    'Pathway': "https://reactome.org/content/detail/{}",
    'kegg_Pathway': "https://www.genome.jp/pathway/{}",
    'EC_number': "https://enzyme.expasy.org/EC/{}",
}

# URL templates for disease IDs of the form "<ontology>:<local id>", keyed by
# the ontology prefix. "{}" is replaced by the part after the colon.
_DISEASE_ONTOLOGY_URL_TEMPLATES = {
    'EFO': "http://www.ebi.ac.uk/efo/EFO_{}",
    'MONDO': "http://purl.obolibrary.org/obo/MONDO_{}",
    'Orphanet': "http://www.orpha.net/ORDO/Orphanet_{}",
}


def get_node_url(node_type, node_id):
    """Return the external reference URL for a node, or None if there is none.

    Args:
        node_type: Node type name. Any type starting with 'GO_term' maps to
            QuickGO; 'Disease' is resolved from the ID's ontology prefix;
            the remaining supported types are listed in _NODE_URL_TEMPLATES.
        node_id: Node identifier, interpolated into the URL as-is (it is not
            URL-escaped).

    Returns:
        The URL string, or None when the node type is not supported or a
        disease ID carries an ontology prefix other than EFO, MONDO or
        Orphanet.
    """
    # Every GO term sub-type (any type starting with 'GO_term') shares one URL scheme.
    if node_type.startswith('GO_term'):
        return f"https://www.ebi.ac.uk/QuickGO/term/{node_id}"

    if node_type == 'Disease':
        # NOTE(review): un-prefixed disease IDs are sent to genome.jp; this
        # assumes the original `else` paired with the "':' in node_id" check
        # -- confirm against the upstream file.
        if ':' not in node_id:
            return f"https://www.genome.jp/entry/{node_id}"
        # Split once; only the first two fields are used, as before.
        ontology, local_id = node_id.split(':')[:2]
        template = _DISEASE_ONTOLOGY_URL_TEMPLATES.get(ontology)
        # Unrecognised ontology prefixes have no link.
        return template.format(local_id) if template is not None else None

    template = _NODE_URL_TEMPLATES.get(node_type)
    return template.format(node_id) if template is not None else None
|
| 86 |
+
|
| 87 |
def _gather_protein_edges(data, protein_id):
|
| 88 |
|
| 89 |
protein_idx = data['Protein']['id_mapping'][protein_id]
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
| 173 |
+
|
| 174 |
+
with gzip.open('data/name_info.json.gz', 'rt', encoding='utf-8') as file:
|
| 175 |
+
name_info = json.load(file)
|
| 176 |
+
|
| 177 |
protein_edges = _gather_protein_edges(data, protein_id)
|
| 178 |
visualized_edges = _filter_edges(protein_id, protein_edges, prediction_df, limit)
|
| 179 |
print(f'Edges to be visualized: {visualized_edges}')
|
|
|
|
| 188 |
}
|
| 189 |
|
| 190 |
# Convert groups_config to a JSON-compatible string
|
|
|
|
| 191 |
groups_json = json.dumps(groups_config)
|
| 192 |
|
| 193 |
# Configure physics options with settings for better clustering
|
|
|
|
| 235 |
"groups": """ + groups_json + "}")
|
| 236 |
|
| 237 |
# Add the main protein node
|
| 238 |
+
query_node_url = get_node_url('Protein', protein_id)
|
| 239 |
+
node_name = name_info['Protein'][protein_id]
|
| 240 |
+
query_node_title = f"{node_name} (Query Protein)"
|
| 241 |
+
if query_node_url:
|
| 242 |
+
query_node_title = f'<a href="{query_node_url}" target="_blank">{query_node_title}</a>'
|
| 243 |
+
|
| 244 |
net.add_node(protein_id,
|
| 245 |
+
label=protein_id,
|
| 246 |
+
title=query_node_title,
|
| 247 |
color={'background': 'white', 'border': '#c1121f'},
|
| 248 |
borderWidth=4,
|
| 249 |
shape="dot",
|
|
|
|
| 272 |
|
| 273 |
# Add source node if not present
|
| 274 |
if source_str not in added_nodes:
|
| 275 |
+
if not source_type.startswith('GO_term'):
|
| 276 |
+
node_name = name_info[source_type][source_str]
|
| 277 |
+
else:
|
| 278 |
+
node_name = name_info['GO_term'][source_str]
|
| 279 |
+
url = get_node_url(source_type, source_str)
|
| 280 |
+
title = f"{node_name} ({NODE_LABEL_TRANSLATION[source_type] if source_type in NODE_LABEL_TRANSLATION else source_type})"
|
| 281 |
+
if url:
|
| 282 |
+
title = f'<a href="{url}" target="_blank">{title}</a>'
|
| 283 |
net.add_node(source_str,
|
| 284 |
+
label=source_str,
|
| 285 |
shape="dot",
|
| 286 |
font={'color': '#000000', 'size': 12},
|
| 287 |
+
title=title,
|
| 288 |
group=source_type,
|
| 289 |
size=15,
|
| 290 |
mass=1.5)
|
|
|
|
| 292 |
|
| 293 |
# Add target node if not present
|
| 294 |
if target_str not in added_nodes:
|
| 295 |
+
if not target_type.startswith('GO_term'):
|
| 296 |
+
node_name = name_info[target_type][target_str]
|
| 297 |
+
else:
|
| 298 |
+
node_name = name_info['GO_term'][target_str]
|
| 299 |
+
url = get_node_url(target_type, target_str)
|
| 300 |
+
title = f"{node_name} ({NODE_LABEL_TRANSLATION[target_type] if target_type in NODE_LABEL_TRANSLATION else target_type})"
|
| 301 |
+
if url:
|
| 302 |
+
title = f'<a href="{url}" target="_blank">{title}</a>'
|
| 303 |
net.add_node(target_str,
|
| 304 |
+
label=target_str,
|
| 305 |
shape="dot",
|
| 306 |
font={'color': '#000000', 'size': 12},
|
| 307 |
+
title=title,
|
| 308 |
group=target_type,
|
| 309 |
size=15,
|
| 310 |
mass=1.5)
|
| 311 |
added_nodes.add(target_str)
|
|
|
|
| 312 |
# Add edge with relationship type and probability as label
|
| 313 |
edge_label = f"{relation_type}"
|
| 314 |
if probability is not None:
|