Spaces:
Runtime error
Runtime error
Simon Duerr
committed on
Commit
·
46124fc
1
Parent(s):
a412119
fix voxelization issue with structures containing H
Browse files
- app.py +67 -35
- utils/helpers.py +6 -4
- utils/voxelization.py +3 -1
app.py
CHANGED
|
@@ -52,26 +52,35 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
|
|
| 52 |
return "pdb code must be 4 letters or Uniprot code does not match", ""
|
| 53 |
identifier = os.path.basename(filepath)
|
| 54 |
if mode == "All residues":
|
| 55 |
-
print(
|
| 56 |
ids = get_all_protein_resids(filepath)
|
| 57 |
-
elif len(custom_resids)!=0:
|
| 58 |
-
print(
|
| 59 |
-
ids=get_all_resids_from_list(filepath,custom_resids.replace(","," "))
|
| 60 |
else:
|
| 61 |
-
print(
|
| 62 |
ids = get_all_metalbinding_resids(filepath)
|
| 63 |
print(filepath)
|
| 64 |
print(ids)
|
| 65 |
try:
|
| 66 |
voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
|
| 67 |
except Exception as e:
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 70 |
voxels.to(device)
|
| 71 |
-
|
| 72 |
model = Model()
|
| 73 |
model.to(device)
|
| 74 |
-
model.load_state_dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
model.eval()
|
| 76 |
with warnings.catch_warnings():
|
| 77 |
warnings.filterwarnings("ignore")
|
|
@@ -107,7 +116,6 @@ def update(inp, file, mode, custom_resids, clustering_threshold):
|
|
| 107 |
)
|
| 108 |
|
| 109 |
|
| 110 |
-
|
| 111 |
def read_mol(molpath):
|
| 112 |
with open(molpath, "r") as fp:
|
| 113 |
lines = fp.readlines()
|
|
@@ -171,7 +179,7 @@ def molecule(pdb, probes, cube):
|
|
| 171 |
</div>
|
| 172 |
<div class="px-4">
|
| 173 |
<label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
|
| 174 |
-
<input id="pdbmetal" type="checkbox" class="sr-only peer"
|
| 175 |
<div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
|
| 176 |
<span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
|
| 177 |
</label>
|
|
@@ -311,13 +319,11 @@ def molecule(pdb, probes, cube):
|
|
| 311 |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
| 312 |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
|
| 313 |
|
|
|
|
| 314 |
def set_examples(example):
|
| 315 |
-
n,code, resids = example
|
| 316 |
-
return [
|
| 317 |
-
|
| 318 |
-
code,
|
| 319 |
-
resids
|
| 320 |
-
]
|
| 321 |
|
| 322 |
metal3d = gr.Blocks()
|
| 323 |
|
|
@@ -325,40 +331,66 @@ with metal3d:
|
|
| 325 |
gr.Markdown("# Metal3D")
|
| 326 |
with gr.Tabs():
|
| 327 |
with gr.TabItem("Input"):
|
| 328 |
-
inp = gr.Textbox(
|
|
|
|
|
|
|
| 329 |
)
|
| 330 |
file = gr.File(file_count="single", type="file")
|
| 331 |
-
|
| 332 |
with gr.TabItem("Settings"):
|
| 333 |
with gr.Row():
|
| 334 |
mode = gr.Radio(
|
| 335 |
["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
|
| 336 |
label="Residues to use for prediction",
|
| 337 |
)
|
| 338 |
-
custom_resids = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
| 339 |
with gr.Row():
|
| 340 |
-
clustering_threshold = gr.Slider(
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
btn = gr.Button("Run")
|
| 343 |
-
n = gr.Textbox(label="Label",visible=False)
|
| 344 |
-
examples = gr.Dataset(
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
|
| 351 |
-
#gr.Markdown(
|
| 352 |
# """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
|
| 353 |
-
#"""
|
| 354 |
-
#)
|
| 355 |
-
|
| 356 |
|
| 357 |
gr.Markdown("# Output")
|
| 358 |
-
|
| 359 |
out = gr.Textbox(label="status")
|
| 360 |
mol = gr.HTML()
|
| 361 |
-
btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
metal3d.launch(share=True)
|
| 364 |
-
|
|
|
|
| 52 |
return "pdb code must be 4 letters or Uniprot code does not match", ""
|
| 53 |
identifier = os.path.basename(filepath)
|
| 54 |
if mode == "All residues":
|
| 55 |
+
print("using all residues")
|
| 56 |
ids = get_all_protein_resids(filepath)
|
| 57 |
+
elif len(custom_resids) != 0:
|
| 58 |
+
print("using listed residues", custom_resids)
|
| 59 |
+
ids = get_all_resids_from_list(filepath, custom_resids.replace(",", " "))
|
| 60 |
else:
|
| 61 |
+
print("using metalbinding")
|
| 62 |
ids = get_all_metalbinding_resids(filepath)
|
| 63 |
print(filepath)
|
| 64 |
print(ids)
|
| 65 |
try:
|
| 66 |
voxels, prot_centers, prot_N, prots = processStructures(filepath, ids)
|
| 67 |
except Exception as e:
|
| 68 |
+
print(e)
|
| 69 |
+
return (
|
| 70 |
+
"Error",
|
| 71 |
+
f"""<div class="text-center mt-4"> Something went wrong with the voxelization, reset custom residues and other input fiels and check error message <br> <br> <code>{e}</code></div>""",
|
| 72 |
+
)
|
| 73 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 74 |
voxels.to(device)
|
| 75 |
+
|
| 76 |
model = Model()
|
| 77 |
model.to(device)
|
| 78 |
+
model.load_state_dict(
|
| 79 |
+
torch.load(
|
| 80 |
+
"weights/metal_0.5A_v3_d0.2_16Abox.pth",
|
| 81 |
+
map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
|
| 82 |
+
)
|
| 83 |
+
)
|
| 84 |
model.eval()
|
| 85 |
with warnings.catch_warnings():
|
| 86 |
warnings.filterwarnings("ignore")
|
|
|
|
| 116 |
)
|
| 117 |
|
| 118 |
|
|
|
|
| 119 |
def read_mol(molpath):
|
| 120 |
with open(molpath, "r") as fp:
|
| 121 |
lines = fp.readlines()
|
|
|
|
| 179 |
</div>
|
| 180 |
<div class="px-4">
|
| 181 |
<label for="pdbmetal" class="relative inline-flex items-center mb-4 cursor-pointer ">
|
| 182 |
+
<input id="pdbmetal" type="checkbox" class="sr-only peer">
|
| 183 |
<div class="w-11 h-6 bg-gray-200 rounded-full peer peer-focus:ring-4 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:absolute after:top-0.5 after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"></div>
|
| 184 |
<span class="ml-3 text-sm font-medium text-gray-900 dark:text-gray-300">Show PDB metals</span>
|
| 185 |
</label>
|
|
|
|
| 319 |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
|
| 320 |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
|
| 321 |
|
| 322 |
+
|
| 323 |
def set_examples(example):
|
| 324 |
+
n, code, resids = example
|
| 325 |
+
return [n, code, resids]
|
| 326 |
+
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
metal3d = gr.Blocks()
|
| 329 |
|
|
|
|
| 331 |
gr.Markdown("# Metal3D")
|
| 332 |
with gr.Tabs():
|
| 333 |
with gr.TabItem("Input"):
|
| 334 |
+
inp = gr.Textbox(
|
| 335 |
+
placeholder="PDB Code or Uniprot identifier or upload file below",
|
| 336 |
+
label="Input molecule",
|
| 337 |
)
|
| 338 |
file = gr.File(file_count="single", type="file")
|
| 339 |
+
|
| 340 |
with gr.TabItem("Settings"):
|
| 341 |
with gr.Row():
|
| 342 |
mode = gr.Radio(
|
| 343 |
["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
|
| 344 |
label="Residues to use for prediction",
|
| 345 |
)
|
| 346 |
+
custom_resids = gr.Textbox(
|
| 347 |
+
placeholder="Comma separated list of residues",
|
| 348 |
+
label="Custom residues",
|
| 349 |
+
)
|
| 350 |
with gr.Row():
|
| 351 |
+
clustering_threshold = gr.Slider(
|
| 352 |
+
minimum=0.15,
|
| 353 |
+
maximum=1,
|
| 354 |
+
value=0.15,
|
| 355 |
+
step=0.05,
|
| 356 |
+
label="Clustering threshold",
|
| 357 |
+
)
|
| 358 |
+
distance_cutoff = gr.Slider(
|
| 359 |
+
minimum=1,
|
| 360 |
+
maximum=10,
|
| 361 |
+
value=7,
|
| 362 |
+
step=1,
|
| 363 |
+
label="Clustering distance cutoff",
|
| 364 |
+
)
|
| 365 |
btn = gr.Button("Run")
|
| 366 |
+
n = gr.Textbox(label="Label", visible=False)
|
| 367 |
+
examples = gr.Dataset(
|
| 368 |
+
components=[n, inp, custom_resids],
|
| 369 |
+
samples=[
|
| 370 |
+
["HCA2", "2CBA", ""],
|
| 371 |
+
["Nickel in GB1 dimer", "6F5N", ""],
|
| 372 |
+
["Zebrafish palmitoyltransferase ZDHHC15B PDB", "6BMS", ""],
|
| 373 |
+
[
|
| 374 |
+
"Human palmitoyltransferase ZDHHC23 AlphaFold",
|
| 375 |
+
"Q8IYP9",
|
| 376 |
+
"280,273,263,260,274,277,274,287",
|
| 377 |
+
],
|
| 378 |
+
],
|
| 379 |
+
)
|
| 380 |
examples.click(fn=set_examples, inputs=examples, outputs=examples.components)
|
| 381 |
+
# gr.Markdown(
|
| 382 |
# """ <small>Inference using CPU-only, can be quite slow for more than 20 residues. Use Colab notebook for GPU acceleration</small>
|
| 383 |
+
# """
|
| 384 |
+
# )
|
|
|
|
| 385 |
|
| 386 |
gr.Markdown("# Output")
|
| 387 |
+
|
| 388 |
out = gr.Textbox(label="status")
|
| 389 |
mol = gr.HTML()
|
| 390 |
+
btn.click(
|
| 391 |
+
fn=update,
|
| 392 |
+
inputs=[inp, file, mode, custom_resids, clustering_threshold],
|
| 393 |
+
outputs=[out, mol],
|
| 394 |
+
)
|
| 395 |
|
| 396 |
metal3d.launch(share=True)
|
|
|
utils/helpers.py
CHANGED
|
@@ -89,7 +89,7 @@ def get_all_protein_resids(pdb_file):
|
|
| 89 |
prot = Molecule(pdb_file)
|
| 90 |
except:
|
| 91 |
exit("could not read file")
|
| 92 |
-
prot.filter("protein")
|
| 93 |
return prot.get("index", sel="name CA")
|
| 94 |
|
| 95 |
|
|
@@ -112,12 +112,13 @@ def get_all_metalbinding_resids(pdb_file):
|
|
| 112 |
prot = Molecule(pdb_file)
|
| 113 |
except:
|
| 114 |
exit("could not read file")
|
| 115 |
-
prot.filter("protein")
|
| 116 |
return prot.get(
|
| 117 |
"index",
|
| 118 |
sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
|
| 119 |
)
|
| 120 |
|
|
|
|
| 121 |
def get_all_resids_from_list(pdb_file, resids):
|
| 122 |
"""Return all metal binding residues from a pdb file
|
| 123 |
|
|
@@ -131,7 +132,7 @@ def get_all_resids_from_list(pdb_file, resids):
|
|
| 131 |
Returns
|
| 132 |
-------
|
| 133 |
resids : numpy.ndarray
|
| 134 |
-
indexes of name CA resids
|
| 135 |
|
| 136 |
"""
|
| 137 |
|
|
@@ -139,12 +140,13 @@ def get_all_resids_from_list(pdb_file, resids):
|
|
| 139 |
prot = Molecule(pdb_file)
|
| 140 |
except:
|
| 141 |
exit("could not read file")
|
| 142 |
-
prot.filter("protein")
|
| 143 |
return prot.get(
|
| 144 |
"index",
|
| 145 |
sel=f"name CA and resid {resids}",
|
| 146 |
)
|
| 147 |
|
|
|
|
| 148 |
def compute_average_p_fast(point, cutoff=1):
|
| 149 |
"""Using KDTree find the closest gridpoints
|
| 150 |
|
|
|
|
| 89 |
prot = Molecule(pdb_file)
|
| 90 |
except:
|
| 91 |
exit("could not read file")
|
| 92 |
+
prot.filter("protein and not hydrogen")
|
| 93 |
return prot.get("index", sel="name CA")
|
| 94 |
|
| 95 |
|
|
|
|
| 112 |
prot = Molecule(pdb_file)
|
| 113 |
except:
|
| 114 |
exit("could not read file")
|
| 115 |
+
prot.filter("protein and not hydrogen")
|
| 116 |
return prot.get(
|
| 117 |
"index",
|
| 118 |
sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
|
| 119 |
)
|
| 120 |
|
| 121 |
+
|
| 122 |
def get_all_resids_from_list(pdb_file, resids):
|
| 123 |
"""Return all metal binding residues from a pdb file
|
| 124 |
|
|
|
|
| 132 |
Returns
|
| 133 |
-------
|
| 134 |
resids : numpy.ndarray
|
| 135 |
+
indexes of name CA resids
|
| 136 |
|
| 137 |
"""
|
| 138 |
|
|
|
|
| 140 |
prot = Molecule(pdb_file)
|
| 141 |
except:
|
| 142 |
exit("could not read file")
|
| 143 |
+
prot.filter("protein and not hydrogen")
|
| 144 |
return prot.get(
|
| 145 |
"index",
|
| 146 |
sel=f"name CA and resid {resids}",
|
| 147 |
)
|
| 148 |
|
| 149 |
+
|
| 150 |
def compute_average_p_fast(point, cutoff=1):
|
| 151 |
"""Using KDTree find the closest gridpoints
|
| 152 |
|
utils/voxelization.py
CHANGED
|
@@ -136,7 +136,9 @@ def voxelize_single_notcentered(env):
|
|
| 136 |
voxelsize=0.5,
|
| 137 |
validitychecks=False,
|
| 138 |
)
|
| 139 |
-
except:
|
|
|
|
|
|
|
| 140 |
raise VoxelizationError(f"voxelization of {id} failed")
|
| 141 |
nchannels = prot_vox.shape[1]
|
| 142 |
prot_vox_t = (
|
|
|
|
| 136 |
voxelsize=0.5,
|
| 137 |
validitychecks=False,
|
| 138 |
)
|
| 139 |
+
except Exception as e:
|
| 140 |
+
print(e)
|
| 141 |
+
print(id)
|
| 142 |
raise VoxelizationError(f"voxelization of {id} failed")
|
| 143 |
nchannels = prot_vox.shape[1]
|
| 144 |
prot_vox_t = (
|