Spaces:
Runtime error
Runtime error
| import time | |
| import gradio as gr | |
| from gradio_molecule3d import Molecule3D | |
| import sys | |
| import os | |
| import os | |
| import numpy as np | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem | |
| from rdkit.Chem import Draw | |
| from rdkit.Chem.Draw import IPythonConsole | |
| from rdkit.Chem import DataStructs | |
| from rdkit.Chem import RDConfig | |
| from rdkit.Chem import rdBase | |
| import pickle | |
| from Bio.PDB import * | |
| from Bio import PDB | |
| import requests | |
| import subprocess | |
| import mdtraj as md | |
| from enspara import geometry | |
| from sklearn.cluster import DBSCAN | |
| import pandas as pd | |
def run_smina(
    ligand_path,
    protein_path,
    out_path,
    pocket_center,
    pocket_size,
    num_poses=1,
    exhaustiveness=1,
    *,
    smina_binary="./smina.static",
):
    """
    Perform docking with Smina.

    Parameters
    ----------
    ligand_path: str or pathlib.Path
        Path to ligand PDBQT file that should be docked.
    protein_path: str or pathlib.Path
        Path to protein PDBQT file that should be docked to.
    out_path: str or pathlib.Path
        Path to which docking poses should be saved, SDF or PDB format.
    pocket_center: iterable of float or int
        Coordinates defining the center of the binding site.
        Only the first three entries (x, y, z) are read.
    pocket_size: iterable of float or int
        Lengths of edges defining the binding site.
        Only the first three entries (x, y, z) are read.
    num_poses: int
        Maximum number of poses to generate.
    exhaustiveness: int
        Accuracy of docking calculations.
    smina_binary: str or pathlib.Path
        Keyword-only. Executable to invoke; defaults to ``./smina.static``,
        i.e. a binary located in the current working directory.

    Returns
    -------
    output_text: str
        The output of the Smina calculation.

    Raises
    ------
    subprocess.CalledProcessError
        If the executable exits with a non-zero status.
    OSError
        If the executable cannot be started (e.g. it does not exist).
    """
    command = [
        str(smina_binary),
        "--ligand",
        str(ligand_path),
        "--receptor",
        str(protein_path),
        "--out",
        str(out_path),
    ]

    # The search box is passed as three centre flags followed by three size
    # flags; index explicitly so that any indexable of length >= 3 is accepted.
    axes = ("x", "y", "z")
    for index, axis in enumerate(axes):
        command += [f"--center_{axis}", str(pocket_center[index])]
    for index, axis in enumerate(axes):
        command += [f"--size_{axis}", str(pocket_size[index])]

    command += [
        "--num_modes",
        str(num_poses),
        "--exhaustiveness",
        str(exhaustiveness),
    ]

    # check_output only returns once the child process has exited, so no
    # additional waiting is required before the caller reads ``out_path``.
    return subprocess.check_output(
        command,
        universal_newlines=True,  # needed to capture output as text
    )
def _write_ligand_sdf(smiles, sdf_path):
    """Turn a SMILES string into a hydrogenated 3D conformer and write it to ``sdf_path``."""
    if not smiles or not smiles.strip():
        raise gr.Error("need ligand SMILES input")

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit reports an unparsable SMILES by returning None instead of raising.
        raise gr.Error(f"could not parse ligand SMILES: {smiles}")

    mol_with_h = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no conformer could be generated; the
    # subsequent force-field step would then fail with an opaque error.
    if AllChem.EmbedMolecule(mol_with_h) == -1:
        raise gr.Error("could not generate 3D coordinates for the ligand")
    # Best-effort geometry clean-up; the return code is ignored as before.
    AllChem.MMFFOptimizeMolecule(mol_with_h)

    writer = Chem.SDWriter(sdf_path)
    try:
        writer.write(mol_with_h)
    finally:
        # Close explicitly so the file is flushed before obabel reads it.
        writer.close()


def _run_obabel(arguments):
    """Run Open Babel with ``arguments`` and surface any failure as a Gradio error."""
    try:
        # List form avoids the shell, so paths containing spaces or shell
        # metacharacters are passed through unchanged.
        subprocess.run(["obabel", *arguments], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(f"obabel conversion failed: {err}") from err


def _find_pocket_center(protein_pdb_path, pocket_pdb_path):
    """Return the centre of the densest pocket cluster, read back from ``pocket_pdb_path``."""
    pdb = md.load(protein_pdb_path)
    # run ligsite
    pockets_xyz = geometry.pockets.get_pocket_cells(struct=pdb)
    if len(pockets_xyz) == 0:
        raise gr.Error("no binding pocket found in the protein structure")

    labels = DBSCAN(eps=0.15, min_samples=5).fit_predict(pockets_xyz)

    # Find the unique clusters and their sizes, excluding noise points (-1).
    unique_labels, counts = np.unique(labels, return_counts=True)
    is_cluster = unique_labels != -1
    valid_clusters = unique_labels[is_cluster]
    valid_counts = counts[is_cluster]
    if valid_counts.size == 0:
        # np.argmax on an empty array would raise an unhelpful ValueError.
        raise gr.Error("no binding pocket found in the protein structure")

    # Keep the cluster with the most points (highest density).
    densest_cluster_label = valid_clusters[np.argmax(valid_counts)]
    densest_cluster_points = pockets_xyz[labels == densest_cluster_label]

    # Write the cluster to PDB as one pseudo-atom per pocket cell.
    n_points = densest_cluster_points.shape[0]
    top_df = pd.DataFrame(
        {
            "serial": list(range(n_points)),
            "name": "PK",
            "element": "H",
            "resSeq": list(range(n_points)),
            "resName": "PCK",
            "chainID": 0,
        }
    )
    pocket_top = md.Topology.from_dataframe(top_df, np.array([]))
    pocket_trj = md.Trajectory(xyz=densest_cluster_points, topology=pocket_top)
    pocket_trj.save(pocket_pdb_path)

    # NOTE(review): the centre is deliberately computed from the re-parsed PDB
    # file rather than from ``densest_cluster_points``; the PDB round trip
    # presumably also converts mdtraj's units to the ones smina expects.
    # Confirm before simplifying.
    parser = PDBParser()
    struc = parser.get_structure("X", pocket_pdb_path)
    coords = [atom.coord for atom in struc.get_atoms()]
    return np.mean(coords, axis=0)


def predict(input_sequence, input_ligand, input_protein, exhaustiveness):
    """
    Main prediction function that calls ligsite and smina

    Parameters
    ----------
    input_sequence: str
        monomer sequence (not used by this implementation)
    input_ligand: str
        ligand as SMILES string
    input_protein: gradio.File
        Gradio file object to monomer protein structure as PDB
    exhaustiveness: int
        SMINA parameter

    Returns
    -------
    output_structures: list
        [path to the input protein, path to the docked ligand SDF]
    run_time: float
        run time of the program in seconds

    Raises
    ------
    gr.Error
        If no protein file is given, the SMILES is empty or cannot be parsed,
        no 3D conformer can be generated, an obabel conversion fails, or no
        pocket cluster is found.
    """
    start_time = time.time()

    if input_protein is None:
        raise gr.Error("need pdb input")

    # Prepare ligand and receptor in the formats smina is given below.
    _write_ligand_sdf(input_ligand, "/usr/src/app/ligand.sdf")
    _run_obabel([input_protein.name, "-xr", "-O", "/usr/src/app/receptor.pdbqt"])
    _run_obabel(["-isdf", "/usr/src/app/ligand.sdf", "-O", "/usr/src/app/ligand.pdbqt"])

    # Find pocket
    pocket_center = _find_pocket_center(
        input_protein.name, "/usr/src/app/pockets_dense.pdb"
    )

    # run smina
    run_smina(
        "/usr/src/app/ligand.pdbqt",
        "/usr/src/app/receptor.pdbqt",
        "/usr/src/app/docking_pose.sdf",
        pocket_center,
        [10, 10, 10],
        # The documented contract is an int; coerce in case the UI control
        # hands over a float such as 3.0.
        exhaustiveness=int(exhaustiveness),
    )

    run_time = time.time() - start_time
    return [input_protein.name, "/usr/src/app/docking_pose.sdf"], run_time
# NOTE(review): the indentation of this section was lost in the source; the
# nesting below (Row holds only the three inputs, everything else sits directly
# inside Blocks, launch happens at module level) is reconstructed from syntax.

# One example row: protein sequence, ligand SMILES, protein structure file.
example_rows = [
    [
        "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
        "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
        "input_test.pdb",
    ],
]

# Representations handed to the Molecule3D viewer: model 0 as a cartoon,
# residues named UNK/LIG in model 0 and all of model 1 as green sticks.
ligand_style = {"style": "stick", "color": "greenCarbon"}
reps = [
    {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
    {"model": 0, "resname": "UNK", **ligand_style},
    {"model": 0, "resname": "LIG", **ligand_style},
    {"model": 1, **ligand_style},
]

with gr.Blocks() as app:
    gr.Markdown("# LigSite + Smina")
    gr.Markdown(
        "Example model using LigSite and DBScan to find a binding pocket in the protein and then SMINA to dock the ligand in the found pocket."
    )

    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    exhaustiveness = gr.Slider(minimum=1, maximum=10, value=1, label="Exhaustiveness")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        examples=example_rows,
        inputs=[input_sequence, input_ligand, input_protein],
    )

    out = Molecule3D(reps=reps)
    run_time = gr.Textbox(label="Runtime")

    # Wire the button to the prediction function: the four inputs go in, the
    # 3D viewer and the runtime textbox receive the two return values.
    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_protein, exhaustiveness],
        outputs=[out, run_time],
    )

app.launch()