Spaces:
Sleeping
Sleeping
| import time | |
| import gradio as gr | |
| from gradio_molecule3d import Molecule3D | |
| import numpy as np | |
| from scipy.optimize import differential_evolution, NonlinearConstraint | |
| from biotite.structure.io.pdb import PDBFile | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem | |
| from biotite.structure import AtomArrayStack | |
def generate_input_conformer(
    ligand_smiles: str,
    addHs: bool = False,
    minimize_maxIters: int = -1,
) -> Chem.Mol:
    """Build an RDKit molecule from SMILES and embed a 3D conformer for it.

    Hydrogens are always added before embedding. Embedding is first tried
    with ``useBasicKnowledge=True``; if that fails, it is retried once with
    ``useBasicKnowledge=False``.

    Args:
        ligand_smiles: SMILES string describing the ligand.
        addHs: Not consulted by this function -- hydrogens are added
            unconditionally. Kept so existing callers keep working.
        minimize_maxIters: When positive, the conformer from the first
            embedding attempt is MMFF-minimized with this iteration budget
            (and once more with the same budget if it did not converge).
            The fallback conformer is not minimized.

    Returns:
        The hydrogen-added molecule. If both embedding attempts fail, the
        molecule is returned as-is after the failed attempts.

    Raises:
        ValueError: If ``ligand_smiles`` cannot be parsed by RDKit.
    """
    _mol = Chem.MolFromSmiles(ligand_smiles)
    if _mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of deep inside AddHs.
        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")

    # need to add Hs to generate sensible conformers
    _mol = Chem.AddHs(_mol)

    # first attempt: random starting coordinates with basic-knowledge terms
    confid = AllChem.EmbedMolecule(
        _mol,
        useRandomCoords=True,
        useBasicKnowledge=True,
        maxAttempts=100,
        randomSeed=42,
    )

    if confid != -1:
        if minimize_maxIters > 0:
            # molecule successfully embedded - minimize
            success = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
            # 0 if the optimization converged,
            # -1 if the forcefield could not be set up,
            # 1 if more iterations are required.
            if success == 1:
                # not converged yet: run a second round with the same budget
                AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
    else:
        # this means EmbedMolecule failed
        # try less optimal approach (no basic-knowledge terms)
        confid = AllChem.EmbedMolecule(
            _mol,
            useRandomCoords=True,
            useBasicKnowledge=False,
            maxAttempts=100,
            randomSeed=42,
        )
    return _mol
def optimize_coordinate(points, bound_buffer=15, dmin=6.02, dmax=8, **de_options):
    """Find a position near ``points`` that keeps a minimum clearance.

    The search minimizes the summed Euclidean distance to every row of
    ``points`` while constraining the distance to the *closest* row to lie
    in ``[dmin, dmax]``. The search box is the centroid of ``points``
    extended by ``bound_buffer`` along every axis.

    Args:
        points: Array-like of shape ``(n_points, n_dims)``.
        bound_buffer: Half-width of the search box around the centroid.
        dmin: Lower bound on the distance to the nearest point.
        dmax: Upper bound on the distance to the nearest point.
        **de_options: Extra keyword arguments forwarded unchanged to
            ``scipy.optimize.differential_evolution`` (for example a seed,
            ``maxiter`` or ``polish``). ``bounds`` and ``constraints`` are
            set by this function and must not be passed.

    Returns:
        Tuple ``(x, fun)``: the best position found and its objective value.

    Raises:
        ValueError: If ``points`` is empty or not 2-D, or ``dmin > dmax``.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] == 0:
        raise ValueError("points must be a non-empty 2-D array of coordinates")
    if dmin > dmax:
        raise ValueError("dmin must not exceed dmax")

    def distances(x):
        # distance from candidate x to every reference point
        return np.linalg.norm(points - x, axis=1)

    # the centroid is needed for both ends of the box; compute it once
    center = points.mean(axis=0)
    bounds = list(zip(center - bound_buffer, center + bound_buffer))

    # Define the constraint function (nearest point within [dmin, dmax])
    con = NonlinearConstraint(lambda x: distances(x).min(), dmin, dmax)

    # Define the objective function (minimize summed distance to all points)
    def objective(x):
        return distances(x).sum()

    # Perform differential evolution to find the optimal coordinate
    result = differential_evolution(objective, bounds, constraints=con, **de_options)
    return result.x, result.fun
def optimize_decoy_coordinate(
    points,
    bound_buffer=15,
    dmin=6.02,
    decoy_min=4.0,
    decoy_max=4.98,
    **de_options,
):
    """Find a decoy position that is close to exactly one of ``points``.

    The search maximizes the summed Euclidean distance to every row of
    ``points`` under two constraints: exactly one point lies closer than
    ``dmin``, and the distance to the nearest point is within
    ``[decoy_min, decoy_max]``. The search box is the centroid of
    ``points`` extended by ``bound_buffer`` along every axis.

    Args:
        points: Array-like of shape ``(n_points, n_dims)``.
        bound_buffer: Half-width of the search box around the centroid.
        dmin: Distance below which a point counts as "close".
        decoy_min: Lower bound on the distance to the nearest point.
        decoy_max: Upper bound on the distance to the nearest point.
        **de_options: Extra keyword arguments forwarded unchanged to
            ``scipy.optimize.differential_evolution`` (for example a seed,
            ``maxiter`` or ``polish``). ``bounds`` and ``constraints`` are
            set by this function and must not be passed.

    Returns:
        Tuple ``(x, fun)``: the best position found and its objective value
        (the negated distance sum).

    Raises:
        ValueError: If ``points`` is empty or not 2-D, or
            ``decoy_min > decoy_max``.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] == 0:
        raise ValueError("points must be a non-empty 2-D array of coordinates")
    if decoy_min > decoy_max:
        raise ValueError("decoy_min must not exceed decoy_max")

    def distances(x):
        # distance from candidate x to every reference point
        return np.linalg.norm(points - x, axis=1)

    # the centroid is needed for both ends of the box; compute it once
    center = points.mean(axis=0)
    bounds = list(zip(center - bound_buffer, center + bound_buffer))

    # Define the constraint functions (dmin distance for all but one atom)
    con1 = NonlinearConstraint(lambda x: np.sum(distances(x) < dmin), 1, 1)
    con2 = NonlinearConstraint(lambda x: distances(x).min(), decoy_min, decoy_max)

    # Define the objective function (maximize summed distance, hence negated)
    def objective(x):
        return -distances(x).sum()

    # Perform differential evolution to find the optimal coordinate
    result = differential_evolution(
        objective, bounds, constraints=(con1, con2), **de_options
    )
    return result.x, result.fun
def add_decoy_atom(structure, decoy_pos):
    """Return ``structure`` with one extra decoy atom placed at ``decoy_pos``.

    The decoy is a one-atom, one-model ``AtomArrayStack`` annotated as a
    carbon atom named ``C`` in residue ``GLY`` of chain ``q``; it is
    concatenated after the atoms of ``structure``.

    NOTE(review): the decoy is built with a depth of 1, so ``structure``
    presumably has to be a single-model stack for the concatenation to
    work -- confirm against the inputs you expect.
    """
    decoy_stack = AtomArrayStack(depth=1, length=1)
    # fill the coordinate array of the single atom with the requested position
    decoy_stack.coord = np.ones_like(decoy_stack.coord) * decoy_pos

    # annotations are assigned in this fixed order
    annotations = {
        "chain_id": ["q"],
        "element": ["C"],
        "atom_name": ["C"],
        "res_name": ["GLY"],
    }
    for field, value in annotations.items():
        setattr(decoy_stack, field, value)

    return structure + decoy_stack
def set_protein_to_new_coord_plus_decoy_atom(input_pdb_file, new_coord, decoy_coord, output_file):
    """Write a PDB where every input atom sits at ``new_coord``, plus a decoy.

    The structure is read from ``input_pdb_file``, all of its atom
    coordinates are overwritten with ``new_coord``, one decoy atom is
    appended at ``decoy_coord`` and the result is written to
    ``output_file``. Nothing is returned.
    """
    protein = PDBFile.read(input_pdb_file).get_structure()

    # collapse the whole structure onto the single target coordinate
    target = np.array(new_coord)
    protein.coord = np.ones_like(protein.coord) * target

    # add decoy
    protein_with_decoy = add_decoy_atom(protein, decoy_coord)

    out_pdb = PDBFile()
    out_pdb.set_structure(protein_with_decoy)
    out_pdb.write(output_file)
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Run the template inference and return output files, metrics and runtime.

    A ligand conformer is generated from ``input_ligand`` and written to
    ``test_docking_pose.sdf``. A coordinate is then optimized relative to
    the ligand's heavy atoms, a decoy coordinate is optimized as well, and
    the protein from ``input_protein`` is written to ``test_out.pdb`` with
    all atoms at the optimized coordinate plus the decoy atom.

    Args:
        input_sequence: Protein sequence from the UI; not used here.
        input_ligand: Ligand SMILES string.
        input_msa: MSA file from the UI; not used here.
        input_protein: Input protein PDB, handed to the PDB reader.

    Returns:
        Tuple ``([pdb_path, sdf_path], metrics, run_time)`` where
        ``metrics`` is a dict of floats and ``run_time`` is in seconds.
    """
    start_time = time.time()

    ligand_file = "test_docking_pose.sdf"
    output_file = "test_out.pdb"

    # Do inference here
    mol = generate_input_conformer(input_ligand, minimize_maxIters=500)

    molwriter = Chem.SDWriter(ligand_file)
    try:
        molwriter.write(mol)
    finally:
        # close explicitly so the record is flushed to disk before the path
        # is handed to the viewer
        molwriter.close()

    # get only non hydrogen atoms
    heavy_atom_mask = [at.GetAtomicNum() != 1 for at in mol.GetAtoms()]
    mol_coords = mol.GetConformer().GetPositions()[heavy_atom_mask]

    # get opt coords
    new_coord, min_dist_sum = optimize_coordinate(mol_coords)
    # get mindist to protein
    min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))

    # decoy coord
    decoy_coord, _ = optimize_decoy_coordinate(mol_coords)
    decoy_min_dist = np.min(np.linalg.norm(mol_coords - decoy_coord, axis=1))

    # save protein
    set_protein_to_new_coord_plus_decoy_atom(input_protein, new_coord, decoy_coord, output_file)

    # return an output pdb file with the protein and ligand with resname LIG or UNK.
    # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
    # (values are converted to plain floats for the JSON component)
    metrics = {
        "min_dist": float(min_dist),
        "min_dist_sum": float(min_dist_sum),
        "decoy_min_dist": float(decoy_min_dist),
    }

    end_time = time.time()
    run_time = end_time - start_time
    return [output_file, ligand_file], metrics, run_time
# Rendering styles handed to the 3D viewer, one entry per model index.
reps = [
    {"model": 0, "style": "sphere", "color": "grayCarbon"},
    {"model": 1, "style": "stick", "color": "greenCarbon"},
]

# Example rows, in the order: sequence, ligand SMILES, MSA file, protein file.
example_rows = [
    [
        "",
        "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
        "empty_file.a3m",
        "test_input.pdb",
    ],
]

with gr.Blocks() as app:
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")

    # text inputs side by side
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")

    # file inputs side by side
    with gr.Row():
        input_msa = gr.File(label="Input Protein MSA (A3M)")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    gr.Examples(
        example_rows,
        [input_sequence, input_ligand, input_msa, input_protein],
    )

    # output widgets: 3D view, metrics and wall-clock runtime
    out = Molecule3D(reps=reps)
    metrics = gr.JSON(label="Metrics")
    run_time = gr.Textbox(label="Runtime")

    # wire the button to the inference function
    btn.click(
        predict,
        inputs=[input_sequence, input_ligand, input_msa, input_protein],
        outputs=[out, metrics, run_time],
    )

app.launch()