CDK_library / app.py
AryanRajSaxena's picture
Upload folder using huggingface_hub
56793c5 verified
import gradio as gr
from CDK_pywrapper import CDK
from rdkit import Chem
from rdkit.Chem import Descriptors, Draw, AllChem, rdMolDescriptors
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.DataStructs import TanimotoSimilarity
import pandas as pd
import numpy as np
import tempfile
# Function to calculate CDK descriptors for a comma-separated list of SMILES
def convert_smiles_to_mol(smiles_list, checkbox):
    """Calculate CDK molecular descriptors for one or more SMILES strings.

    Despite its name, this function does not build a MolFile: it returns the
    table produced by ``CDK.calculate`` and, when the export succeeds, the
    path of an Excel file holding the same table.

    Args:
        smiles_list: Comma-separated SMILES, e.g. ``"CCO, c1ccccc1"``.
            Whitespace around entries and empty entries are ignored.
        checkbox: When truthy, a 3D conformer is embedded for every molecule
            and CDK is created with ``ignore_3D=False``.

    Returns:
        A 2-tuple ``(table_or_message, file_path)``. On success the first
        item is the descriptor table and the second the path of the ``.xlsx``
        export. On any failure the first item is a message string and the
        second is ``None``, so the file output is never handed a string that
        is not a real path.
    """
    entries = [part.strip() for part in (smiles_list or "").split(",")]
    entries = [entry for entry in entries if entry]
    if not entries:
        return "No SMILES provided.", None

    try:
        mols = []
        for smiles in entries:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure with None; passing
                # that on to AddHs would only raise an opaque error.
                return f"Invalid SMILES: {smiles}", None
            mol = Chem.AddHs(mol)
            # 3D descriptors need a conformer on every molecule;
            # EmbedMolecule returns -1 when it cannot produce one.
            if checkbox and AllChem.EmbedMolecule(mol) < 0:
                return f"Could not generate 3D coordinates for: {smiles}", None
            mols.append(mol)
        cdk = CDK(ignore_3D=False) if checkbox else CDK()
        descriptors = cdk.calculate(mols)
    except Exception as e:
        return str(e), None

    try:
        # Only reserve a unique path here; the handle is closed before
        # to_excel writes to it so the file is not open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
            file_path = tmp.name
        descriptors.to_excel(file_path, index=False)
    except Exception as e:
        # The descriptors were computed; only the export failed. Keep the
        # table visible and report the export problem alongside it.
        return f"{descriptors}\n\nExcel export failed: {e}", None
    return descriptors, file_path
# Molecular weight and a rendered depiction for a single SMILES string
def calculate_molecular_weight(smiles):
    """Return the molecular weight and an image for ``smiles``.

    Args:
        smiles: SMILES string of the molecule, or ``None``.

    Returns:
        ``(weight, image)`` when the SMILES parses. Otherwise a single
        message string: ``"SMILES string is None"`` for a ``None`` input,
        ``"Invalid SMILES"`` when parsing yields no molecule, or the text of
        any exception raised while parsing, weighing or drawing.
    """
    if smiles is None:
        return "SMILES string is None"

    try:
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is not None:
            weight = Descriptors.MolWt(parsed)
            picture = Draw.MolToImage(parsed)
            return weight, picture
        return "Invalid SMILES"
    except Exception as err:
        return str(err)
def get_geometric_descriptors(smiles):
    """Compute simple 3D geometric descriptors for one SMILES string.

    Hydrogens are added, a 3D conformer is embedded with ETKDG and optimised
    with UFF. The centroid of the atom positions and each atom's distance to
    it are then derived.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A 2-tuple ``(table_or_message, image)``. On success the first item is
        a transposed one-row DataFrame (one row per descriptor: molecular
        weight, centroid, mean and max distance to the centroid) and the
        second is the drawn molecule. On failure the first item is a message
        string and the second is ``None``, so both bound outputs always
        receive a value and the image output never receives a string.
    """
    if not smiles or not smiles.strip():
        return "No SMILES provided.", None

    try:
        mol = Chem.MolFromSmiles(smiles.strip())
        if mol is None:
            return "Invalid SMILES string", None

        # Add hydrogens and compute 3D coordinates
        mol = Chem.AddHs(mol)
        # EmbedMolecule returns -1 when no conformer could be generated; the
        # later GetConformer() call would otherwise fail with an obscure error.
        if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) < 0:
            return "Could not generate 3D coordinates for this molecule", None
        AllChem.UFFOptimizeMolecule(mol)

        # Calculate geometric descriptors
        coords = mol.GetConformer().GetPositions()
        centroid = np.round(np.mean(coords, axis=0), 12)
        distances = np.linalg.norm(coords - centroid, axis=1)
        geometric_descriptors = {
            'Molecular Weight': Descriptors.MolWt(mol),
            'Centroid': centroid.tolist(),
            'Mean Distance To Centroid': np.mean(distances),
            'Max Distance To Centroid': np.max(distances),
        }
        img = Draw.MolToImage(mol)
        df = pd.DataFrame([geometric_descriptors])
        return df.T, img
    except Exception as e:
        return str(e), None
# Function to check if a substructure is present
def check_substructure(smiles, substructure_smiles):
    """Check whether ``substructure_smiles`` occurs in ``smiles``.

    Both arguments are parsed as SMILES and compared with
    ``HasSubstructMatch``.

    Args:
        smiles: SMILES string of the molecule to search in.
        substructure_smiles: SMILES string of the substructure to look for.

    Returns:
        A 3-tuple ``(molecule_image, substructure_image, message)``. An image
        slot is ``None`` whenever there is nothing to draw (invalid or empty
        input, an error, or no match for the substructure slot); it never
        holds a placeholder string. ``message`` is one of
        ``"Substructure is present."``, ``"Substructure is not present."``,
        ``"Invalid SMILES string provided."`` or the text of an exception.
    """
    if not smiles or not substructure_smiles:
        return None, None, "Invalid SMILES string provided."

    try:
        # Convert the SMILES strings to RDKit molecule objects
        molecule = Chem.MolFromSmiles(smiles)
        substructure = Chem.MolFromSmiles(substructure_smiles)
        # MolFromSmiles returns None for SMILES it cannot parse
        if molecule is None or substructure is None:
            return None, None, "Invalid SMILES string provided."

        molecule_img = Draw.MolToImage(molecule)
        if not molecule.HasSubstructMatch(substructure):
            return molecule_img, None, "Substructure is not present."
        return molecule_img, Draw.MolToImage(substructure), "Substructure is present."
    except Exception as e:
        # Report the failure through the text slot only
        return None, None, str(e)
def calculate_similarity(smiles1, smiles2):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Each molecule is turned into a 2048-bit Morgan fingerprint of radius 2
    and the two bit vectors are compared with ``TanimotoSimilarity``.

    Args:
        smiles1: SMILES string of the first molecule.
        smiles2: SMILES string of the second molecule.

    Returns:
        The similarity value, or a string: ``"Invalid SMILES string"`` when
        either input fails to parse, or the text of any exception raised.
    """
    try:
        parsed = (Chem.MolFromSmiles(smiles1), Chem.MolFromSmiles(smiles2))
        if any(entry is None for entry in parsed):
            return "Invalid SMILES string"
        fingerprints = [
            rdMolDescriptors.GetMorganFingerprintAsBitVect(entry, radius=2, nBits=2048)
            for entry in parsed
        ]
        return TanimotoSimilarity(*fingerprints)
    except Exception as err:
        return str(err)
def perform_reaction(reactant1_smiles, reactant2_smiles, reaction_smarts):
    """Apply a two-reactant reaction SMARTS and draw reactants and products.

    Args:
        reactant1_smiles: SMILES string of the first reactant.
        reactant2_smiles: SMILES string of the second reactant.
        reaction_smarts: Reaction SMARTS describing the transformation.

    Returns:
        A 2-tuple ``(message, image)``. ``image`` is a grid of the reactants
        followed by every product (legend ``Product <set>.<index>``), or
        ``None`` when the inputs are invalid or an exception occurred.
        ``message`` is ``"Reaction Successful"`` only when at least one
        product set was generated; otherwise it states what went wrong.
    """
    # Reject empty input up front: an empty SMILES would otherwise go
    # through the whole pipeline instead of being reported as invalid.
    if not reactant1_smiles or not reactant2_smiles:
        return "Invalid SMILES string(s)", None
    if not reaction_smarts:
        return "Invalid reaction SMARTS", None

    try:
        # Define the reaction using SMARTS provided by the user
        reaction = AllChem.ReactionFromSmarts(reaction_smarts)

        # Convert SMILES to RDKit molecules
        reactant1 = Chem.MolFromSmiles(reactant1_smiles)
        reactant2 = Chem.MolFromSmiles(reactant2_smiles)
        if reactant1 is None or reactant2 is None:
            return "Invalid SMILES string(s)", None

        # Run the reaction
        products = reaction.RunReactants((reactant1, reactant2))

        # Create a grid image of reactants and products
        all_mols = [reactant1, reactant2]
        legends = ["Reactant 1", "Reactant 2"]
        for i, product_set in enumerate(products, start=1):
            for j, product in enumerate(product_set, start=1):
                # Sanitize each product before drawing; if that fails, fall
                # back to a non-strict property update so it can still be
                # depicted.
                try:
                    Chem.SanitizeMol(product)
                except Exception:
                    product.UpdatePropertyCache(strict=False)
                all_mols.append(product)
                legends.append(f'Product {i}.{j}')
        img = Draw.MolsToGridImage(all_mols, molsPerRow=4, subImgSize=(300, 300), legends=legends)

        if not products:
            # No product sets means the template did not apply to these
            # reactants; do not report that as a success.
            return "No products formed: the reactants do not match the reaction SMARTS", img
        return "Reaction Successful", img
    except Exception as e:
        return str(e), None
# Click handler for the reaction tab: takes the SMARTS first, then the reactants
def generate_reaction_image(reaction_smarts,reactant1_smiles, reactant2_smiles):
    """Run ``perform_reaction`` with the arguments in the order it expects.

    Args:
        reaction_smarts: Reaction SMARTS describing the transformation.
        reactant1_smiles: SMILES string of the first reactant.
        reactant2_smiles: SMILES string of the second reactant.

    Returns:
        The ``(result, image)`` pair produced by ``perform_reaction``.
    """
    status, picture = perform_reaction(
        reactant1_smiles=reactant1_smiles,
        reactant2_smiles=reactant2_smiles,
        reaction_smarts=reaction_smarts,
    )
    return status, picture
# Gradio Interface: one tab per feature, each wiring its inputs and outputs
# to the matching handler function defined above.
with gr.Blocks(theme='earneleh/paris') as demo:
    gr.Markdown("### CDK Functionality with Gradio Interface")

    # Tab 1: CDK descriptors for a comma-separated list of SMILES
    with gr.Tab("Calculate Descriptors"):
        smiles_input = gr.Textbox(
            label="SMILES",
            info="Enter SMILES separated by comma",
        )
        checkbox = gr.Checkbox(label="Include 3D Coordinates")
        molfile_output = gr.Textbox(label="MolFile", lines=10)
        convert_button = gr.Button("Calculate")
        download_link = gr.File(label="Download Descriptors as Excel")
        convert_button.click(
            fn=convert_smiles_to_mol,
            inputs=[smiles_input, checkbox],
            outputs=[molfile_output, download_link],
        )

    # Tab 2: geometric descriptors, with the depiction in a second column
    with gr.Tab("Geometric Values"):
        with gr.Row():
            with gr.Column(min_width=800):
                smiles_input_mw = gr.Textbox(label="SMILE")
                weight_output = gr.TextArea(
                    label="Geometric Values",
                    lines=8,
                    show_copy_button=True,
                )
                calculate_button = gr.Button("Calculate")
            with gr.Column():
                image_output = gr.Image(
                    label="Molecular Structure",
                    height=400,
                    width=500,
                )
        calculate_button.click(
            fn=get_geometric_descriptors,
            inputs=smiles_input_mw,
            outputs=[weight_output, image_output],
        )

    # Tab 3: substructure search, drawing the molecule and the substructure
    with gr.Tab("Check Substructure"):
        with gr.Row():
            with gr.Column():
                smiles_input_sub = gr.Textbox(label="SMILES")
                substructure_input = gr.Textbox(label="Substructure SMILES")
                substructure_output = gr.Label(label="Is Substructure Present?")
                check_button = gr.Button("Check")
            with gr.Column():
                image_output1 = gr.Image(
                    label="Molecular Structure",
                    height=350,
                    width=500,
                )
                image_output2 = gr.Image(
                    label="Sub_Molecular Structure",
                    height=350,
                    width=500,
                )
        check_button.click(
            fn=check_substructure,
            inputs=[smiles_input_sub, substructure_input],
            outputs=[image_output1, image_output2, substructure_output],
        )

    # Tab 4: Tanimoto similarity between two molecules
    with gr.Tab("Calculate Similarity"):
        smiles_input1 = gr.Textbox(label="SMILES 1")
        smiles_input2 = gr.Textbox(label="SMILES 2")
        similarity_output = gr.Number(label="Similarity (Tanimoto)")
        calculate_button_sim = gr.Button("Calculate Similarity")
        calculate_button_sim.click(
            fn=calculate_similarity,
            inputs=[smiles_input1, smiles_input2],
            outputs=similarity_output,
        )

    # Tab 5: reaction from SMARTS. This tab rebinds smiles_input1,
    # smiles_input2, calculate_button and image_output; the earlier tabs are
    # unaffected because their click handlers were registered before this.
    with gr.Tab("Chemical Reaction"):
        reaction_smarts_input = gr.Textbox(
            label="Reaction SMARTS",
            value="[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]",
        )
        smiles_input1 = gr.Textbox(label="Reactant 1 SMILES", value="OC=O")
        smiles_input2 = gr.Textbox(label="Reactant 2 SMILES", value="NCC")
        calculate_button = gr.Button("Perform Reaction")
        result_output = gr.Label(label="Result")
        image_output = gr.Image(label="Reaction Image", interactive=True)
        calculate_button.click(
            fn=generate_reaction_image,
            inputs=[reaction_smarts_input, smiles_input1, smiles_input2],
            outputs=[result_output, image_output],
        )

# Launch Gradio Interface
demo.launch(share=True)