Spaces:
Running
Running
Commit
·
6a8393c
1
Parent(s):
a9bf7b9
improve bond recognition and glycine recognition
Browse files
app.py
CHANGED
|
@@ -17,11 +17,12 @@ from rdkit import Chem
|
|
| 17 |
class PeptideAnalyzer:
|
| 18 |
def __init__(self):
|
| 19 |
self.bond_patterns = [
|
| 20 |
-
r'OC\(=O\)', #
|
| 21 |
-
r'N\(C\)C\(=O\)', # N-methylated peptide bond
|
| 22 |
-
r'N[12]
|
| 23 |
-
r'
|
| 24 |
-
r'C\(=O\)N' #
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
def is_peptide(self, smiles):
|
|
@@ -39,12 +40,7 @@ class PeptideAnalyzer:
|
|
| 39 |
n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
|
| 40 |
if mol.HasSubstructMatch(n_methyl_pattern):
|
| 41 |
return True
|
| 42 |
-
|
| 43 |
-
# Look for ester bonds in cyclic depsipeptides: OC(=O) pattern
|
| 44 |
-
ester_bond_pattern = Chem.MolFromSmarts('O[C](=O)')
|
| 45 |
-
if mol.HasSubstructMatch(ester_bond_pattern):
|
| 46 |
-
return True
|
| 47 |
-
|
| 48 |
return False
|
| 49 |
|
| 50 |
def is_cyclic(self, smiles):
|
|
@@ -107,18 +103,8 @@ class PeptideAnalyzer:
|
|
| 107 |
'pattern': match.group()
|
| 108 |
})
|
| 109 |
used.update(range(match.start(), match.end()))
|
| 110 |
-
|
| 111 |
-
# Then find all bonds, including N2C(=O)
|
| 112 |
-
bond_patterns = [
|
| 113 |
-
(r'OC\(=O\)', 'ester'),
|
| 114 |
-
(r'N\(C\)C\(=O\)', 'n_methyl'),
|
| 115 |
-
(r'N[12]C\(=O\)', 'peptide'), # Pro peptide bonds
|
| 116 |
-
(r'NC\(=O\)', 'peptide'), # Regular peptide bonds
|
| 117 |
-
(r'C\(=O\)N\(C\)', 'n_methyl'),
|
| 118 |
-
(r'C\(=O\)N[12]?', 'peptide')
|
| 119 |
-
]
|
| 120 |
|
| 121 |
-
for pattern, bond_type in bond_patterns:
|
| 122 |
for match in re.finditer(pattern, smiles):
|
| 123 |
if not any(p in range(match.start(), match.end()) for p in used):
|
| 124 |
positions.append({
|
|
@@ -216,8 +202,14 @@ class PeptideAnalyzer:
|
|
| 216 |
return '4F-Phe', mods
|
| 217 |
|
| 218 |
# Regular residue identification
|
| 219 |
-
if 'NCC(=O)' in content:
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
|
| 223 |
return 'Leu', mods
|
|
@@ -694,7 +686,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
| 694 |
return "No input provided.", None, None
|
| 695 |
|
| 696 |
iface = gr.Interface(
|
| 697 |
-
fn=process_input,
|
| 698 |
inputs=[
|
| 699 |
gr.Textbox(
|
| 700 |
label="Enter SMILES string",
|
|
|
|
| 17 |
class PeptideAnalyzer:
|
| 18 |
def __init__(self):
|
| 19 |
self.bond_patterns = [
|
| 20 |
+
(r'OC\(=O\)', 'ester'), # Ester bond
|
| 21 |
+
(r'N\(C\)C\(=O\)', 'n_methyl'), # N-methylated peptide bond
|
| 22 |
+
(r'N[12]C\(=O\)', 'proline'), # Proline peptide bond
|
| 23 |
+
(r'NC\(=O\)', 'peptide'), # Standard peptide bond
|
| 24 |
+
(r'C\(=O\)N\(C\)', 'n_methyl_reverse'), # Reverse N-methylated
|
| 25 |
+
(r'C\(=O\)N[12]?', 'peptide_reverse') # Reverse peptide bond
|
| 26 |
]
|
| 27 |
|
| 28 |
def is_peptide(self, smiles):
|
|
|
|
| 40 |
n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
|
| 41 |
if mol.HasSubstructMatch(n_methyl_pattern):
|
| 42 |
return True
|
| 43 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
return False
|
| 45 |
|
| 46 |
def is_cyclic(self, smiles):
|
|
|
|
| 103 |
'pattern': match.group()
|
| 104 |
})
|
| 105 |
used.update(range(match.start(), match.end()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
+
for pattern, bond_type in self.bond_patterns:
|
| 108 |
for match in re.finditer(pattern, smiles):
|
| 109 |
if not any(p in range(match.start(), match.end()) for p in used):
|
| 110 |
positions.append({
|
|
|
|
| 202 |
return '4F-Phe', mods
|
| 203 |
|
| 204 |
# Regular residue identification
|
| 205 |
+
if ('NCC(=O)' in content) or (content == 'C'):
|
| 206 |
+
# Middle case - between bonds
|
| 207 |
+
if segment.get('bond_before') and segment.get('bond_after'):
|
| 208 |
+
if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
|
| 209 |
+
return 'Gly', mods
|
| 210 |
+
# Terminal case - at the end
|
| 211 |
+
elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
|
| 212 |
+
return 'Gly', mods
|
| 213 |
|
| 214 |
if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
|
| 215 |
return 'Leu', mods
|
|
|
|
| 686 |
return "No input provided.", None, None
|
| 687 |
|
| 688 |
iface = gr.Interface(
|
| 689 |
+
fn=process_input,
|
| 690 |
inputs=[
|
| 691 |
gr.Textbox(
|
| 692 |
label="Enter SMILES string",
|