Spaces:
Running
Running
Commit
·
a9bf7b9
1
Parent(s):
a7360b6
cyclic
Browse files
app.py
CHANGED
|
@@ -49,57 +49,47 @@ class PeptideAnalyzer:
|
|
| 49 |
|
| 50 |
def is_cyclic(self, smiles):
|
| 51 |
"""
|
| 52 |
-
Determine if SMILES represents a cyclic peptide
|
| 53 |
Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
|
| 54 |
"""
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
# Find all cycle numbers and their contexts
|
| 58 |
-
for match in re.finditer(r'(\d)', smiles):
|
| 59 |
-
number = match.group(1)
|
| 60 |
-
position = match.start(1)
|
| 61 |
-
|
| 62 |
-
if number not in cycle_info:
|
| 63 |
-
cycle_info[number] = []
|
| 64 |
-
cycle_info[number].append({
|
| 65 |
-
'position': position,
|
| 66 |
-
'full_context': smiles[max(0, position-3):min(len(smiles), position+4)]
|
| 67 |
-
})
|
| 68 |
-
|
| 69 |
-
# Check each cycle
|
| 70 |
-
peptide_cycles = []
|
| 71 |
aromatic_cycles = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
continue
|
| 76 |
-
|
| 77 |
-
start, end = occurrences[0]['position'], occurrences[1]['position']
|
| 78 |
-
segment = smiles[start:end+1]
|
| 79 |
-
|
| 80 |
-
# Check for aromatic rings
|
| 81 |
-
full_context = smiles[max(0,start-10):min(len(smiles),end+10)]
|
| 82 |
-
is_aromatic = ('c2ccccc2' in full_context and len(segment) < 20) or \
|
| 83 |
-
('c1ccccc1' in full_context and len(segment) < 20)
|
| 84 |
-
|
| 85 |
-
# Check for peptide bonds
|
| 86 |
-
peptide_patterns = [
|
| 87 |
-
'C(=O)N', # Regular peptide bond
|
| 88 |
-
'C(=O)N(C)', # N-methylated peptide bond
|
| 89 |
-
'C(=O)N1', # Cyclic peptide bond
|
| 90 |
-
'C(=O)N2' # Cyclic peptide bond
|
| 91 |
-
]
|
| 92 |
-
|
| 93 |
-
has_peptide_bond = any(pattern in segment for pattern in peptide_patterns) and \
|
| 94 |
-
len(segment) > 20
|
| 95 |
-
|
| 96 |
-
if is_aromatic and len(segment) < 20:
|
| 97 |
-
aromatic_cycles.append(number)
|
| 98 |
-
elif has_peptide_bond:
|
| 99 |
-
peptide_cycles.append(number)
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def split_on_bonds(self, smiles):
|
| 105 |
"""Split SMILES into segments with simplified Pro handling"""
|
|
@@ -629,7 +619,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
| 629 |
summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
| 630 |
if is_cyclic:
|
| 631 |
summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
| 632 |
-
summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
| 633 |
|
| 634 |
return summary + output_text, img_cyclic, img_linear
|
| 635 |
|
|
@@ -693,7 +683,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
| 693 |
output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
| 694 |
if is_cyclic:
|
| 695 |
output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
| 696 |
-
output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
| 697 |
output_text += "-" * 50 + "\n"
|
| 698 |
|
| 699 |
return output_text, None, None
|
|
|
|
| 49 |
|
| 50 |
def is_cyclic(self, smiles):
    """
    Determine if SMILES represents a cyclic peptide by checking head-tail connection.

    This is a text-level heuristic: it never parses the molecule, it only
    looks for ring-label digits that appear in a "ring opening" context and
    are followed later in the string by a matching "ring closing" context.

    Args:
        smiles: SMILES string to inspect.

    Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
        is_cyclic: True when at least one peptide cycle label was found.
        peptide_cycles: sorted list of ring-label digits (as strings) that
            matched one of the opening/closing pattern pairs and are not
            aromatic labels.
        aromatic_cycles: ring-label digits (as strings) of six-membered
            aromatic carbon rings labelled 1 or 2, in order of first
            appearance.
    """
    # First find aromatic rings (benzene-like 'c1ccccc1' / 'c2ccccc2' runs)
    aromatic_cycles = []
    for match in re.finditer(r'c[12]ccccc[12]', smiles):
        number = match.group(0)[1]  # the digit right after the first 'c'
        if number not in aromatic_cycles:
            aromatic_cycles.append(number)

    # Pattern pairs (start, end); '\1' in the end pattern is a placeholder
    # that is textually replaced by the ring digit captured by the start.
    cycle_patterns = [
        (r'[^\d](\d)[A-Z@]', r'C\1=O$'),                 # Classic C=O ending
        (r'[^\d](\d)[A-Z@]', r'N\1C\(=O\)'),             # N1C(=O) pattern
        (r'[^\d](\d)[A-Z@]', r'N\1C$'),                  # Simple N1C ending
        (r'[^\d](\d)C\(=O\)', r'N\1[A-Z]'),              # Reverse connection
        (r'H(\d)', r'N\1C'),                             # H1...N1C pattern
        (r'[^\d](\d)(?:C|N|O)', r'(?:C|N)\1(?:\(|$)'),   # Generic cycle closure
    ]

    # Find potential cycle numbers: each pattern pair contributes at most
    # one label (the first start whose closure is found).
    cycle_closures = []
    for start_pat, end_pat in cycle_patterns:
        for start_match in re.finditer(start_pat, smiles):
            number = start_match.group(1)
            if number in aromatic_cycles:  # Skip aromatic ring numbers
                continue
            end_regex = re.compile(end_pat.replace('\\1', number))
            # Search strictly after the start position.  Searching the whole
            # string and checking only the first hit would miss a valid
            # closure whenever the same motif also occurs earlier.
            if end_regex.search(smiles, start_match.start() + 1):
                cycle_closures.append(number)
                break

    # Remove duplicates and aromatic numbers; sort so the reported order is
    # stable across runs (set iteration order of strings is not).
    peptide_cycles = sorted(set(cycle_closures) - set(aromatic_cycles))

    is_cyclic = len(peptide_cycles) > 0

    return is_cyclic, peptide_cycles, aromatic_cycles
|
| 93 |
|
| 94 |
def split_on_bonds(self, smiles):
|
| 95 |
"""Split SMILES into segments with simplified Pro handling"""
|
|
|
|
| 619 |
summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
| 620 |
if is_cyclic:
|
| 621 |
summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
| 622 |
+
#summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
| 623 |
|
| 624 |
return summary + output_text, img_cyclic, img_linear
|
| 625 |
|
|
|
|
| 683 |
output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
| 684 |
if is_cyclic:
|
| 685 |
output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
| 686 |
+
#output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
| 687 |
output_text += "-" * 50 + "\n"
|
| 688 |
|
| 689 |
return output_text, None, None
|