Spaces:

ChatterjeeLab
/

SMILES2PEPTIDE

Running

App Files Community

yinuozhang committed on Nov 17, 2024

Commit

a9bf7b9

1 Parent(s): a7360b6

cyclic

Browse files

Files changed (1) hide show

app.py +38 -48

app.py CHANGED Viewed

@@ -49,57 +49,47 @@ class PeptideAnalyzer:
     def is_cyclic(self, smiles):
         """
-        Determine if SMILES represents a cyclic peptide
         Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
         """
-        cycle_info = {}
-        # Find all cycle numbers and their contexts
-        for match in re.finditer(r'(\d)', smiles):
-            number = match.group(1)
-            position = match.start(1)
-            if number not in cycle_info:
-                cycle_info[number] = []
-            cycle_info[number].append({
-                'position': position,
-                'full_context': smiles[max(0, position-3):min(len(smiles), position+4)]
-            })
-        # Check each cycle
-        peptide_cycles = []
         aromatic_cycles = []
-        for number, occurrences in cycle_info.items():
-            if len(occurrences) != 2:
-                continue
-            start, end = occurrences[0]['position'], occurrences[1]['position']
-            segment = smiles[start:end+1]
-            # Check for aromatic rings
-            full_context = smiles[max(0,start-10):min(len(smiles),end+10)]
-            is_aromatic = ('c2ccccc2' in full_context and len(segment) < 20) or \
-                         ('c1ccccc1' in full_context and len(segment) < 20)
-            # Check for peptide bonds
-            peptide_patterns = [
-                'C(=O)N',  # Regular peptide bond
-                'C(=O)N(C)',  # N-methylated peptide bond
-                'C(=O)N1',  # Cyclic peptide bond
-                'C(=O)N2'   # Cyclic peptide bond
-            ]
-            has_peptide_bond = any(pattern in segment for pattern in peptide_patterns) and \
-                              len(segment) > 20
-            if is_aromatic and len(segment) < 20:
-                aromatic_cycles.append(number)
-            elif has_peptide_bond:
-                peptide_cycles.append(number)
-        return len(peptide_cycles) > 0, peptide_cycles, aromatic_cycles
     def split_on_bonds(self, smiles):
         """Split SMILES into segments with simplified Pro handling"""
@@ -629,7 +619,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
             summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
             if is_cyclic:
                 summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
-                summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
             return summary + output_text, img_cyclic, img_linear
@@ -693,7 +683,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
                     output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
                     if is_cyclic:
                         output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
-                        output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None

     def is_cyclic(self, smiles):
         """
+        Determine if SMILES represents a cyclic peptide by checking head-tail connection.
         Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
         """
+        # First find aromatic rings
         aromatic_cycles = []
+        for match in re.finditer(r'c[12]ccccc[12]', smiles):
+            number = match.group(0)[1]
+            if number not in aromatic_cycles:
+                aromatic_cycles.append(str(number))
+        # Find potential cycle numbers and their contexts
+        cycle_closures = []
+        # Look for cycle starts and corresponding ends
+        cycle_patterns = [
+            # Pattern pairs (start, end)
+            (r'[^\d](\d)[A-Z@]', r'C\1=O$'),  # Classic C=O ending
+            (r'[^\d](\d)[A-Z@]', r'N\1C\(=O\)'),  # N1C(=O) pattern
+            (r'[^\d](\d)[A-Z@]', r'N\1C$'),  # Simple N1C ending
+            (r'[^\d](\d)C\(=O\)', r'N\1[A-Z]'),  # Reverse connection
+            (r'H(\d)', r'N\1C'),  # H1...N1C pattern
+            (r'[^\d](\d)(?:C|N|O)', r'(?:C|N)\1(?:\(|$)'),  # Generic cycle closure
+        ]
+        for start_pat, end_pat in cycle_patterns:
+            start_matches = re.finditer(start_pat, smiles)
+            for start_match in start_matches:
+                number = start_match.group(1)
+                if number not in aromatic_cycles:  # Skip aromatic ring numbers
+                    # Look for corresponding end pattern
+                    end_match = re.search(end_pat.replace('\\1', number), smiles)
+                    if end_match and end_match.start() > start_match.start():
+                        cycle_closures.append(number)
+                        break
+        # Remove duplicates and aromatic numbers
+        peptide_cycles = list(set(cycle_closures) - set(aromatic_cycles))
+        is_cyclic = len(peptide_cycles) > 0
+        return is_cyclic, peptide_cycles, aromatic_cycles
     def split_on_bonds(self, smiles):
         """Split SMILES into segments with simplified Pro handling"""
             summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
             if is_cyclic:
                 summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
+                #summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
             return summary + output_text, img_cyclic, img_linear
                     output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
                     if is_cyclic:
                         output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
+                        #output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None