Spaces:

ChatterjeeLab
/

SMILES2PEPTIDE

Running

App Files Files Community

yinuozhang committed on May 22

Commit

bcae4e2

1 Parent(s): 22efa51

debug

Browse files

Files changed (1) hide show

app.py +68 -200

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
 import gradio_client.utils as client_utils
 _original = client_utils._json_schema_to_python_type
 def _safe_json_schema_to_python_type(schema, defs=None):
     """Wrap the saved gradio_client converter so boolean schemas never reach it.

     Returns the string "Any" when ``schema`` is a ``bool``; otherwise forwards
     ``schema`` and ``defs`` unchanged to ``_original`` (the converter saved
     just above) and returns whatever it returns.
     """
     # NOTE(review): presumably the upstream helper fails on bare True/False
     # schemas -- confirm against the installed gradio_client version.
     if isinstance(schema, bool):
         return "Any"
     return _original(schema, defs)
-# Override both entry points
 client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
 client_utils.json_schema_to_python_type  = _safe_json_schema_to_python_type
 import gradio as gr
@@ -37,19 +35,18 @@ class PeptideAnalyzer:
             (r'C\(=O\)N[12]?', 'peptide_reverse')  # Reverse peptide bond
         ]
         self.complex_residue_patterns = [
-            # Kpg - Lys(palmitoyl-Glu-OtBu) - Exact pattern for the specific structure
             (r'\[C[@]H\]\(CCCNC\(=O\)CCC\[C@@H\]\(NC\(=O\)CCCCCCCCCCCCCCCC\)C\(=O\)OC\(C\)\(C\)C\)', 'Kpg'),
             (r'CCCCCCCCCCCCCCCCC\(=O\)N\[C@H\]\(CCCC\(=O\)NCCC\[C@@H\]', 'Kpg'),
             (r'\[C@*H\]\(CSC\(c\d+ccccc\d+\)\(c\d+ccccc\d+\)c\d+ccc\(OC\)cc\d+\)', 'Cmt'),
-            (r'CSC\(c.*?c.*?OC\)', 'Cmt'),        # Core structure of Cys-Mmt group
-            (r'COc.*?ccc\(C\(SC', 'Cmt'),         # Start of Cmt in cyclic peptides
-            (r'c2ccccc2\)c2ccccc2\)cc', 'Cmt'),   # End of Cmt in cyclic peptides
-            # Glu(OAll) - Only match the complete pattern to avoid partial matches
             (r'C=CCOC\(=O\)CC\[C@@H\]', 'Eal'),
             (r'\(C\)OP\(=O\)\(O\)OCc\d+ccccc\d+', 'Tpb'),
             #(r'COc\d+ccc\(C\(SC\[C@@H\]\d+.*?\)\(c\d+ccccc\d+\)c\d+ccccc\d+\)cc\d+', 'Cmt-cyclic'),
-            # Dtg - Asp(OtBu)-(Dmb)Gly - Full pattern
             (r'CN\(Cc\d+ccc\(OC\)cc\d+OC\)C\(=O\)\[C@H\]\(CC\(=O\)OC\(C\)\(C\)C\)', 'Dtg'),
             (r'C\(=O\)N\(CC\d+=C\(C=C\(C=C\d+\)OC\)OC\)CC\(=O\)', 'Dtg'),
             (r'N\[C@@H\]\(CC\(=O\)OC\(C\)\(C\)C\)C\(=O\)N\(CC\d+=C\(C=C\(C=C\d+\)OC\)OC\)CC\(=O\)', 'Dtg'),
@@ -71,13 +68,10 @@ class PeptideAnalyzer:
         }
     def preprocess_complex_residues(self, smiles):
         """Identify and protect complex residues with internal peptide bonds - improved to prevent overlaps"""
-        # Create a mapping of positions to complex residue types
         complex_positions = []
-        # Search for all complex residue patterns
         for pattern, residue_type in self.complex_residue_patterns:
             for match in re.finditer(pattern, smiles):
-                # Only add if this position doesn't overlap with existing matches
                 if not any(pos['start'] <= match.start() < pos['end'] or
                           pos['start'] < match.end() <= pos['end'] for pos in complex_positions):
                     complex_positions.append({
@@ -87,56 +81,44 @@ class PeptideAnalyzer:
                         'pattern': match.group()
                     })
-        # Sort by position (to handle potential overlapping matches)
         complex_positions.sort(key=lambda x: x['start'])
-        # If no complex residues found, return original SMILES
         if not complex_positions:
             return smiles, []
-        # Build a new SMILES string, protecting complex residues
         preprocessed_smiles = smiles
-        offset = 0  # Track offset from replacements
         protected_residues = []
         for pos in complex_positions:
-            # Adjust positions based on previous replacements
             start = pos['start'] + offset
             end = pos['end'] + offset
-            # Extract the complex residue part
             complex_part = preprocessed_smiles[start:end]
-            # Verify this is a complete residue (should have proper amino acid structure)
             if not ('[C@H]' in complex_part or '[C@@H]' in complex_part):
-                continue  # Skip if not a proper amino acid structure
-            # Create a placeholder for this complex residue
             placeholder = f"COMPLEX_RESIDUE_{len(protected_residues)}"
-            # Replace the complex part with the placeholder
             preprocessed_smiles = preprocessed_smiles[:start] + placeholder + preprocessed_smiles[end:]
-            # Track the offset change
             offset += len(placeholder) - (end - start)
-            # Store the residue information
             protected_residues.append({
                 'placeholder': placeholder,
                 'type': pos['type'],
                 'content': complex_part
             })
-            #print(f"Protected {pos['type']}: {complex_part[:20]}... as {placeholder}")
         return preprocessed_smiles, protected_residues
     def split_on_bonds(self, smiles, protected_residues=None):
         """Split SMILES into segments based on peptide bonds, with improved handling of protected residues"""
         positions = []
         used = set()
-        # First, handle protected complex residues if any
         if protected_residues:
             for residue in protected_residues:
                 match = re.search(residue['placeholder'], smiles)
@@ -166,7 +148,6 @@ class PeptideAnalyzer:
                 })
                 used.update(range(match.start(), match.end()))
-        # Then find all other bonds
         for pattern, bond_type in self.bond_patterns:
             for match in re.finditer(pattern, smiles):
                 if not any(p in range(match.start(), match.end()) for p in used):
@@ -178,17 +159,13 @@ class PeptideAnalyzer:
                     })
                     used.update(range(match.start(), match.end()))
-        # Sort all positions
         bond_positions.sort(key=lambda x: x['start'])
-        # Combine complex residue positions and bond positions
         all_positions = positions + bond_positions
         all_positions.sort(key=lambda x: x['start'])
-        # Create segments
         segments = []
-        # First segment (if not starting with a bond or complex residue)
         if all_positions and all_positions[0]['start'] > 0:
             segments.append({
                 'content': smiles[0:all_positions[0]['start']],
@@ -196,12 +173,10 @@ class PeptideAnalyzer:
                 'complex_after': all_positions[0]['pattern'] if all_positions[0]['type'] == 'complex' else None
             })
-        # Process segments between positions
         for i in range(len(all_positions)-1):
             current = all_positions[i]
             next_pos = all_positions[i+1]
-            # Handle complex residues
             if current['type'] == 'complex':
                 segments.append({
                     'content': current['content'],
@@ -209,7 +184,6 @@ class PeptideAnalyzer:
                     'bond_after': next_pos['pattern'] if next_pos['type'] != 'complex' else None,
                     'complex_type': current['residue_type']
                 })
-            # Handle regular bonds
             elif current['type'] == 'gly':
                 segments.append({
                     'content': 'NCC(=O)',
@@ -217,7 +191,6 @@ class PeptideAnalyzer:
                     'bond_after': next_pos['pattern'] if next_pos['type'] != 'complex' else None
                 })
             else:
-                # Only create segment if there's content between this bond and next position
                 content = smiles[current['end']:next_pos['start']]
                 if content and next_pos['type'] != 'complex':
                     segments.append({
@@ -268,14 +241,13 @@ class PeptideAnalyzer:
         # Find all numbers used in ring closures
         ring_numbers = re.findall(r'(?:^|[^c])[0-9](?=[A-Z@\(\)])', smiles)
-        # Find aromatic ring numbers
         aromatic_matches = re.findall(r'c[0-9](?:ccccc|c\[nH\]c)[0-9]', smiles)
         aromatic_cycles = []
         for match in aromatic_matches:
             numbers = re.findall(r'[0-9]', match)
             aromatic_cycles.extend(numbers)
-        # Numbers that aren't part of aromatic rings are peptide cycles
         peptide_cycles = [n for n in ring_numbers if n not in aromatic_cycles]
         is_cyclic = len(peptide_cycles) > 0 and not smiles.endswith('C(=O)O')
@@ -309,17 +281,15 @@ class PeptideAnalyzer:
             print("DIRECT MATCH: Found Cmt at beginning")
             return 'Cmt', mods
-        # VERY EXPLICIT check for the last segment in your example
         if '[C@@H]3CCCN3C2=O)(c2ccccc2)c2ccccc2)cc' in content:
             print("DIRECT MATCH: Found Pro at end")
             return 'Pro', mods
-        # === Original amino acid patterns ===
         # Eal - Glu(OAll) - Multiple patterns
         if 'CCC(=O)OCC=C' in content or 'CC(=O)OCC=C' in content or 'C=CCOC(=O)CC' in content:
             return 'Eal', mods
-        # Proline (P) - flexible ring numbers
         if any([
-            # Check for any ring number in bond patterns
             (segment.get('bond_after', '').startswith(f'N{n}C(=O)') and 'CCC' in content and
             any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
             for n in '123456789'
@@ -327,12 +297,11 @@ class PeptideAnalyzer:
                 any(f'CCC{n}' for n in '123456789'))
                 for n in '123456789'
         ]) or any([
-            # Check ending patterns with any ring number
             (f'CCCN{n}' in content and content.endswith('=O') and
             any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
             for n in '123456789'
         ]) or any([
-            # Handle CCC[C@H]n patterns
             (content == f'CCC[C@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
             (content == f'CCC[C@@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
             # N-terminal Pro with any ring number
@@ -349,35 +318,29 @@ class PeptideAnalyzer:
         # Tryptophan (W) - more specific indole pattern
         if re.search(r'c[0-9]c\[nH\]c[0-9]ccccc[0-9][0-9]', content) and \
         'c[nH]c' in content.replace(' ', ''):
-            # Check stereochemistry for D/L
             if '[C@H](CC' in content:  # D-form
                 return 'trp', mods
             return 'Trp', mods
         # Lysine (K) - both patterns
         if '[C@@H](CCCCN)' in content or '[C@H](CCCCN)' in content:
-            # Check stereochemistry for D/L
             if '[C@H](CCCCN)' in content:  # D-form
                 return 'lys', mods
             return 'Lys', mods
         # Arginine (R) - both patterns
         if '[C@@H](CCCNC(=N)N)' in content or '[C@H](CCCNC(=N)N)' in content:
-            # Check stereochemistry for D/L
             if '[C@H](CCCNC(=N)N)' in content:  # D-form
                 return 'arg', mods
             return 'Arg', mods
-        # Regular residue identification
         if content == 'C' and segment.get('bond_before') and segment.get('bond_after'):
-            # If it's surrounded by peptide bonds, it's almost certainly Gly
             if ('C(=O)N' in segment['bond_before'] or 'NC(=O)' in segment['bond_before'] or 'N(C)C(=O)' in segment['bond_before']) and \
                ('NC(=O)' in segment['bond_after'] or 'C(=O)N' in segment['bond_after'] or 'N(C)C(=O)' in segment['bond_after']):
                 return 'Gly', mods
-        # Case 2: Cyclic terminal glycine - typically contains 'CNC' with ring closure
         if 'CNC' in content and any(f'C{i}=' in content for i in range(1, 10)):
-            return 'Gly', mods  # This will catch patterns like 'CNC1=O'
         if not segment.get('bond_before') and segment.get('bond_after'):
             if content == 'C' or content == 'NC':
                 if ('NC(=O)' in segment['bond_after'] or 'C(=O)N' in segment['bond_after'] or 'N(C)C(=O)' in segment['bond_after']):
@@ -385,14 +348,12 @@ class PeptideAnalyzer:
         # Leucine patterns (L/l)
         if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content or '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content or (('N[C@H](CCC(C)C)' in content or 'N[C@@H](CCC(C)C)' in content) and segment.get('bond_before') is None):
-            # Check stereochemistry for D/L
             if '[C@H](CC(C)C)' in content or 'CC(C)C[C@H]' in content:  # D-form
                 return 'leu', mods
             return 'Leu', mods
         # Threonine patterns (T/t)
         if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content or '[C@@H]([C@H](C)O)' in content or '[C@H]([C@@H](C)O)' in content:
-            # Check both stereochemistry patterns
             if '[C@H]([C@@H](C)O)' in content:  # D-form
                 return 'thr', mods
             return 'Thr', mods
@@ -402,7 +363,6 @@ class PeptideAnalyzer:
         # Phenylalanine patterns (F/f)
         if re.search(r'\[C@H\]\(Cc\d+ccccc\d+\)', content) or re.search(r'\[C@@H\]\(Cc\d+ccccc\d+\)', content):
-            # Check stereochemistry for D/L
             if re.search(r'\[C@H\]\(Cc\d+ccccc\d+\)', content):  # D-form
                 return 'phe', mods
             return 'Phe', mods
@@ -411,15 +371,12 @@ class PeptideAnalyzer:
             '[C@H](C(C)C)' in content or '[C@@H](C(C)C)' in content or
             'C(C)C[C@H]' in content or 'C(C)C[C@@H]' in content):
-            # Make sure it's not leucine
             if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]', 'CCC(=O)']):
-                # Check stereochemistry
                 if '[C@H]' in content and not '[C@@H]' in content:  # D-form
                     return 'val', mods
                 return 'Val', mods
         # Isoleucine patterns (I/i)
-        # First check for various isoleucine patterns while excluding valine
         if (any(['CC[C@@H](C)' in content, '[C@@H](C)CC' in content, '[C@@H](CC)C' in content,
                 'C(C)C[C@@H]' in content, '[C@@H]([C@H](C)CC)' in content, '[C@H]([C@@H](C)CC)' in content,
                 '[C@@H]([C@@H](C)CC)' in content, '[C@H]([C@H](C)CC)' in content,
@@ -429,30 +386,26 @@ class PeptideAnalyzer:
                 'CC[C@H](C)[C@H]' in content, 'CC[C@@H](C)[C@@H]' in content])
             and 'CC(C)C' not in content):  # Exclude valine pattern
-            # Check stereochemistry for D/L forms
             if any(['[C@H]([C@@H](CC)C)' in content, '[C@H](CC)C' in content,
                     '[C@H]([C@@H](C)CC)' in content, '[C@H]([C@H](C)CC)' in content,
                     'C[C@@H](CC)[C@H]' in content, 'C[C@H](CC)[C@H]' in content,
                     'CC[C@@H](C)[C@H]' in content, 'CC[C@H](C)[C@H]' in content]):
                 # D-form
                 return 'ile', mods
-            # All other stereochemistries are treated as L-form
             return 'Ile', mods
-        # Tpb - Thr(PO(OBzl)OH) - Multiple patterns
         if re.search(r'\(C\)OP\(=O\)\(O\)OCc[0-9]ccccc[0-9]', content) or 'OP(=O)(O)OCC' in content:
             return 'Tpb', mods
         # Alanine patterns (A/a)
         if ('[C@H](C)' in content or '[C@@H](C)' in content):
             if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O', 'CC[C@H]', 'CC[C@@H]']):
-                # Check stereochemistry for D/L
                 if '[C@H](C)' in content:  # D-form
                     return 'ala', mods
                 return 'Ala', mods
         # Tyrosine patterns (Y/y)
         if re.search(r'Cc[0-9]ccc\(O\)cc[0-9]', content):
-            # Check stereochemistry for D/L
             if '[C@H](Cc1ccc(O)cc1)' in content:  # D-form
                 return 'tyr', mods
             return 'Tyr', mods
@@ -460,25 +413,24 @@ class PeptideAnalyzer:
         # Serine patterns (S/s)
         if '[C@H](CO)' in content or '[C@@H](CO)' in content:
             if not ('C(C)O' in content or 'COC' in content):
-                # Check stereochemistry for D/L
                 if '[C@H](CO)' in content:  # D-form
                     return 'ser', mods
                 return 'Ser', mods
         if 'CSSC' in content:
-            # Check for various cysteine-cysteine bridge patterns
             if re.search(r'\[C@@H\].*CSSC.*\[C@@H\]', content) or re.search(r'\[C@H\].*CSSC.*\[C@H\]', content):
                 if '[C@H]' in content and not '[C@@H]' in content:  # D-form
                     return 'cys-cys', mods
                 return 'Cys-Cys', mods
-            # Pattern for cysteine with N-terminal amine group
             if '[C@@H](N)CSSC' in content or '[C@H](N)CSSC' in content:
                 if '[C@H](N)CSSC' in content:  # D-form
                     return 'cys-cys', mods
                 return 'Cys-Cys', mods
-            # Pattern for cysteine with C-terminal carboxyl
             if 'CSSC[C@@H](C(=O)O)' in content or 'CSSC[C@H](C(=O)O)' in content:
                 if 'CSSC[C@H](C(=O)O)' in content:  # D-form
                     return 'cys-cys', mods
@@ -486,14 +438,12 @@ class PeptideAnalyzer:
         # Cysteine patterns (C/c)
         if '[C@H](CS)' in content or '[C@@H](CS)' in content:
-            # Check stereochemistry for D/L
             if '[C@H](CS)' in content:  # D-form
                 return 'cys', mods
             return 'Cys', mods
         # Methionine patterns (M/m)
         if ('CCSC' in content) or ("CSCC" in content):
-            # Check stereochemistry for D/L
             if '[C@H](CCSC)' in content:  # D-form
                 return 'met', mods
             elif '[C@H]' in content:
@@ -502,34 +452,29 @@ class PeptideAnalyzer:
         # Glutamine patterns (Q/q)
         if (content == '[C@@H](CC' or content == '[C@H](CC' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CCC(=O)N' in content) or ('CCC(N)=O' in content):
-            # Check stereochemistry for D/L
             if '[C@H](CCC(=O)N)' in content:  # D-form
                 return 'gln', mods
             return 'Gln', mods
         # Asparagine patterns (N/n)
         if (content == '[C@@H](C' or content == '[C@H](C' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CC(=O)N' in content) or ('CCN(=O)' in content) or ('CC(N)=O' in content):
-            # Check stereochemistry for D/L
             if '[C@H](CC(=O)N)' in content:  # D-form
                 return 'asn', mods
             return 'Asn', mods
         # Glutamic acid patterns (E/e)
         if ('CCC(=O)O' in content):
-            # Check stereochemistry for D/L
             if '[C@H](CCC(=O)O)' in content:  # D-form
                 return 'glu', mods
             return 'Glu', mods
         # Aspartic acid patterns (D/d)
         if ('CC(=O)O' in content):
-            # Check stereochemistry for D/L
             if '[C@H](CC(=O)O)' in content:  # D-form
                 return 'asp', mods
             return 'Asp', mods
         if re.search(r'Cc\d+c\[nH\]cn\d+', content) or re.search(r'Cc\d+cnc\[nH\]\d+', content):
-            # Check stereochemistry for D/L
             if '[C@H]' in content:  # D-form
                 return 'his', mods
             return 'His', mods
@@ -539,29 +484,26 @@ class PeptideAnalyzer:
         if ('N[C@@H](CCCC)' in content or '[C@@H](CCCC)' in content or 'CCCC[C@@H]' in content or
             'N[C@H](CCCC)' in content or '[C@H](CCCC)' in content) and 'CC(C)' not in content:
             return 'Nle', mods
-        # Aib - alpha-aminoisobutyric acid (2-aminoisobutyric acid)
-        # More flexible pattern detection
         if 'C(C)(C)(N)' in content:
             return 'Aib', mods
-        # Partial Aib pattern but NOT part of t-butyl ester
         if 'C(C)(C)' in content and 'OC(C)(C)C' not in content:
             if (segment.get('bond_before') and segment.get('bond_after') and
                 any(bond in segment['bond_before'] for bond in ['C(=O)N', 'NC(=O)', 'N(C)C(=O)']) and
                 any(bond in segment['bond_after'] for bond in ['NC(=O)', 'C(=O)N', 'N(C)C(=O)'])):
                 return 'Aib', mods
-        # Dtg - Asp(OtBu)-(Dmb)Gly - Simplified pattern for better detection
         if 'CC(=O)OC(C)(C)C' in content and 'CC1=C(C=C(C=C1)OC)OC' in content:
             return 'Dtg', mods
-        # Kpg - Lys(palmitoyl-Glu-OtBu) - Simplified pattern
         if 'CCCNC(=O)' in content and 'CCCCCCCCCCCC' in content:
             return 'Kpg', mods
         return None, mods
     def get_modifications(self, segment):
@@ -582,67 +524,45 @@ class PeptideAnalyzer:
         return mods
-    def analyze_structure(self, smiles):
-        """Main analysis function with preprocessing for complex residues"""
-        #print("\nAnalyzing structure:", smiles)
-        # Pre-process to identify complex residues first
         preprocessed_smiles, protected_residues = self.preprocess_complex_residues(smiles)
-        """
-        if protected_residues:
-            print(f"Identified {len(protected_residues)} complex residues during pre-processing")
-            for i, residue in enumerate(protected_residues):
-                print(f"Complex residue {i+1}: {residue['type']}")
-        """
-        # Check if it's cyclic
         is_cyclic, peptide_cycles, aromatic_cycles = self.is_cyclic(smiles)
-        # Split into segments, respecting protected residues
         segments = self.split_on_bonds(preprocessed_smiles, protected_residues)
-        #print("\nSegment Analysis:")
         sequence = []
         for i, segment in enumerate(segments):
-            """
-            print(f"\nSegment {i}:")
-            print(f"Content: {segment.get('content', 'None')}")
-            print(f"Bond before: {segment.get('bond_before', 'None')}")
-            print(f"Bond after: {segment.get('bond_after', 'None')}")
-            """
             residue, mods = self.identify_residue(segment)
             if residue:
                 if mods:
                     sequence.append(f"{residue}({','.join(mods)})")
                 else:
                     sequence.append(residue)
-                #print(f"Identified as: {residue}")
-                #print(f"Modifications: {mods}")
             else:
-                print(f"Warning: Could not identify residue in segment: {segment.get('content', 'None')}")
-        # Format the sequence
         three_letter = '-'.join(sequence)
-        # Use the mapping to create one-letter code
         one_letter = ''.join(self.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence)
         if is_cyclic:
             three_letter = f"cyclo({three_letter})"
             one_letter = f"cyclo({one_letter})"
-        """
-        print(f"\nFinal sequence: {three_letter}")
-        print(f"One-letter code: {one_letter}")
-        print(f"Is cyclic: {is_cyclic}")
-        print(f"Peptide cycles: {peptide_cycles}")
-        print(f"Aromatic cycles: {aromatic_cycles}")
-        """
         return {
             'three_letter': three_letter,
             'one_letter': one_letter,
             'is_cyclic': is_cyclic,
-            'residues': sequence
         }
 def annotate_cyclic_structure(mol, sequence):
@@ -651,12 +571,10 @@ def annotate_cyclic_structure(mol, sequence):
     drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
-    # Draw molecule first
     drawer.drawOptions().addAtomIndices = False
     drawer.DrawMolecule(mol)
     drawer.FinishDrawing()
-    # Convert to PIL Image
     img = Image.open(BytesIO(drawer.GetDrawingText()))
     draw = ImageDraw.Draw(img)
     try:
@@ -668,7 +586,6 @@ def annotate_cyclic_structure(mol, sequence):
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
-    # Header
     seq_text = f"Sequence: {sequence}"
     bbox = draw.textbbox((1000, 100), seq_text, font=small_font)
     padding = 10
@@ -751,7 +668,6 @@ def create_enhanced_linear_viz(sequence, smiles):
                 text += f" ({', '.join(mods)})"
             color = 'blue'
         else:
-            # Must be a bond
             text = f"Bond {i}: "
             if 'O-linked' in segment.get('bond_after', ''):
                 text += "ester"
@@ -893,7 +809,7 @@ class PeptideStructureGenerator:
 def process_input(
     smiles_input=None,
     file_obj=None,
-    show_linear=False,
     show_segment_details=False,
     generate_3d=False,
     use_uff=False
@@ -946,60 +862,22 @@ def process_input(
                 except Exception as e:
                     return f"Error generating 3D structures: {str(e)}", None, None, []
-            analysis = analyzer.analyze_structure(smiles)
             three_letter = analysis['three_letter']
             one_letter = analysis['one_letter']
             is_cyclic = analysis['is_cyclic']
-            # Only include segment analysis in output if requested
-            if show_segment_details:
-                segments = analyzer.split_on_bonds(smiles)
-                sequence_parts = []
-                output_text = ""
-                output_text += "Segment Analysis:\n"
-                for i, segment in enumerate(segments):
-                    output_text += f"\nSegment {i}:\n"
-                    output_text += f"Content: {segment['content']}\n"
-                    output_text += f"Bond before: {segment.get('bond_before', 'None')}\n"
-                    output_text += f"Bond after: {segment.get('bond_after', 'None')}\n"
-                    residue, mods = analyzer.identify_residue(segment)
-                    if residue:
-                        if mods:
-                            sequence_parts.append(f"{residue}({','.join(mods)})")
-                        else:
-                            sequence_parts.append(residue)
-                        output_text += f"Identified as: {residue}\n"
-                        output_text += f"Modifications: {mods}\n"
-                    else:
-                        output_text += f"Warning: Could not identify residue in segment: {segment['content']}\n"
-                output_text += "\n"
-                is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
-                three_letter = '-'.join(sequence_parts)
-                one_letter = ''.join(analyzer.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence_parts)
-            else:
-                pass
             img_cyclic = annotate_cyclic_structure(mol, three_letter)
-            # Create linear representation if requested
-            img_linear = None
-            if show_linear:
-                fig_linear = create_enhanced_linear_viz(three_letter, smiles)
-                buf = BytesIO()
-                fig_linear.savefig(buf, format='png', bbox_inches='tight', dpi=300)
-                buf.seek(0)
-                img_linear = Image.open(buf)
-                plt.close(fig_linear)
             summary = "Summary:\n"
             summary += f"Sequence: {three_letter}\n"
             summary += f"One-letter code: {one_letter}\n"
             summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
-            #if is_cyclic:
-                #summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
-                #summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
             if structure_files:
                 summary += "\n3D Structures Generated:\n"
@@ -1007,11 +885,11 @@ def process_input(
                     summary += f"- {os.path.basename(filepath)}\n"
             #return summary, img_cyclic, img_linear, structure_files if structure_files else None
-            return summary, img_cyclic
         except Exception as e:
             #return f"Error processing SMILES: {str(e)}", None, None, []
-            return f"Error processing SMILES: {str(e)}", None
     # Handle file input
     if file_obj is not None:
         try:
@@ -1032,7 +910,6 @@ def process_input(
                     continue
                 try:
-                    # Process the structure
                     result = analyzer.analyze_structure(smiles)
                     output_text += f"\nSummary for SMILES: {smiles}\n"
@@ -1053,7 +930,7 @@ def process_input(
             output_text or "No analysis done.",
             img_cyclic if 'img_cyclic' in locals() else None,
             #img_linear if 'img_linear' in locals() else None,
-            #structure_files if structure_files else []
         )
 iface = gr.Interface(
@@ -1063,11 +940,24 @@ iface = gr.Interface(
             label="Enter SMILES string",
             placeholder="Enter SMILES notation of peptide...",
             lines=2
-        ),],
-        #gr.File(
-            #label="Or upload a text file with SMILES",
-            #file_types=[".txt"]
-        #)],
     outputs=[
         gr.Textbox(
             label="Analysis Results",
@@ -1077,6 +967,10 @@ iface = gr.Interface(
             label="2D Structure with Annotations",
             type="pil"
         ),
     ],
     title="Peptide Structure Analyzer and Visualizer",
     description='''
@@ -1105,30 +999,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch(share=True)
-"""
-from fastapi import FastAPI
-import gradio as gr
-# 1) Make a FastAPI with no OpenAPI/docs routes
-app = FastAPI(docs_url=None, redoc_url=None, openapi_url=None)
-# 2) Build your Interface as before
-iface = gr.Interface(
-    fn=process_input,
-    inputs=[ gr.Textbox(label="Enter SMILES string", lines=2) ],
-    outputs=[
-      gr.Textbox(label="Analysis Results", lines=10),
-      gr.Image(label="2D Structure with Annotations", type="pil"),
-    ],
-    title="Peptide Structure Analyzer and Visualizer",
-    flagging_mode="never"
-)
-# 3) Mount it at “/”
-app = gr.mount_gradio_app(app, iface, path="/")
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-"""

 import os
 import gradio_client.utils as client_utils
+# Monkey patch for gradio_client issue
 _original = client_utils._json_schema_to_python_type
 def _safe_json_schema_to_python_type(schema, defs=None):
     if isinstance(schema, bool):
         return "Any"
     return _original(schema, defs)
 client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
 client_utils.json_schema_to_python_type  = _safe_json_schema_to_python_type
 import gradio as gr
             (r'C\(=O\)N[12]?', 'peptide_reverse')  # Reverse peptide bond
         ]
         self.complex_residue_patterns = [
             (r'\[C[@]H\]\(CCCNC\(=O\)CCC\[C@@H\]\(NC\(=O\)CCCCCCCCCCCCCCCC\)C\(=O\)OC\(C\)\(C\)C\)', 'Kpg'),
             (r'CCCCCCCCCCCCCCCCC\(=O\)N\[C@H\]\(CCCC\(=O\)NCCC\[C@@H\]', 'Kpg'),
             (r'\[C@*H\]\(CSC\(c\d+ccccc\d+\)\(c\d+ccccc\d+\)c\d+ccc\(OC\)cc\d+\)', 'Cmt'),
+            (r'CSC\(c.*?c.*?OC\)', 'Cmt'),
+            (r'COc.*?ccc\(C\(SC', 'Cmt'),
+            (r'c2ccccc2\)c2ccccc2\)cc', 'Cmt'),
+            # Glu(OAll)
             (r'C=CCOC\(=O\)CC\[C@@H\]', 'Eal'),
             (r'\(C\)OP\(=O\)\(O\)OCc\d+ccccc\d+', 'Tpb'),
             #(r'COc\d+ccc\(C\(SC\[C@@H\]\d+.*?\)\(c\d+ccccc\d+\)c\d+ccccc\d+\)cc\d+', 'Cmt-cyclic'),
+            # Dtg - Asp(OtBu)-(Dmb)Gly
             (r'CN\(Cc\d+ccc\(OC\)cc\d+OC\)C\(=O\)\[C@H\]\(CC\(=O\)OC\(C\)\(C\)C\)', 'Dtg'),
             (r'C\(=O\)N\(CC\d+=C\(C=C\(C=C\d+\)OC\)OC\)CC\(=O\)', 'Dtg'),
             (r'N\[C@@H\]\(CC\(=O\)OC\(C\)\(C\)C\)C\(=O\)N\(CC\d+=C\(C=C\(C=C\d+\)OC\)OC\)CC\(=O\)', 'Dtg'),
         }
     def preprocess_complex_residues(self, smiles):
         """Identify complex residues and mask them with placeholder tokens.

         Every ``(pattern, residue_type)`` pair in
         ``self.complex_residue_patterns`` is searched in order. A match is
         kept only when its span does not overlap a span claimed by an earlier
         match, so earlier patterns take priority. Kept spans that contain a
         chiral centre (``[C@H]`` or ``[C@@H]``) are then replaced, left to
         right, by ``COMPLEX_RESIDUE_<n>``.

         Args:
             smiles: SMILES string to scan.

         Returns:
             A ``(preprocessed_smiles, protected_residues)`` tuple.
             ``protected_residues`` is a list of dicts with the keys
             ``placeholder`` (the token inserted), ``type`` (the residue type
             of the matching pattern) and ``content`` (the replaced text).
             When nothing matches, the input string and an empty list are
             returned.
         """
         claimed = []
         for pattern, residue_type in self.complex_residue_patterns:
             for match in re.finditer(pattern, smiles):
                 begin, finish = match.span()
                 # Standard interval-overlap test: also rejects a match that
                 # strictly contains an already claimed span.
                 if any(begin < pos['end'] and pos['start'] < finish for pos in claimed):
                     continue
                 claimed.append({
                     'start': begin,
                     'end': finish,
                     'type': residue_type,
                     'pattern': match.group()
                 })

         if not claimed:
             return smiles, []
         claimed.sort(key=lambda pos: pos['start'])

         preprocessed_smiles = smiles
         # Running difference in length between the edited and original string.
         offset = 0
         protected_residues = []
         for pos in claimed:
             start = pos['start'] + offset
             end = pos['end'] + offset
             complex_part = preprocessed_smiles[start:end]
             # Only spans holding a chiral centre are masked.
             if '[C@H]' not in complex_part and '[C@@H]' not in complex_part:
                 continue
             placeholder = f"COMPLEX_RESIDUE_{len(protected_residues)}"
             preprocessed_smiles = preprocessed_smiles[:start] + placeholder + preprocessed_smiles[end:]
             offset += len(placeholder) - (end - start)
             protected_residues.append({
                 'placeholder': placeholder,
                 'type': pos['type'],
                 'content': complex_part
             })
         return preprocessed_smiles, protected_residues
     def split_on_bonds(self, smiles, protected_residues=None):
         """Split SMILES into segments based on peptide bonds, with improved handling of protected residues"""
         positions = []
         used = set()
+        # Handle protected complex residues if any
         if protected_residues:
             for residue in protected_residues:
                 match = re.search(residue['placeholder'], smiles)
                 })
                 used.update(range(match.start(), match.end()))
         for pattern, bond_type in self.bond_patterns:
             for match in re.finditer(pattern, smiles):
                 if not any(p in range(match.start(), match.end()) for p in used):
                     })
                     used.update(range(match.start(), match.end()))
         bond_positions.sort(key=lambda x: x['start'])
         all_positions = positions + bond_positions
         all_positions.sort(key=lambda x: x['start'])
         segments = []
         if all_positions and all_positions[0]['start'] > 0:
             segments.append({
                 'content': smiles[0:all_positions[0]['start']],
                 'complex_after': all_positions[0]['pattern'] if all_positions[0]['type'] == 'complex' else None
             })
         for i in range(len(all_positions)-1):
             current = all_positions[i]
             next_pos = all_positions[i+1]
             if current['type'] == 'complex':
                 segments.append({
                     'content': current['content'],
                     'bond_after': next_pos['pattern'] if next_pos['type'] != 'complex' else None,
                     'complex_type': current['residue_type']
                 })
             elif current['type'] == 'gly':
                 segments.append({
                     'content': 'NCC(=O)',
                     'bond_after': next_pos['pattern'] if next_pos['type'] != 'complex' else None
                 })
             else:
                 content = smiles[current['end']:next_pos['start']]
                 if content and next_pos['type'] != 'complex':
                     segments.append({
         # Find all numbers used in ring closures
         ring_numbers = re.findall(r'(?:^|[^c])[0-9](?=[A-Z@\(\)])', smiles)
+        # Aromatic ring numbers
         aromatic_matches = re.findall(r'c[0-9](?:ccccc|c\[nH\]c)[0-9]', smiles)
         aromatic_cycles = []
         for match in aromatic_matches:
             numbers = re.findall(r'[0-9]', match)
             aromatic_cycles.extend(numbers)
         peptide_cycles = [n for n in ring_numbers if n not in aromatic_cycles]
         is_cyclic = len(peptide_cycles) > 0 and not smiles.endswith('C(=O)O')
             print("DIRECT MATCH: Found Cmt at beginning")
             return 'Cmt', mods
         if '[C@@H]3CCCN3C2=O)(c2ccccc2)c2ccccc2)cc' in content:
             print("DIRECT MATCH: Found Pro at end")
             return 'Pro', mods
         # Eal - Glu(OAll) - Multiple patterns
         if 'CCC(=O)OCC=C' in content or 'CC(=O)OCC=C' in content or 'C=CCOC(=O)CC' in content:
             return 'Eal', mods
+        # Proline (P)
         if any([
             (segment.get('bond_after', '').startswith(f'N{n}C(=O)') and 'CCC' in content and
             any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
             for n in '123456789'
                 any(f'CCC{n}' for n in '123456789'))
                 for n in '123456789'
         ]) or any([
             (f'CCCN{n}' in content and content.endswith('=O') and
             any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
             for n in '123456789'
         ]) or any([
+            # CCC[C@H]n
             (content == f'CCC[C@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
             (content == f'CCC[C@@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
             # N-terminal Pro with any ring number
         # Tryptophan (W) - more specific indole pattern
         if re.search(r'c[0-9]c\[nH\]c[0-9]ccccc[0-9][0-9]', content) and \
         'c[nH]c' in content.replace(' ', ''):
             if '[C@H](CC' in content:  # D-form
                 return 'trp', mods
             return 'Trp', mods
         # Lysine (K) - both patterns
         if '[C@@H](CCCCN)' in content or '[C@H](CCCCN)' in content:
             if '[C@H](CCCCN)' in content:  # D-form
                 return 'lys', mods
             return 'Lys', mods
         # Arginine (R) - both patterns
         if '[C@@H](CCCNC(=N)N)' in content or '[C@H](CCCNC(=N)N)' in content:
             if '[C@H](CCCNC(=N)N)' in content:  # D-form
                 return 'arg', mods
             return 'Arg', mods
         if content == 'C' and segment.get('bond_before') and segment.get('bond_after'):
             if ('C(=O)N' in segment['bond_before'] or 'NC(=O)' in segment['bond_before'] or 'N(C)C(=O)' in segment['bond_before']) and \
                ('NC(=O)' in segment['bond_after'] or 'C(=O)N' in segment['bond_after'] or 'N(C)C(=O)' in segment['bond_after']):
                 return 'Gly', mods
         if 'CNC' in content and any(f'C{i}=' in content for i in range(1, 10)):
+            return 'Gly', mods  #'CNC1=O'
         if not segment.get('bond_before') and segment.get('bond_after'):
             if content == 'C' or content == 'NC':
                 if ('NC(=O)' in segment['bond_after'] or 'C(=O)N' in segment['bond_after'] or 'N(C)C(=O)' in segment['bond_after']):
         # Leucine patterns (L/l)
         if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content or '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content or (('N[C@H](CCC(C)C)' in content or 'N[C@@H](CCC(C)C)' in content) and segment.get('bond_before') is None):
             if '[C@H](CC(C)C)' in content or 'CC(C)C[C@H]' in content:  # D-form
                 return 'leu', mods
             return 'Leu', mods
         # Threonine patterns (T/t)
         if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content or '[C@@H]([C@H](C)O)' in content or '[C@H]([C@@H](C)O)' in content:
             if '[C@H]([C@@H](C)O)' in content:  # D-form
                 return 'thr', mods
             return 'Thr', mods
         # Phenylalanine patterns (F/f)
         if re.search(r'\[C@H\]\(Cc\d+ccccc\d+\)', content) or re.search(r'\[C@@H\]\(Cc\d+ccccc\d+\)', content):
             if re.search(r'\[C@H\]\(Cc\d+ccccc\d+\)', content):  # D-form
                 return 'phe', mods
             return 'Phe', mods
             '[C@H](C(C)C)' in content or '[C@@H](C(C)C)' in content or
             'C(C)C[C@H]' in content or 'C(C)C[C@@H]' in content):
             if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]', 'CCC(=O)']):
                 if '[C@H]' in content and not '[C@@H]' in content:  # D-form
                     return 'val', mods
                 return 'Val', mods
         # Isoleucine patterns (I/i)
         if (any(['CC[C@@H](C)' in content, '[C@@H](C)CC' in content, '[C@@H](CC)C' in content,
                 'C(C)C[C@@H]' in content, '[C@@H]([C@H](C)CC)' in content, '[C@H]([C@@H](C)CC)' in content,
                 '[C@@H]([C@@H](C)CC)' in content, '[C@H]([C@H](C)CC)' in content,
                 'CC[C@H](C)[C@H]' in content, 'CC[C@@H](C)[C@@H]' in content])
             and 'CC(C)C' not in content):  # Exclude valine pattern
             if any(['[C@H]([C@@H](CC)C)' in content, '[C@H](CC)C' in content,
                     '[C@H]([C@@H](C)CC)' in content, '[C@H]([C@H](C)CC)' in content,
                     'C[C@@H](CC)[C@H]' in content, 'C[C@H](CC)[C@H]' in content,
                     'CC[C@@H](C)[C@H]' in content, 'CC[C@H](C)[C@H]' in content]):
                 # D-form
                 return 'ile', mods
             return 'Ile', mods
+        # Tpb - Thr(PO(OBzl)OH)
         if re.search(r'\(C\)OP\(=O\)\(O\)OCc[0-9]ccccc[0-9]', content) or 'OP(=O)(O)OCC' in content:
             return 'Tpb', mods
         # Alanine patterns (A/a)
         if ('[C@H](C)' in content or '[C@@H](C)' in content):
             if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O', 'CC[C@H]', 'CC[C@@H]']):
                 if '[C@H](C)' in content:  # D-form
                     return 'ala', mods
                 return 'Ala', mods
         # Tyrosine patterns (Y/y)
         if re.search(r'Cc[0-9]ccc\(O\)cc[0-9]', content):
             if '[C@H](Cc1ccc(O)cc1)' in content:  # D-form
                 return 'tyr', mods
             return 'Tyr', mods
         # Serine patterns (S/s)
         if '[C@H](CO)' in content or '[C@@H](CO)' in content:
             if not ('C(C)O' in content or 'COC' in content):
                 if '[C@H](CO)' in content:  # D-form
                     return 'ser', mods
                 return 'Ser', mods
         if 'CSSC' in content:
+            # cysteine-cysteine bridge
             if re.search(r'\[C@@H\].*CSSC.*\[C@@H\]', content) or re.search(r'\[C@H\].*CSSC.*\[C@H\]', content):
                 if '[C@H]' in content and not '[C@@H]' in content:  # D-form
                     return 'cys-cys', mods
                 return 'Cys-Cys', mods
+            # N-terminal amine group
             if '[C@@H](N)CSSC' in content or '[C@H](N)CSSC' in content:
                 if '[C@H](N)CSSC' in content:  # D-form
                     return 'cys-cys', mods
                 return 'Cys-Cys', mods
+            # C-terminal carboxyl
             if 'CSSC[C@@H](C(=O)O)' in content or 'CSSC[C@H](C(=O)O)' in content:
                 if 'CSSC[C@H](C(=O)O)' in content:  # D-form
                     return 'cys-cys', mods
         # Cysteine patterns (C/c)
         if '[C@H](CS)' in content or '[C@@H](CS)' in content:
             if '[C@H](CS)' in content:  # D-form
                 return 'cys', mods
             return 'Cys', mods
         # Methionine patterns (M/m)
         if ('CCSC' in content) or ("CSCC" in content):
             if '[C@H](CCSC)' in content:  # D-form
                 return 'met', mods
             elif '[C@H]' in content:
         # Glutamine patterns (Q/q)
         if (content == '[C@@H](CC' or content == '[C@H](CC' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CCC(=O)N' in content) or ('CCC(N)=O' in content):
             if '[C@H](CCC(=O)N)' in content:  # D-form
                 return 'gln', mods
             return 'Gln', mods
         # Asparagine patterns (N/n)
         if (content == '[C@@H](C' or content == '[C@H](C' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CC(=O)N' in content) or ('CCN(=O)' in content) or ('CC(N)=O' in content):
             if '[C@H](CC(=O)N)' in content:  # D-form
                 return 'asn', mods
             return 'Asn', mods
         # Glutamic acid patterns (E/e)
         if ('CCC(=O)O' in content):
             if '[C@H](CCC(=O)O)' in content:  # D-form
                 return 'glu', mods
             return 'Glu', mods
         # Aspartic acid patterns (D/d)
         if ('CC(=O)O' in content):
             if '[C@H](CC(=O)O)' in content:  # D-form
                 return 'asp', mods
             return 'Asp', mods
         if re.search(r'Cc\d+c\[nH\]cn\d+', content) or re.search(r'Cc\d+cnc\[nH\]\d+', content):
             if '[C@H]' in content:  # D-form
                 return 'his', mods
             return 'His', mods
         if ('N[C@@H](CCCC)' in content or '[C@@H](CCCC)' in content or 'CCCC[C@@H]' in content or
             'N[C@H](CCCC)' in content or '[C@H](CCCC)' in content) and 'CC(C)' not in content:
             return 'Nle', mods
         if 'C(C)(C)(N)' in content:
             return 'Aib', mods
         if 'C(C)(C)' in content and 'OC(C)(C)C' not in content:
             if (segment.get('bond_before') and segment.get('bond_after') and
                 any(bond in segment['bond_before'] for bond in ['C(=O)N', 'NC(=O)', 'N(C)C(=O)']) and
                 any(bond in segment['bond_after'] for bond in ['NC(=O)', 'C(=O)N', 'N(C)C(=O)'])):
                 return 'Aib', mods
+        # Dtg - Asp(OtBu)-(Dmb)Gly
         if 'CC(=O)OC(C)(C)C' in content and 'CC1=C(C=C(C=C1)OC)OC' in content:
             return 'Dtg', mods
+        # Kpg - Lys(palmitoyl-Glu-OtBu)
         if 'CCCNC(=O)' in content and 'CCCCCCCCCCCC' in content:
             return 'Kpg', mods
         return None, mods
     def get_modifications(self, segment):
         return mods
+    def analyze_structure(self, smiles, verbose=False):
+        logs = []
         preprocessed_smiles, protected_residues = self.preprocess_complex_residues(smiles)
         is_cyclic, peptide_cycles, aromatic_cycles = self.is_cyclic(smiles)
         segments = self.split_on_bonds(preprocessed_smiles, protected_residues)
         sequence = []
         for i, segment in enumerate(segments):
+            if verbose:
+                logs.append(f"\nSegment {i}:")
+                logs.append(f" Content: {segment.get('content','None')}")
+                logs.append(f" Bond before: {segment.get('bond_before','None')}")
+                logs.append(f" Bond after: {segment.get('bond_after','None')}")
             residue, mods = self.identify_residue(segment)
             if residue:
                 if mods:
                     sequence.append(f"{residue}({','.join(mods)})")
                 else:
                     sequence.append(residue)
             else:
+                logs.append(f"Warning: Could not identify residue in segment: {segment.get('content', 'None')}")
         three_letter = '-'.join(sequence)
         one_letter = ''.join(self.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence)
         if is_cyclic:
             three_letter = f"cyclo({three_letter})"
             one_letter = f"cyclo({one_letter})"
         return {
             'three_letter': three_letter,
             'one_letter': one_letter,
             'is_cyclic': is_cyclic,
+            'residues': sequence,
+            'details': "\n".join(logs)
         }
 def annotate_cyclic_structure(mol, sequence):
     drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
     drawer.drawOptions().addAtomIndices = False
     drawer.DrawMolecule(mol)
     drawer.FinishDrawing()
     img = Image.open(BytesIO(drawer.GetDrawingText()))
     draw = ImageDraw.Draw(img)
     try:
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
     seq_text = f"Sequence: {sequence}"
     bbox = draw.textbbox((1000, 100), seq_text, font=small_font)
     padding = 10
                 text += f" ({', '.join(mods)})"
             color = 'blue'
         else:
             text = f"Bond {i}: "
             if 'O-linked' in segment.get('bond_after', ''):
                 text += "ester"
 def process_input(
     smiles_input=None,
     file_obj=None,
+    #show_linear=False,
     show_segment_details=False,
     generate_3d=False,
     use_uff=False
                 except Exception as e:
                     return f"Error generating 3D structures: {str(e)}", None, None, []
+            analysis = analyzer.analyze_structure(smiles, verbose=show_segment_details)
             three_letter = analysis['three_letter']
             one_letter = analysis['one_letter']
             is_cyclic = analysis['is_cyclic']
+            details = analysis.get('details', "")
             img_cyclic = annotate_cyclic_structure(mol, three_letter)
+            summary = ""
+            if show_segment_details and details:
+                summary += "Segment Analysis:\n"
+                summary += details + "\n\n"
             summary = "Summary:\n"
             summary += f"Sequence: {three_letter}\n"
             summary += f"One-letter code: {one_letter}\n"
             summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
             if structure_files:
                 summary += "\n3D Structures Generated:\n"
                     summary += f"- {os.path.basename(filepath)}\n"
             #return summary, img_cyclic, img_linear, structure_files if structure_files else None
+            return summary, img_cyclic, structure_files or None
         except Exception as e:
             #return f"Error processing SMILES: {str(e)}", None, None, []
+            return f"Error processing SMILES: {str(e)}", None, []
     # Handle file input
     if file_obj is not None:
         try:
                     continue
                 try:
                     result = analyzer.analyze_structure(smiles)
                     output_text += f"\nSummary for SMILES: {smiles}\n"
             output_text or "No analysis done.",
             img_cyclic if 'img_cyclic' in locals() else None,
             #img_linear if 'img_linear' in locals() else None,
+            structure_files if structure_files else []
         )
 iface = gr.Interface(
             label="Enter SMILES string",
             placeholder="Enter SMILES notation of peptide...",
             lines=2
+        ),
+        gr.File(
+            label="Or upload a text file with SMILES",
+            file_types=[".txt"]
+        ),
+        gr.Checkbox(
+            label="Show show segmentation details",
+            value=False
+        ),
+        gr.Checkbox(
+            label="Generate 3D structure (sdf file format)",
+            value=False
+        ),
+        gr.Checkbox(
+            label="Use UFF optimization (may take long)",
+            value=False
+        )
+        ],
     outputs=[
         gr.Textbox(
             label="Analysis Results",
             label="2D Structure with Annotations",
             type="pil"
         ),
+        gr.File(
+            label="3D Structure Files",
+            file_count="multiple"
+        )
     ],
     title="Peptide Structure Analyzer and Visualizer",
     description='''
 )
 if __name__ == "__main__":
+    iface.launch(share=True)