Spaces:

swyx
/

BeeWeb

Sleeping

App Files Files Community

swyx committed on Oct 21, 2024

Commit

d0bef40

1 Parent(s): 9cdfb93

speaker joining and sorting

Browse files

Files changed (1) hide show

app.py +18 -7

app.py CHANGED Viewed

@@ -80,24 +80,35 @@ def format_conversation(data: Dict[str, Any]) -> str:
         # for key in ['summary']: #, 'short_summary', 'state', 'created_at', 'updated_at']:
         #     if key in conversation:
         #         formatted += f"**{key}**: {conversation[key]}\n"
         if 'transcriptions' in conversation and conversation['transcriptions']:
             formatted += "\n\n## Transcriptions\n\n"
             last_timestamp = None
-            for utterance in conversation['transcriptions'][0].get('utterances', []):
                 current_timestamp = utterance.get('spoken_at')
-                speaker = utterance.get('speaker')
                 text = utterance.get('text')
                 if last_timestamp is not None:
                     time_diff = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')) - datetime.fromisoformat(last_timestamp.replace('Z', '+00:00'))
                     if time_diff.total_seconds() > 300:  # More than 5 minutes
                         local_time = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')).astimezone().strftime('%I:%M %p')
-                        formatted += f"[{local_time}]\n\n"
-                formatted += f"Speaker **[{speaker}](https://kagi.com/search?q={current_timestamp})**: {text}\n\n"
                 last_timestamp = current_timestamp
         return formatted
     except Exception as e:
         logging.error(f"Error formatting conversation: {str(e)}")

         # for key in ['summary']: #, 'short_summary', 'state', 'created_at', 'updated_at']:
         #     if key in conversation:
         #         formatted += f"**{key}**: {conversation[key]}\n"
         if 'transcriptions' in conversation and conversation['transcriptions']:
             formatted += "\n\n## Transcriptions\n\n"
             last_timestamp = None
+            last_speaker = None
+            # Sort utterances chronologically
+            sorted_utterances = sorted(
+                conversation['transcriptions'][0].get('utterances', []),
+                key=lambda x: x.get('spoken_at', '')
+            )
+            for utterance in sorted_utterances:
                 current_timestamp = utterance.get('spoken_at')
+                speaker = int(utterance.get('speaker')) if utterance.get('speaker', '').isdigit() else str(utterance.get('speaker', ''))
                 text = utterance.get('text')
                 if last_timestamp is not None:
                     time_diff = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')) - datetime.fromisoformat(last_timestamp.replace('Z', '+00:00'))
                     if time_diff.total_seconds() > 300:  # More than 5 minutes
                         local_time = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')).astimezone().strftime('%I:%M %p')
+                        formatted += f"\n\n[{local_time}]\n"
+                # Convert speaker to string to ensure consistent comparison
+                if speaker != last_speaker:
+                    formatted += f"\n\nSpeaker **[{speaker}](https://kagi.com/search?q={current_timestamp})**: {text}"
+                else:
+                    formatted += f" {text}"
                 last_timestamp = current_timestamp
+                last_speaker = speaker
         return formatted
     except Exception as e:
         logging.error(f"Error formatting conversation: {str(e)}")