speaker joining and sorting
Browse files
app.py
CHANGED
|
@@ -80,24 +80,35 @@ def format_conversation(data: Dict[str, Any]) -> str:
|
|
| 80 |
# for key in ['summary']: #, 'short_summary', 'state', 'created_at', 'updated_at']:
|
| 81 |
# if key in conversation:
|
| 82 |
# formatted += f"**{key}**: {conversation[key]}\n"
|
| 83 |
-
|
| 84 |
if 'transcriptions' in conversation and conversation['transcriptions']:
|
| 85 |
formatted += "\n\n## Transcriptions\n\n"
|
| 86 |
last_timestamp = None
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
current_timestamp = utterance.get('spoken_at')
|
| 89 |
-
speaker = utterance.get('speaker')
|
| 90 |
text = utterance.get('text')
|
| 91 |
|
| 92 |
if last_timestamp is not None:
|
| 93 |
time_diff = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')) - datetime.fromisoformat(last_timestamp.replace('Z', '+00:00'))
|
| 94 |
if time_diff.total_seconds() > 300: # More than 5 minutes
|
| 95 |
local_time = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')).astimezone().strftime('%I:%M %p')
|
| 96 |
-
formatted += f"[{local_time}]\n
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
last_timestamp = current_timestamp
|
| 100 |
-
|
| 101 |
return formatted
|
| 102 |
except Exception as e:
|
| 103 |
logging.error(f"Error formatting conversation: {str(e)}")
|
|
|
|
| 80 |
# for key in ['summary']: #, 'short_summary', 'state', 'created_at', 'updated_at']:
|
| 81 |
# if key in conversation:
|
| 82 |
# formatted += f"**{key}**: {conversation[key]}\n"
|
|
|
|
| 83 |
if 'transcriptions' in conversation and conversation['transcriptions']:
|
| 84 |
formatted += "\n\n## Transcriptions\n\n"
|
| 85 |
last_timestamp = None
|
| 86 |
+
last_speaker = None
|
| 87 |
+
|
| 88 |
+
# Sort utterances chronologically
|
| 89 |
+
sorted_utterances = sorted(
|
| 90 |
+
conversation['transcriptions'][0].get('utterances', []),
|
| 91 |
+
key=lambda x: x.get('spoken_at', '')
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
for utterance in sorted_utterances:
|
| 95 |
current_timestamp = utterance.get('spoken_at')
|
| 96 |
+
speaker = int(utterance.get('speaker')) if utterance.get('speaker', '').isdigit() else str(utterance.get('speaker', ''))
|
| 97 |
text = utterance.get('text')
|
| 98 |
|
| 99 |
if last_timestamp is not None:
|
| 100 |
time_diff = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')) - datetime.fromisoformat(last_timestamp.replace('Z', '+00:00'))
|
| 101 |
if time_diff.total_seconds() > 300: # More than 5 minutes
|
| 102 |
local_time = datetime.fromisoformat(current_timestamp.replace('Z', '+00:00')).astimezone().strftime('%I:%M %p')
|
| 103 |
+
formatted += f"\n\n[{local_time}]\n"
|
| 104 |
+
# Speaker was normalized when read above (int for digit strings, otherwise str), so it compares consistently with last_speaker
|
| 105 |
+
|
| 106 |
+
if speaker != last_speaker:
|
| 107 |
+
formatted += f"\n\nSpeaker **[{speaker}](https://kagi.com/search?q={current_timestamp})**: {text}"
|
| 108 |
+
else:
|
| 109 |
+
formatted += f" {text}"
|
| 110 |
last_timestamp = current_timestamp
|
| 111 |
+
last_speaker = speaker
|
| 112 |
return formatted
|
| 113 |
except Exception as e:
|
| 114 |
logging.error(f"Error formatting conversation: {str(e)}")
|