Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import re | |
# Load the three Hugging Face pipelines once at import time so that every
# request handled by the UI reuses the same model instances.

# Instruction-following text generator used to write the session summary.
summarizer = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    tokenizer="google/flan-t5-small",
)

# Classifier that labels the overall sentiment of the transcript.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)

# Speech-to-text model used when the session is supplied as an audio file.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
)
def convert_to_json(transcript_text):
    """Split a plain-text transcript into a list of speaker turns.

    A line starting with ``Therapist:``/``T:`` or ``Client:``/``C:``
    (case-insensitive, leading whitespace allowed) opens a new turn for that
    speaker. Any other non-blank line continues the current turn; text that
    appears before the first label is attributed to ``"Unknown"``.

    Args:
        transcript_text: The raw transcript. ``None`` is treated as empty.

    Returns:
        ``{"session": [{"speaker": ..., "text": ...}, ...]}`` with at least
        one entry. If no turn could be extracted, the single entry carries
        the whole stripped transcript under the speaker ``"Unknown"``.
    """
    label_pattern = re.compile(r"^\s*(Therapist|T|Client|C):", re.IGNORECASE)
    stripped_transcript = (transcript_text or "").strip()

    # Each turn is (speaker, [text fragments]); fragments are joined at the end.
    turns = []
    for raw_line in stripped_transcript.split("\n"):
        line = raw_line.strip()
        label = label_pattern.match(line)
        if label:
            # Both spellings of a label share their first letter (T... / C...).
            is_therapist = label.group(1).lower().startswith("t")
            turns.append(("Therapist" if is_therapist else "Client", []))
            line = line[label.end():].strip()
        elif not line:
            # Blank separator lines carry no text; appending them would put
            # doubled spaces into the joined turn.
            continue
        elif not turns:
            turns.append(("Unknown", []))

        if line:
            turns[-1][1].append(line)

    session_data = [
        {"speaker": speaker, "text": " ".join(fragments)}
        for speaker, fragments in turns
    ]
    if not session_data:
        session_data = [{"speaker": "Unknown", "text": stripped_transcript}]
    return {"session": session_data}
def _detect_concerns(transcript_text):
    """Return the tracked keywords that occur in the transcript, in list order."""
    keywords_of_interest = [
        "anxiety", "depression", "relationship", "stress", "fear",
        "goals", "progress", "cognitive", "behavior",
    ]
    # Tokenise on letters rather than whitespace so that words followed by
    # punctuation ("anxiety," / "stress.") are still recognised.
    spoken_words = set(re.findall(r"[a-z]+", transcript_text.lower()))
    # Walking the keyword list (not the set) keeps the output order stable.
    return [keyword for keyword in keywords_of_interest if keyword in spoken_words]


def _suggest_follow_ups(summary):
    """Return follow-up prompts triggered by topics mentioned in the summary."""
    summary_lower = summary.lower()
    suggestions = []
    if "progress" in summary_lower:
        suggestions.append("Explore client's perception of progress in more detail.")
    if "relationship" in summary_lower:
        suggestions.append("Discuss client's relationship dynamics further.")
    if not suggestions:
        suggestions.append(
            "Consider following up on the emotional themes identified in the summary."
        )
    return suggestions


def analyze_session(transcript, custom_instruction, audio):
    """Summarise a therapy session given as text or audio.

    Args:
        transcript: Pasted transcript text; ignored when ``audio`` is given.
            ``None`` is treated as empty.
        custom_instruction: Optional extra instruction appended to the
            summarisation prompt. ``None`` is treated as empty.
        audio: Path to an audio file to transcribe, or ``None``.

    Returns:
        A Markdown string with the summary, overall sentiment label, matched
        concern keywords and suggested follow-up topics, or a short message
        asking for input when no transcript text is available.
    """
    import json  # function-scope import: the block stays self-contained

    if audio is not None:
        # 'audio' is a file path because the UI component uses type="filepath".
        # Whisper works on 30-second windows, so chunking is needed for the
        # pipeline to transcribe recordings longer than that.
        asr_result = asr_pipeline(audio, chunk_length_s=30)
        transcript_text = asr_result["text"]
    else:
        transcript_text = transcript or ""

    if not transcript_text.strip():
        return "Please provide a transcript or an audio file."

    json_data = convert_to_json(transcript_text)

    prompt = (
        "You are a helpful assistant that summarizes psychotherapy sessions. "
        "The session is provided in JSON format with speaker turns. "
        "Summarize the key themes, emotional shifts, and patterns from this session. "
    )
    extra_instruction = (custom_instruction or "").strip()
    if extra_instruction:
        prompt += f" Additionally, {extra_instruction}"
    # Serialise as real JSON (the prompt promises JSON); str() on a dict
    # would produce a single-quoted Python repr instead.
    prompt += "\n\nJSON data:\n" + json.dumps(json_data, ensure_ascii=False)

    summary_output = summarizer(prompt, max_length=200, do_sample=False)
    summary = summary_output[0]["generated_text"].strip()

    # The RoBERTa classifier accepts at most 512 tokens; truncate explicitly
    # so that full-length transcripts do not overflow the model.
    sentiment_results = sentiment_analyzer(
        transcript_text, truncation=True, max_length=512
    )
    main_sentiment = sentiment_results[0]["label"]

    recurring_concerns = _detect_concerns(transcript_text)
    if not recurring_concerns:
        recurring_concerns_str = (
            "No specific recurring concerns identified from the predefined list."
        )
    else:
        recurring_concerns_str = "Recurring concerns include: " + ", ".join(
            recurring_concerns
        )

    follow_up_suggestions_str = " ".join(_suggest_follow_ups(summary))

    final_output = f"**Summary of Session:**\n{summary}\n\n**Overall Sentiment:** {main_sentiment}\n\n**{recurring_concerns_str}**\n\n**Suggested Follow-Up Topics:** {follow_up_suggestions_str}"
    return final_output
# Markdown shown at the top of the app: title, usage notes and disclaimer.
# Blank lines separate the sections so that the bold headings render as their
# own paragraphs instead of being absorbed into the preceding list item.
description = """# Psychotherapy Session Summarizer

This tool summarizes psychotherapy session transcripts (text or audio) into key themes, emotional shifts, and patterns.

**How to Use:**
- You may upload an audio file of the session or paste the text transcript.
- Optionally provide a custom focus or instruction (e.g., "Focus on how the client talks about their anxiety.").
- Click 'Summarize' to generate a summary along with identified concerns and suggested follow-ups.

**Important:**
This tool is intended for educational and research purposes only and should not be taken as professional advice. The author accepts no responsibility for any misuse.
"""
# Build the Gradio interface and wire the button to analyze_session.
# NOTE(review): the nesting below is reconstructed from syntax because the
# source lost its indentation; confirm that only the transcript and audio
# inputs are meant to share the row.
with gr.Blocks() as demo:
    gr.Markdown(description)

    with gr.Row():
        transcript_input = gr.Textbox(
            label="Session Transcript (Text)",
            lines=10,
            placeholder="Paste the session transcript here...",
        )
        # type="filepath" makes the callback receive a path on disk.
        audio_input = gr.Audio(
            type="filepath",
            label="Session Audio (Optional)",
        )

    custom_instruction_input = gr.Textbox(
        label="Custom Instruction (Optional)",
        placeholder="e.g., Focus on anxiety and coping strategies.",
    )
    summarize_button = gr.Button("Summarize")
    output_box = gr.Markdown()

    # Input order must match analyze_session(transcript, custom_instruction, audio).
    summarize_button.click(
        fn=analyze_session,
        inputs=[transcript_input, custom_instruction_input, audio_input],
        outputs=output_box,
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()