Spaces:

Rogerjs
/

SessionSummarizer

Sleeping

App Files Files Community

SessionSummarizer / app.py

Rogerjs

Update app.py

0e23aca verified 11 months ago

raw

history blame contribute delete

5.66 kB

	import json
	import re

	import gradio as gr
	from transformers import pipeline

	# Load the three Hugging Face pipelines once at import time; analyze_session
	# reads them as module-level globals on every call.

	# Text-to-text model used to generate the session summary.
	summarizer = pipeline(
	    "text2text-generation",
	    model="google/flan-t5-small",
	    tokenizer="google/flan-t5-small",
	)

	# Classifier used for the overall sentiment label of the transcript.
	sentiment_analyzer = pipeline(
	    "sentiment-analysis",
	    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
	)

	# Speech-to-text model used when an audio file is supplied instead of text.
	asr_pipeline = pipeline(
	    "automatic-speech-recognition",
	    model="openai/whisper-small",
	)

	def convert_to_json(transcript_text):
	    """Split a plain-text transcript into speaker turns.

	    A line starting with "Therapist:" / "T:" or "Client:" / "C:" (any case)
	    opens a new turn for that speaker. Every other non-blank line is treated
	    as a continuation of the current turn; text that appears before any
	    speaker label is attributed to "Unknown".

	    Args:
	        transcript_text: The raw transcript as a single string.

	    Returns:
	        A dict of the form
	        {"session": [{"speaker": <str>, "text": <str>}, ...]}.
	        If no turn could be extracted, the whole (stripped) transcript is
	        returned as a single "Unknown" turn.
	    """
	    # The alternation must use a bare pipe: an escaped pipe only matches the
	    # literal text "Therapist|T:", so no real speaker label would be found.
	    speaker_patterns = (
	        ("Therapist", re.compile(r"^\s*(?:Therapist|T):", re.IGNORECASE)),
	        ("Client", re.compile(r"^\s*(?:Client|C):", re.IGNORECASE)),
	    )

	    session_data = []
	    current_speaker = None
	    current_text = []

	    for raw_line in transcript_text.strip().split("\n"):
	        line = raw_line.strip()
	        if not line:
	            # Blank lines carry no content and would only add double spaces.
	            continue

	        for speaker, pattern in speaker_patterns:
	            label = pattern.match(line)
	            if label is None:
	                continue
	            # A new speaker label closes the turn collected so far.
	            if current_speaker and current_text:
	                session_data.append(
	                    {"speaker": current_speaker, "text": " ".join(current_text).strip()}
	                )
	            current_speaker = speaker
	            current_text = [line[label.end():].strip()]
	            break
	        else:
	            # No label on this line: it continues the current turn.
	            if current_speaker is None:
	                current_speaker = "Unknown"
	            current_text.append(line)

	    # Flush the last open turn.
	    if current_speaker and current_text:
	        session_data.append(
	            {"speaker": current_speaker, "text": " ".join(current_text).strip()}
	        )

	    if not session_data:
	        session_data = [{"speaker": "Unknown", "text": transcript_text.strip()}]

	    return {"session": session_data}

	def _find_recurring_concerns(transcript_text):
	    """Return the predefined concern keywords that occur in the transcript.

	    Matching is case-insensitive and on whole words. The result follows the
	    order of the keyword list, so it is stable between runs.
	    """
	    keywords_of_interest = [
	        "anxiety", "depression", "relationship", "stress", "fear",
	        "goals", "progress", "cognitive", "behavior",
	    ]
	    # Tokenize on letters only so that punctuation next to a word
	    # ("anxiety," or "stress.") does not hide the keyword.
	    words_present = set(re.findall(r"[a-z]+", transcript_text.lower()))
	    return [keyword for keyword in keywords_of_interest if keyword in words_present]


	def analyze_session(transcript, custom_instruction, audio):
	    """Summarize a therapy session given as text or as an audio file.

	    Args:
	        transcript: Session transcript text. Ignored when ``audio`` is given.
	            ``None`` is treated as empty.
	        custom_instruction: Optional extra instruction appended to the
	            summarization prompt. ``None`` is treated as empty.
	        audio: Path of an audio file to transcribe, or ``None``.

	    Returns:
	        A text report containing the generated summary, the overall sentiment
	        label, the recurring concerns found in the transcript and suggested
	        follow-up topics, or a short message asking for input when neither a
	        transcript nor audio yields any text.
	    """
	    if audio is not None:
	        # 'audio' is a file path because the Audio input uses type="filepath".
	        transcript_text = asr_pipeline(audio)["text"]
	    else:
	        transcript_text = transcript

	    # Callers may pass None for the optional fields; treat it as empty text.
	    transcript_text = transcript_text or ""
	    custom_instruction = (custom_instruction or "").strip()

	    if not transcript_text.strip():
	        return "Please provide a transcript or an audio file."

	    json_data = convert_to_json(transcript_text)

	    prompt = (
	        "You are a helpful assistant that summarizes psychotherapy sessions. "
	        "The session is provided in JSON format with speaker turns. "
	        "Summarize the key themes, emotional shifts, and patterns from this session. "
	    )
	    if custom_instruction:
	        prompt += f" Additionally, {custom_instruction}"
	    # The prompt promises JSON, so serialize with json.dumps rather than the
	    # Python repr of the dict (single quotes are not valid JSON).
	    prompt += "\n\nJSON data:\n" + json.dumps(json_data, ensure_ascii=False)

	    summary_output = summarizer(prompt, max_length=200, do_sample=False)
	    summary = summary_output[0]["generated_text"].strip()

	    # Truncate to the classifier's input limit so long transcripts do not
	    # make the sentiment model fail.
	    sentiment_results = sentiment_analyzer(transcript_text, truncation=True, max_length=512)
	    main_sentiment = sentiment_results[0]["label"]

	    recurring_concerns = _find_recurring_concerns(transcript_text)
	    if recurring_concerns:
	        recurring_concerns_str = "Recurring concerns include: " + ", ".join(recurring_concerns)
	    else:
	        recurring_concerns_str = "No specific recurring concerns identified from the predefined list."

	    # Follow-up suggestions are keyed off words that appear in the summary.
	    summary_lower = summary.lower()
	    follow_up_suggestions = []
	    if "progress" in summary_lower:
	        follow_up_suggestions.append("Explore client's perception of progress in more detail.")
	    if "relationship" in summary_lower:
	        follow_up_suggestions.append("Discuss client's relationship dynamics further.")
	    if not follow_up_suggestions:
	        follow_up_suggestions.append("Consider following up on the emotional themes identified in the summary.")
	    follow_up_suggestions_str = " ".join(follow_up_suggestions)

	    return (
	        f"Summary of Session:\n{summary}\n\n"
	        f"Overall Sentiment: {main_sentiment}\n\n"
	        f"{recurring_concerns_str}\n\n"
	        f"Suggested Follow-Up Topics: {follow_up_suggestions_str}"
	    )

	# Markdown text (usage instructions and disclaimer) passed to gr.Markdown()
	# when the UI is built.
	description = """# Psychotherapy Session Summarizer

	This tool summarizes psychotherapy session transcripts (text or audio) into key themes, emotional shifts, and patterns.

	How to Use:
	- You may upload an audio file of the session or paste the text transcript.
	- Optionally provide a custom focus or instruction (e.g., "Focus on how the client talks about their anxiety.").
	- Click 'Summarize' to generate a summary along with identified concerns and suggested follow-ups.

	Important:
	This is a tool for educational or research purposes and should not be taken as professional advice. I do not take responsibility for any misuse.
	"""

	# Build the Gradio UI and bind it to `demo`.
	# NOTE(review): the indentation of this block is not preserved in this copy of
	# the file, so which components sit inside the gr.Row() below cannot be read
	# from here -- confirm the intended nesting before restoring indentation.
	with gr.Blocks() as demo:
	# Usage instructions and disclaimer, rendered as Markdown.
	gr.Markdown(description)
	with gr.Row():
	# Inputs: pasted transcript text, an optional audio file (handed to the
	# callback as a file path because of type="filepath"), and an optional
	# free-text instruction.
	transcript_input = gr.Textbox(label="Session Transcript (Text)", lines=10, placeholder="Paste the session transcript here...")
	audio_input = gr.Audio(type="filepath", label="Session Audio (Optional)")
	custom_instruction_input = gr.Textbox(label="Custom Instruction (Optional)", placeholder="e.g., Focus on anxiety and coping strategies.")
	summarize_button = gr.Button("Summarize")
	# The report returned by analyze_session is displayed here as Markdown.
	output_box = gr.Markdown()

	# The order of `inputs` matches the parameter order of
	# analyze_session(transcript, custom_instruction, audio).
	summarize_button.click(fn=analyze_session, inputs=[transcript_input, custom_instruction_input, audio_input], outputs=output_box)

	# Launch the Gradio app only when this file is run as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	demo.launch()