SEASG committed
Commit dfe6f42 · verified · 1 Parent(s): 5a4ccfc

Upload 5 files

Files changed (5)
  1. .gitignore +1 -0
  2. README.md +2 -19
  3. main.py +90 -0
  4. packages.txt +2 -0
  5. requirements.txt +8 -3
.gitignore ADDED
@@ -0,0 +1 @@
+ venv/
README.md CHANGED
@@ -1,19 +1,2 @@
- ---
- title: Echomatch
- emoji: 🚀
- colorFrom: red
- colorTo: red
- sdk: docker
- app_port: 8501
- tags:
-   - streamlit
- pinned: false
- short_description: Streamlit template space
- ---
-
- # Welcome to Streamlit!
-
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
+ # echomatch
+ Transformer-Based Voice-to-Text Emotion Matching
main.py ADDED
@@ -0,0 +1,90 @@
+ import os
+ import streamlit as st
+ import tempfile
+ from pydub import AudioSegment
+ from app.audio_processing import transcribe_audio
+ from app.emotion_text import get_emotion, emotion_labels
+
+ # Configure Streamlit app
+ st.set_page_config(
+     page_title="EchoMatch: Emotion from Voice",
+     page_icon="🎧",
+     layout="centered"
+ )
+
+ st.title("🎧 EchoMatch: Emotion from Voice")
+ st.markdown("Upload an audio file (MP3, WAV, M4A, FLAC, OGG) to get a text transcription and emotional analysis.")
+
+ uploaded_file = st.file_uploader(
+     "Upload an audio file (max 15MB recommended)",
+     type=["wav", "mp3", "m4a", "flac", "ogg"]
+ )
+
+ if uploaded_file is None:
+     st.info("Please upload an audio file to get started.")
+ else:
+     if uploaded_file.size > 15 * 1024 * 1024:
+         st.warning("File size exceeds the recommended 15MB limit. Processing may be slow or fail for larger files.")
+
+     st.audio(uploaded_file, format=uploaded_file.type)
+
+     temp_audio_path = None
+
+     try:
+         suffix = os.path.splitext(uploaded_file.name)[1]
+         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+             tmp_file.write(uploaded_file.read())
+             temp_audio_path = tmp_file.name
+
+         if not temp_audio_path.lower().endswith(".wav"):
+             st.info("Converting audio to WAV format for reliable processing...")
+             try:
+                 audio = AudioSegment.from_file(temp_audio_path)
+                 wav_audio_path = temp_audio_path.replace(suffix, ".wav")
+                 audio.export(wav_audio_path, format="wav")
+                 os.remove(temp_audio_path)
+                 temp_audio_path = wav_audio_path
+             except Exception as e:
+                 st.error(f"Could not convert audio to WAV. Please ensure ffmpeg is installed and correctly configured in your system's PATH. Error: {e}")
+                 st.stop()
+
+         st.subheader("📜 Transcription")
+         with st.spinner("Transcribing audio... This might take a while for longer files."):
+             transcribed_text = transcribe_audio(temp_audio_path)
+
+         if transcribed_text:
+             st.success("Transcription complete!")
+             st.markdown(f"**Transcribed Text:**\n\n```\n{transcribed_text}\n```")
+         else:
+             st.warning("Could not generate transcription for this audio file.")
+
+         st.subheader("😄 Detected Emotions")
+         if transcribed_text:
+             with st.spinner("Analyzing emotions..."):
+                 emotion_scores = get_emotion(transcribed_text)
+
+             if emotion_scores:
+                 st.markdown("Here are the detected emotion scores:")
+
+                 cols = st.columns(len(emotion_labels))
+                 for i, label in enumerate(emotion_labels):
+                     with cols[i]:
+                         score_percent = f"{emotion_scores.get(label, 0.0) * 100:.2f}%"
+                         st.metric(label.capitalize(), score_percent)
+
+                 if st.checkbox("Show raw emotion scores (JSON)"):
+                     st.json(emotion_scores)
+             else:
+                 st.info("Could not detect emotions from the transcription (it might be empty or too short).")
+         else:
+             st.info("Emotion analysis skipped as transcription was not available.")
+
+     except Exception as e:
+         st.error(f"An unexpected error occurred during processing: {e}")
+         st.exception(e)
+     finally:
+         if temp_audio_path and os.path.exists(temp_audio_path):
+             os.remove(temp_audio_path)
+
+ st.markdown("---")
+ st.markdown("EchoMatch: Emotion from Voice. Powered by Whisper ASR and Hugging Face Transformers.")
packages.txt ADDED
@@ -0,0 +1,2 @@
+ ffmpeg
+ libportaudio2
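packages.txt pulls in the system-level dependencies for the Space: ffmpeg is the decoder pydub shells out to when main.py converts non-WAV uploads, and libportaudio2 is the PortAudio shared library. A startup check along these lines (illustrative only, not part of the commit) would surface a missing ffmpeg binary before the first upload fails:

# Illustrative check, not part of the commit: pydub needs the ffmpeg binary from
# packages.txt to decode MP3/M4A/FLAC/OGG uploads.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError("ffmpeg not found on PATH; non-WAV uploads cannot be converted")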
requirements.txt CHANGED
@@ -1,3 +1,8 @@
- altair
- pandas
- streamlit
+ streamlit
+ git+https://github.com/openai/whisper.git
+ pydub
+ scipy>=1.7.3
+ torch
+ torchaudio
+ transformers
+ ffmpeg-python
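The updated requirements drop the Streamlit template's altair and pandas in favor of the speech-and-NLP stack: Whisper installed directly from GitHub, pydub and ffmpeg-python for audio handling, and torch, torchaudio, and transformers for the models. A quick smoke test like the one below (illustrative; the module list is an assumption) confirms the pins resolve before launching the app:

# Illustrative smoke test, not part of the commit: the git+ whisper line installs
# the "whisper" module; the rest map one-to-one to requirements.txt entries.
import importlib

for name in ("streamlit", "whisper", "pydub", "scipy", "torch", "torchaudio", "transformers"):
    importlib.import_module(name)
print("core dependencies import cleanly")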