Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -1,20 +1,63 @@
 import gradio as gr
-import
+from transformers import pipeline, Wav2Vec2ProcessorWithLM
+from pyannote.audio import Pipeline
+from librosa import load, resample
+from rpunct import RestorePuncts
 
+# Audio components
+asr_model = 'patrickvonplaten/wav2vec2-base-960h-4-gram'
+processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)
+asr = pipeline('automatic-speech-recognition', model=asr_model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, decoder=processor.decoder)
+speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
+rpunct = RestorePuncts()
+
+# Text components
+sentiment_pipeline = pipeline('text-classification', model="distilbert-base-uncased-finetuned-sst-2-english")
+sentiment_threshold = 0.75
+
+EXAMPLES = ["example_audio.wav"]
 
 def speech_to_text(speech):
+    speaker_output = speaker_segmentation(speech)
+    speech, sampling_rate = load(speech)
+    if sampling_rate != 16000:
+        speech = resample(speech, sampling_rate, 16000)
+    text = asr(speech, return_timestamps="word")
+
+    full_text = text['text'].lower()
+    chunks = text['chunks']
+
+    diarized_output = []
+    i = 0
+    speaker_counter = 0
+
+    # New iteration every time the speaker changes
+    for turn, _, _ in speaker_output.itertracks(yield_label=True):
+        speaker = "Speaker 0" if speaker_counter % 2 == 0 else "Speaker 1"
+        diarized = ""
+        while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
+            diarized += chunks[i]['text'].lower() + ' '
+            i += 1
+
+        if diarized != "":
+            diarized = rpunct.punctuate(diarized)
+            diarized_output.extend([(diarized, speaker), ('from {:.2f}-{:.2f}'.format(turn.start, turn.end), None)])
+            speaker_counter += 1
+    return diarized_output, full_text
+
+def sentiment(checked_options, diarized):
+    customer_id = checked_options
+    customer_sentiments = []
+
+    for transcript in diarized:
+        speaker_speech, speaker_id = transcript
+        if speaker_id == customer_id:
+            output = sentiment_pipeline(speaker_speech)[0]
+            if output["label"] != "neutral" and output["score"] > sentiment_threshold:
+                customer_sentiments.append((speaker_speech, output["label"]))
+            else:
+                customer_sentiments.append((speaker_speech, None))
+    return customer_sentiments
 
 demo = gr.Blocks()
 demo.encrypt = False
@@ -22,35 +65,25 @@
 with demo:
     with gr.Row():
         with gr.Column():
-            audio = gr.Audio(label="Audio file")
+            audio = gr.Audio(label="Audio file", type='filepath')
             with gr.Row():
                 btn = gr.Button("Transcribe")
-
             with gr.Row():
-                examples = gr.components.Dataset(
-                    components=[audio],
-                    samples=[EXAMPLES],
-                    type="index",
-                )
-
+                examples = gr.components.Dataset(components=[audio], samples=[EXAMPLES], type="index")
         with gr.Column():
             gr.Markdown("**Diarized Output:**")
             diarized = gr.HighlightedText(lines=5, label="Diarized Output")
             full = gr.Textbox(lines=4, label="Full Transcript")
-            check = gr.
-
+            check = gr.Radio(["Speaker 0", "Speaker 1"], label='Choose speaker for sentiment analysis')
+            analyzed = gr.HighlightedText(label="Customer Sentiment")
 
     btn.click(speech_to_text, audio, [diarized, full], status_tracker=gr.StatusTracker(cover_container=True))
-    check.change(sentiment, check,
-
+    check.change(sentiment, [check, diarized], analyzed, status_tracker=gr.StatusTracker(cover_container=True))
+
     def load_example(example_id):
         processed_examples = audio.preprocess_example(EXAMPLES[example_id])
-        print(processed_examples)
         return processed_examples
-
-    examples._click_no_postprocess(
-        load_example,
-        inputs=[examples],
-        outputs=[audio])
+
+    examples._click_no_postprocess(load_example, inputs=[examples], outputs=[audio])
 
 demo.launch()
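Review note: the two handlers wired into the Blocks UI can be exercised outside Gradio, which makes failures like this Space's "Runtime error" status easier to reproduce locally. A minimal smoke-test sketch, assuming the checkpoints download successfully and that example_audio.wav (the file named in EXAMPLES) sits next to app.py:

if __name__ == '__main__':
    # speech_to_text takes a filepath (audio is type='filepath') and returns
    # the highlighted-text tuples plus the plain transcript.
    diarized_output, full_text = speech_to_text(EXAMPLES[0])
    print(full_text)
    for text, label in diarized_output:
        print(label, '|', text)
    # Labels alternate between "Speaker 0" and "Speaker 1", matching the
    # choices offered by the Radio component.
    print(sentiment("Speaker 0", diarized_output))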
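The heart of speech_to_text is the alignment between ASR word chunks and pyannote speaker turns: the while loop drains every chunk whose end timestamp falls within the current turn, and the speaker label alternates on each non-empty turn. A self-contained illustration with hypothetical chunk and turn data (no models required):

# Word chunks in the shape the ASR pipeline returns with
# return_timestamps="word"; the words and timestamps here are made up.
chunks = [
    {'text': 'HELLO', 'timestamp': (0.0, 0.4)},
    {'text': 'THERE', 'timestamp': (0.5, 0.9)},
    {'text': 'HI', 'timestamp': (1.1, 1.3)},
    {'text': 'AGAIN', 'timestamp': (1.4, 1.8)},
]

class Turn:  # stand-in for the pyannote segment objects
    def __init__(self, start, end):
        self.start, self.end = start, end

i = 0
for speaker_counter, turn in enumerate([Turn(0.0, 1.0), Turn(1.0, 2.0)]):
    words = []
    # Same invariant as app.py: consume chunks until one ends past turn.end.
    while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
        words.append(chunks[i]['text'].lower())
        i += 1
    print('Speaker %d:' % (speaker_counter % 2), ' '.join(words))
# Speaker 0: hello there
# Speaker 1: hi again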
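One detail in sentiment() worth flagging: distilbert-base-uncased-finetuned-sst-2-english is a binary SST-2 classifier whose labels are POSITIVE and NEGATIVE, so the output["label"] != "neutral" guard is always true and the score threshold does all the filtering. A sketch of that branch with a hypothetical pipeline result:

# Hypothetical single result in the shape returned by
# sentiment_pipeline(speaker_speech)[0].
output = {'label': 'NEGATIVE', 'score': 0.98}
sentiment_threshold = 0.75

if output['label'] != 'neutral' and output['score'] > sentiment_threshold:
    print('kept:', output['label'])   # confident prediction, shown in the UI
else:
    print('dropped')                  # low-confidence, label left as None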