Spaces:
Running
Running
| import numpy as np | |
| import scipy.io.wavfile as wavfile | |
| from pydub import AudioSegment | |
| import io | |
| import tiktoken | |
| from openai import OpenAI | |
def transcript_audio_func(audio_file):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    A fresh ``OpenAI`` client is constructed (with no arguments) on every
    call.

    Args:
        audio_file: Passed through unchanged as the ``file`` argument of the
            transcription request.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    request_args = {"model": "whisper-1", "file": audio_file}
    response = OpenAI().audio.transcriptions.create(**request_args)
    return response.text
def count_tokens(input_string: str) -> int:
    """Return how many ``cl100k_base`` tokens *input_string* encodes to."""
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))
| # Function to calculate SNR | |
def calculate_snr(audio_data):
    """Return a signal-to-noise estimate, in decibels, for raw samples.

    The whole input is treated as the signal, and the input with its mean
    removed is treated as the noise, so the result is
    ``10 * log10(mean(x**2) / mean((x - mean(x))**2))``.

    Args:
        audio_data: Array-like of audio samples (any numeric dtype).

    Returns:
        The estimate as a NumPy float. ``inf`` when the noise power is zero
        but the signal power is not (a constant, non-zero input); ``nan``
        when the input is empty or entirely zero.
    """
    # Work in float64: squaring integer PCM samples (e.g. int16) in their
    # native dtype wraps around and silently corrupts the power estimate.
    samples = np.asarray(audio_data, dtype=np.float64)
    if samples.size == 0:
        # The mean of an empty array is undefined; report NaN explicitly
        # instead of going through NumPy's empty-slice warning.
        return np.float64(np.nan)

    noise = samples - samples.mean()
    signal_power = np.mean(samples ** 2)
    noise_power = np.mean(noise ** 2)

    # A zero noise power legitimately yields inf (or nan for 0/0); suppress
    # the floating-point warnings rather than changing those results.
    with np.errstate(divide="ignore", invalid="ignore"):
        return 10 * np.log10(signal_power / noise_power)
| # Function to evaluate audio quality | |
def _rewind_stream(stream) -> None:
    """Move *stream* back to its start if it supports seeking; else no-op."""
    seek = getattr(stream, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except (OSError, ValueError):
        # Non-seekable or already-closed stream: leave it as it is.
        pass


def evaluate_audio_quality(file) -> dict:
    """Decode an audio file, compute quality metrics, and transcribe it.

    Args:
        file: Audio source handed to ``AudioSegment.from_file`` and then to
            ``transcript_audio_func``. If direct decoding fails and the
            object has a ``read()`` method, its bytes are decoded from an
            in-memory buffer instead.

    Returns:
        A dict with two entries:

        * ``"audit"``: ``volume`` (``audio.dBFS``), ``SNR`` (from
          ``calculate_snr``), ``duration`` in minutes, and the
          ``number_of_tokens`` / ``number_of_words`` of the transcription.
        * ``"content"``: the ``transcription`` text, ``audio_data`` (NumPy
          array of the decoded samples) and ``frame_rate``.

    Raises:
        Exception: Whatever ``AudioSegment.from_file`` raised, when decoding
            fails and ``file`` offers no ``read()`` fallback.
    """
    try:
        audio = AudioSegment.from_file(file)
    except Exception:
        # Only file-like objects can be retried from memory; for anything
        # else surface the original decoding error instead of masking it
        # with an AttributeError on ``read``.
        if not hasattr(file, "read"):
            raise
        # The failed attempt may have consumed part of the stream.
        _rewind_stream(file)
        audio = AudioSegment.from_file(io.BytesIO(file.read()))

    audio_data = np.array(audio.get_array_of_samples())

    # Duration in minutes. ``duration_seconds`` is used rather than a ratio
    # of the raw sample count, which is not a reliable length for
    # multi-channel audio.
    duration = audio.duration_seconds / 60

    # Overall loudness as reported by pydub.
    volume = audio.dBFS

    snr = calculate_snr(audio_data)

    # Decoding may have left a file-like object positioned at its end;
    # rewind so the transcription request sees the whole file.
    _rewind_stream(file)
    transcription = transcript_audio_func(file)

    audit = {
        "volume": volume,
        "SNR": snr,
        "duration": duration,
        "number_of_tokens": count_tokens(transcription),
        "number_of_words": len(transcription.split()),
    }
    content = {
        "transcription": transcription,
        "audio_data": audio_data,
        "frame_rate": audio.frame_rate,
    }
    return {"audit": audit, "content": content}