Spaces:
Build error
Build error
// ---- Backend endpoints -------------------------------------------------------
// Socket carrying transcription / LLM text messages.
let websocket_uri = 'ws://localhost:6006';
// Socket carrying synthesized speech as raw audio frames.
let websocket_audio_uri = 'ws://localhost:8888';

// ---- Capture-side handles ----------------------------------------------------
let bufferSize = 4096;
// Assigned the (possibly vendor-prefixed) constructor by startRecording().
let AudioContext;
let context;
let processor;
let input;
// Text socket; opened by initWebSocket().
let websocket;

// ---- Recording timer ---------------------------------------------------------
// Id returned by setInterval for the recording clock.
var intervalFunction = null;
// Elapsed recording time in seconds, advanced by recording_timer().
var recordingTime = 0;

// ---- Connection / session state ----------------------------------------------
// Becomes 1 once the server has sent SERVER_READY.
var server_state = 0;
var websocket_audio = null;
// Playback context for synthesized speech; created by startRecording().
let audioContext_tts = null;
// Speaker label for the local user; may be overridden by the `name` URL parameter.
var you_name = "Marcus";
// Capture context; created by startRecording().
var audioContext = null;
var audioWorkletNode = null;
// 0 while microphone frames may be sent; set to 1 by stopRecording().
var audio_state = 0;

// ---- Conversation bookkeeping ------------------------------------------------
var available_transcription_elements = 0;
var available_llm_elements = 0;
var available_audio_elements = 0;
var llm_outputs = [];
// True when the next transcription segment should open a new bubble.
var new_transcription_element_state = true;
// Audio buffers received so far, in arrival order.
var audio_sources = [];
// Most recently created buffer source node.
var audio_source = null;

// Open both sockets as soon as the script loads.
initWebSocket();
/**
 * Left-pads the string form of a value with '0' up to a minimum width.
 *
 * @param {*} num - Value to render; converted with String().
 * @param {number} places - Minimum length of the result.
 * @returns {string} The padded text (never truncated).
 */
const zeroPad = (num, places) => {
    const digits = String(num);
    return digits.padStart(places, '0');
};
/**
 * Produces a version-4 style UUID string used to identify this client session.
 *
 * Prefers the platform's `crypto.randomUUID()`. That API is not exposed in
 * every context (for example non-secure origins), so the original
 * timestamp + Math.random generator is kept as a fallback.
 *
 * @returns {string} A lowercase UUID of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx.
 */
const generateUUID = () => {
    const webCrypto = globalThis.crypto;
    if (typeof webCrypto?.randomUUID === "function") {
        return webCrypto.randomUUID();
    }

    // Fallback: mix the current time into the random nibbles, consuming one
    // hex digit of the timestamp per generated character.
    let dt = new Date().getTime();
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
        const r = (dt + Math.random() * 16) % 16 | 0;
        dt = Math.floor(dt / 16);
        // 'y' positions carry the variant bits (binary 10xx).
        return (c === 'x' ? r : (r & 0x3 | 0x8)).toString(16);
    });
};
/**
 * Interval callback: advances the recording clock by one second and renders it
 * as "MM:SSs" into the #recording-time element.
 */
function recording_timer() {
    recordingTime++;
    // Math.floor truncates numerically; parseInt would round-trip through a string.
    const minutes = Math.floor(recordingTime / 60);
    const seconds = recordingTime % 60;
    // Plain text only, so textContent is sufficient and avoids HTML parsing.
    document.getElementById("recording-time").textContent = zeroPad(minutes, 2) + ":" + zeroPad(seconds, 2) + "s";
}
/**
 * Opens the microphone and forwards captured audio frames to the text socket.
 *
 * Does nothing when `audioContext` has not been created. Frames posted by the
 * "audio-stream-processor" worklet are sent over `websocket` only while the
 * server has reported readiness (`server_state === 1`), the socket is open and
 * recording has not been stopped (`audio_state === 0`).
 *
 * Errors are logged rather than rethrown. If setup fails after the microphone
 * was acquired, the stream's tracks are stopped so the device is released.
 *
 * @returns {Promise<void>}
 */
const start_recording = async () => {
    console.log(audioContext);
    if (!audioContext) {
        return;
    }

    let stream = null;
    const releaseStream = () => {
        if (stream) {
            stream.getTracks().forEach((track) => track.stop());
        }
    };

    try {
        await audioContext.resume();
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        // The context may have been cleared while the permission prompt was open.
        if (!audioContext) {
            releaseStream();
            return;
        }
        console.log(audioContext?.state);

        await audioContext.audioWorklet.addModule("js/audio-processor.js");
        const source = audioContext.createMediaStreamSource(stream);
        audioWorkletNode = new AudioWorkletNode(audioContext, "audio-stream-processor");
        audioWorkletNode.port.onmessage = (event) => {
            if (server_state !== 1) {
                console.log("server is not ready!!");
                return;
            }
            const audioData = event.data;
            if (websocket && websocket.readyState === WebSocket.OPEN && audio_state === 0) {
                websocket.send(audioData.buffer);
                console.log("send data");
            }
        };
        source.connect(audioWorkletNode);
    } catch (e) {
        console.log("Error", e);
        // Do not leave the microphone live when the pipeline could not be built.
        releaseStream();
    }
};
/**
 * Starts microphone capture and resolves once start_recording() has finished.
 *
 * The inner promise is awaited so that callers awaiting this function do not
 * continue before the capture pipeline has been set up.
 *
 * @returns {Promise<void>}
 */
const handleStartRecording = async () => {
    await start_recording();
};
/**
 * Switches the UI into recording mode, creates the capture (16 kHz) and
 * playback (24 kHz) audio contexts, starts the one-second recording clock and
 * begins microphone capture.
 *
 * A clock left over from an earlier call is cleared first, so calling this
 * twice cannot leave an extra interval running that stopRecording() can no
 * longer reach.
 *
 * @returns {Promise<void>}
 */
const startRecording = async () => {
    document.getElementById("instructions-text").style.display = "none";
    document.getElementById("control-container").style.backgroundColor = "white";

    // Fall back to the vendor-prefixed constructor where needed.
    AudioContext = window.AudioContext || window.webkitAudioContext;
    audioContext = new AudioContext({ latencyHint: 'interactive', sampleRate: 16000 });
    audioContext_tts = new AudioContext({ sampleRate: 24000 });

    const recordingIndicators = ["recording-stop-btn", "recording-dot", "recording-line", "recording-time"];
    for (const elementId of recordingIndicators) {
        document.getElementById(elementId).style.display = "block";
    }

    if (intervalFunction !== null) {
        clearInterval(intervalFunction);
    }
    intervalFunction = setInterval(recording_timer, 1000);

    await handleStartRecording();
};
/**
 * Stops the recording clock and flags capture as stopped, after which the
 * worklet message handler no longer forwards microphone frames.
 */
function stopRecording() {
    clearInterval(intervalFunction);
    audio_state = 1;
}
/**
 * Escapes the characters that are significant in HTML so that text received
 * from the server is displayed literally instead of being parsed as markup.
 *
 * @param {*} text - Value to escape; converted with String().
 * @returns {string} Text that is safe to embed in element content.
 */
function escape_html_text(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Opens the two backend sockets and installs their handlers.
 *
 * - `websocket_audio` delivers synthesized speech as 32-bit float samples.
 *   Each frame is stored in `audio_sources`, gets an "audio-N" player element
 *   and is played immediately through `audioContext_tts`.
 * - `websocket` carries JSON messages: a SERVER_READY notification,
 *   transcription `segments` (with `eos` marking the end of an utterance) and
 *   `llm_output` replies. On connect it sends the session handshake.
 *
 * Frames that cannot be handled (audio before the playback context exists,
 * empty audio, non-JSON text, empty segment lists) are skipped instead of
 * throwing from the event handler.
 */
function initWebSocket() {
    // Canonical display name and avatar for the speakers the UI knows about.
    const known_speakers = new Map([
        ["marcus", { name: "Marcus", img: "0.png" }],
        ["vineet", { name: "Vineet", img: "1.png" }],
        ["jakub", { name: "Jakub", img: "2.png" }],
    ]);

    // ---- synthesized-speech socket ------------------------------------------
    websocket_audio = new WebSocket(websocket_audio_uri);
    websocket_audio.binaryType = "arraybuffer";
    websocket_audio.onopen = function() { };
    websocket_audio.onclose = function(e) { };
    websocket_audio.onmessage = function(e) {
        // The playback context only exists once recording has been started.
        if (!audioContext_tts) {
            console.log("Audio received before playback was initialised; dropping it.");
            return;
        }
        const samples = new Float32Array(e.data);
        if (samples.length === 0) {
            return;
        }
        const audioBuffer = audioContext_tts.createBuffer(1, samples.length, 24000);
        audioBuffer.getChannelData(0).set(samples);

        // Store the buffer before bumping the counter so that the element id
        // "audio-N" always refers to audio_sources[N - 1].
        audio_sources.push(audioBuffer);
        available_audio_elements++;
        new_whisper_speech_audio_element("audio-" + available_audio_elements, Math.floor(audioBuffer.duration));

        audio_source = audioContext_tts.createBufferSource();
        audio_source.buffer = audioBuffer;
        audio_source.connect(audioContext_tts.destination);
        audio_source.start();
        window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
    };

    // ---- transcription / LLM socket -----------------------------------------
    websocket = new WebSocket(websocket_uri);
    websocket.binaryType = "arraybuffer";
    console.log("Websocket created.");
    websocket.onopen = function() {
        console.log("Connected to server.");
        websocket.send(JSON.stringify({
            uid: generateUUID(),
            multilingual: false,
            language: "en",
            task: "transcribe"
        }));
    };
    websocket.onclose = function(e) {
        console.log("Connection closed (" + e.code + ").");
    };
    websocket.onmessage = function(e) {
        let data;
        try {
            data = JSON.parse(e.data);
        } catch (err) {
            console.log("Ignoring message that is not valid JSON.", err);
            return;
        }
        if (data === null || typeof data !== "object") {
            return;
        }

        if ("message" in data) {
            if (data["message"] === "SERVER_READY") {
                server_state = 1;
            }
        } else if ("segments" in data) {
            const segments = data["segments"];
            const first_segment = segments ? segments[0] : undefined;
            if (first_segment) {
                const segment_html = "<p>" + escape_html_text(first_segment.text) + "</p>";
                const element_is_new = new_transcription_element_state;
                if (element_is_new) {
                    available_transcription_elements = available_transcription_elements + 1;
                    // Unknown names keep their spelling and use the default avatar.
                    let img_src = "0.png";
                    const speaker = known_speakers.get(you_name.toLowerCase());
                    if (speaker) {
                        you_name = speaker.name;
                        img_src = speaker.img;
                    }
                    new_transcription_element(you_name, img_src);
                    new_text_element(segment_html, "transcription-" + available_transcription_elements);
                    new_transcription_element_state = false;
                }
                // Later segments of the same utterance overwrite the bubble in place.
                const target = document.getElementById("transcription-" + available_transcription_elements);
                if (target) {
                    target.innerHTML = segment_html;
                }
            }
            // Loose comparison kept so truthy encodings the original accepted (e.g. 1) still count.
            if (data["eos"] == true) {
                new_transcription_element_state = true;
            }
        } else if ("llm_output" in data) {
            new_transcription_element("Phi-2", "Phi.svg");
            new_text_element("<p>" + escape_html_text(data["llm_output"][0]) + "</p>", "llm-" + available_transcription_elements);
        }
        window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
    };
}
/**
 * Appends a speaker header (avatar image + name) to #main-wrapper.
 *
 * The name can originate from the `name` URL parameter, so it is written with
 * textContent and the image is built as a DOM node; neither value is parsed
 * as HTML.
 *
 * @param {string} speaker_name - Label shown next to the avatar.
 * @param {string} speaker_avatar - Image file name, resolved relative to "img/".
 */
function new_transcription_element(speaker_name, speaker_avatar) {
    const avatar_container = document.createElement("div");
    avatar_container.className = "avatar-container";

    const avatar_img = document.createElement("div");
    const avatar_picture = document.createElement("img");
    avatar_picture.className = "avatar";
    avatar_picture.src = "img/" + speaker_avatar;
    avatar_img.appendChild(avatar_picture);

    const avatar_name = document.createElement("div");
    avatar_name.className = "avatar-name";
    avatar_name.textContent = speaker_name;

    // Trailing empty div kept from the original markup.
    const dummy_element = document.createElement("div");

    avatar_container.appendChild(avatar_img);
    avatar_container.appendChild(avatar_name);
    avatar_container.appendChild(dummy_element);
    document.getElementById("main-wrapper").appendChild(avatar_container);
}
/**
 * Appends a text bubble to #main-wrapper.
 *
 * @param {string} text - HTML placed inside a <p> in the bubble body.
 * @param {string} id - DOM id given to the bubble body so it can be updated later.
 */
function new_text_element(text, id) {
    const bubble = document.createElement("div");
    bubble.className = "text-container";
    bubble.style.maxWidth = "500px";

    const bubble_body = document.createElement("div");
    bubble_body.id = id;
    bubble_body.innerHTML = "<p>" + text + "</p>";

    // The body is followed by one empty div.
    const spacer = document.createElement("div");
    [bubble_body, spacer].forEach((child) => {
        bubble.appendChild(child);
    });

    const wrapper = document.getElementById("main-wrapper");
    wrapper.appendChild(bubble);
}
/**
 * Appends a "WhisperLive - Transcription time" timing row to #main-wrapper.
 *
 * @param {number|string} time - Duration to display, rendered with an "ms" suffix.
 */
function new_transcription_time_element(time) {
    const timing_row = document.createElement("div");
    timing_row.className = "transcription-timing-container";
    timing_row.style.maxWidth = "500px";

    const label = document.createElement("div");
    label.innerHTML = "<span>WhisperLive - Transcription time: " + time + "ms</span>";

    // The label is followed by one empty div.
    const spacer = document.createElement("div");
    [label, spacer].forEach((child) => {
        timing_row.appendChild(child);
    });

    const wrapper = document.getElementById("main-wrapper");
    wrapper.appendChild(timing_row);
}
/**
 * Appends a Phi-2 timing row (first-response and complete-response lines) to
 * #main-wrapper.
 *
 * Previously both lines could only show the same number. The optional second
 * argument lets callers report the two measurements separately; when it is
 * omitted both lines show `time`, exactly as before.
 *
 * @param {number|string} time - First-response duration, rendered with an "ms" suffix.
 * @param {number|string} [complete_time=time] - Complete-response duration.
 */
function new_llm_time_element(time, complete_time = time) {
    const text_container = document.createElement("div");
    text_container.className = "llm-timing-container";
    text_container.style.maxWidth = "500px";

    const first_response_text_element = document.createElement("div");
    first_response_text_element.innerHTML = "<span>Phi-2 first response time: " + time + "ms</span>";

    const complete_response_text_element = document.createElement("div");
    complete_response_text_element.innerHTML = "<span>Phi-2 complete response time: " + complete_time + "ms</span>";

    // Trailing empty div kept from the original markup.
    const dummy_element = document.createElement("div");

    text_container.appendChild(first_response_text_element);
    text_container.appendChild(complete_response_text_element);
    text_container.appendChild(dummy_element);
    document.getElementById("main-wrapper").appendChild(text_container);
}
/**
 * Appends an <audio> player for one synthesized-speech clip to #main-wrapper.
 *
 * The element's own source is a placeholder file "static/<duration>.mp3"
 * (duration capped at 10); the play and pause handlers drive the buffer
 * stored in `audio_sources` through `audioContext_tts` instead.
 *
 * @param {string} id - Element id of the form "audio-N"; N - 1 indexes `audio_sources`.
 * @param {number} duration - Clip length used to pick the placeholder file.
 */
function new_whisper_speech_audio_element(id, duration) {
    const player = document.createElement("audio");
    player.style.paddingTop = "20px";
    const placeholder_length = duration > 10 ? 10 : duration;
    player.src = "static/" + placeholder_length + ".mp3";
    player.id = id;
    player.controls = true;

    // Play: silence whatever source is current, then start this clip's buffer.
    player.onplay = function() {
        console.log(this.id);
        const source_index = this.id.split("-")[1] - 1;
        if (audio_source) {
            audio_source.disconnect();
        }
        const fresh_source = audioContext_tts.createBufferSource();
        audio_source = fresh_source;
        fresh_source.buffer = audio_sources[source_index];
        fresh_source.connect(audioContext_tts.destination);
        fresh_source.start();
    };

    // Pause: rewind the element and stop the current source.
    player.onpause = function() {
        this.currentTime = 0;
        console.log(this.id);
        if (audio_source) {
            audio_source.stop();
        }
    };

    const player_holder = document.createElement("div");
    player_holder.appendChild(player);

    // The holder sits between two empty divs.
    const outer = document.createElement("div");
    outer.className = "whisperspeech-audio-container";
    outer.style.maxWidth = "500px";
    [document.createElement("div"), player_holder, document.createElement("div")].forEach((child) => {
        outer.appendChild(child);
    });

    document.getElementById("main-wrapper").appendChild(outer);
}
/**
 * Appends a "WhisperSpeech response time" timing row to #main-wrapper.
 *
 * @param {number|string} time - Duration to display, rendered with an "ms" suffix.
 */
function new_whisper_speech_time_element(time) {
    const timing_row = document.createElement("div");
    timing_row.className = "whisperspeech-timing-container";
    timing_row.style.maxWidth = "500px";

    const label = document.createElement("div");
    label.innerHTML = "<span>WhisperSpeech response time: " + time + "ms</span>";

    // The label is followed by one empty div.
    const spacer = document.createElement("div");
    [label, spacer].forEach((child) => {
        timing_row.appendChild(child);
    });

    const wrapper = document.getElementById("main-wrapper");
    wrapper.appendChild(timing_row);
}
// Once the DOM is parsed, let a `name` query parameter override the default
// speaker label stored in you_name.
document.addEventListener('DOMContentLoaded', () => {
    const params = new URLSearchParams(window.location.search);
    if (!params.has('name')) {
        return;
    }
    you_name = params.get('name');
}, false);