<!-- # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="https://cdn.jsdelivr.net/npm/protobufjs@7/dist/protobuf.min.js"></script>
    <title>ACE Controller WebUI</title>
</head>
<body>
    <h1>ACE Controller WebUI</h1>
    <h3 id="progressText">Loading, please wait...</h3>
    <button id="startAudioBtn">Start Audio</button>
    <button id="stopAudioBtn">Stop Audio</button>
    <script>
    const SAMPLE_RATE = 16000;
    const NUM_CHANNELS = 1;
    // Seconds of silence after which the playback clock is reset.
    const PLAY_TIME_RESET_THRESHOLD_S = 1.0;
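    // NOTE: SAMPLE_RATE and NUM_CHANNELS are assumed to match what the
    // server-side pipeline expects (16 kHz mono here).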
    // The protobuf Frame type. Loaded asynchronously below.
    let Frame = null;
    // The websocket connection.
    let ws = null;
    // The audio context.
    let audioContext = null;
    // The audio context media stream source.
    let source = null;
    // The microphone stream from getUserMedia, captured at the proper
    // sample rate.
    let microphoneStream = null;
    // Script processor used to pull data from the microphone.
    let scriptProcessor = null;
    // AudioContext play time.
    let playTime = 0;
    // Last time we received a websocket message.
    let lastMessageTime = 0;
    // Whether we should be playing audio.
    let isPlaying = false;
    let startBtn = document.getElementById('startAudioBtn');
    let stopBtn = document.getElementById('stopAudioBtn');
    protobuf.load("frames.proto", (err, root) => {
        if (err) {
            throw err;
        }
        Frame = root.lookupType("pipecat.Frame");
        const progressText = document.getElementById("progressText");
        progressText.textContent = "We are ready! Make sure to run the server and then click `Start Audio`.";
        startBtn.disabled = false;
        stopBtn.disabled = true;
    });
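    // NOTE: frames.proto is fetched relative to this page, so it must be
    // served alongside it and define the `pipecat.Frame` message used above.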
    function initWebSocket() {
        // Generate a UUID to identify this WebSocket connection.
        const uuid = crypto.randomUUID();
        // Get the host from the current URL.
        const host = window.location.hostname;
        // Construct the WebSocket URL from the host and the UUID.
        const wsUrl = `ws://${host}:8100/ws/${uuid}`;
        // Create a new WebSocket connection.
        ws = new WebSocket(wsUrl);
        ws.addEventListener('open', () => console.log('WebSocket connection established.'));
        ws.addEventListener('message', handleWebSocketMessage);
        ws.addEventListener('close', (event) => {
            console.log('WebSocket connection closed.', event.code, event.reason);
            stopAudio(false);
        });
        ws.addEventListener('error', (event) => console.error('WebSocket error:', event));
    }
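    // NOTE: the port (8100) and path (/ws/<uuid>) are assumed to match the
    // ACE Controller server configuration; adjust them if the server is
    // exposed elsewhere.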
    async function handleWebSocketMessage(event) {
        // Text frames carry JSON status messages; log them and return,
        // since strings have no arrayBuffer() method.
        if (typeof event.data === 'string') {
            try {
                console.log(JSON.parse(event.data));
            } catch {
                console.log(event.data);
            }
            return;
        }
        // Binary frames carry protobuf-encoded audio.
        const arrayBuffer = await event.data.arrayBuffer();
        if (isPlaying) {
            enqueueAudioFromProto(arrayBuffer);
        }
    }
    let activeSources = [];
    function enqueueAudioFromProto(arrayBuffer) {
        const parsedFrame = Frame.decode(new Uint8Array(arrayBuffer));
        // A frame carrying text signals an interruption: stop any audio
        // that is currently playing.
        if (parsedFrame?.text) {
            activeSources.forEach((activeSource) => {
                activeSource.stop();
                console.log('Stopped an active audio playback due to interrupt signal.');
            });
            activeSources = []; // Clear the list of active sources.
            playTime = 0;
        }
        if (!parsedFrame?.audio) {
            return;
        }
        // Reset the play time if we have not played anything for a while.
        const diffTime = audioContext.currentTime - lastMessageTime;
        if (playTime === 0 || diffTime > PLAY_TIME_RESET_THRESHOLD_S) {
            playTime = audioContext.currentTime;
        }
        lastMessageTime = audioContext.currentTime;
        // parsedFrame.audio.audio is a Uint8Array view into the whole
        // protobuf message buffer, so copy the bytes before handing the
        // underlying ArrayBuffer to decodeAudioData.
        const audioArray = new Uint8Array(parsedFrame.audio.audio);
        audioContext.decodeAudioData(audioArray.buffer, (buffer) => {
            const bufferSource = new AudioBufferSourceNode(audioContext);
            bufferSource.buffer = buffer;
            bufferSource.connect(audioContext.destination);
            bufferSource.start(playTime);
            playTime += buffer.duration;
            activeSources.push(bufferSource);
            // Drop the source from the active list once it finishes, so
            // the list does not grow without bound.
            bufferSource.addEventListener('ended', () => {
                activeSources = activeSources.filter((s) => s !== bufferSource);
            });
            console.log('New audio playback started.');
        });
    }
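    // NOTE: decodeAudioData expects each chunk to be a complete,
    // self-describing audio file (e.g., WAV with headers); raw headerless
    // PCM would fail to decode. The server is assumed to send such chunks.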
    // Convert float samples in [-1, 1] to signed 16-bit PCM.
    function convertFloat32ToS16PCM(float32Array) {
        const int16Array = new Int16Array(float32Array.length);
        for (let i = 0; i < float32Array.length; i++) {
            const clampedValue = Math.max(-1, Math.min(1, float32Array[i]));
            int16Array[i] = clampedValue < 0 ? clampedValue * 32768 : clampedValue * 32767;
        }
        return int16Array;
    }
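    // The asymmetric scaling maps the float range onto the full int16
    // range: -1.0 -> -32768, 0.0 -> 0, 1.0 -> 32767.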
    function startAudioBtnHandler() {
        if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
            alert('getUserMedia is not supported in your browser.');
            return;
        }
        startBtn.disabled = true;
        stopBtn.disabled = false;
        audioContext = new (window.AudioContext || window.webkitAudioContext)({
            latencyHint: 'interactive',
            sampleRate: SAMPLE_RATE
        });
        isPlaying = true;
        initWebSocket();
        navigator.mediaDevices.getUserMedia({
            audio: {
                sampleRate: SAMPLE_RATE,
                channelCount: NUM_CHANNELS,
                autoGainControl: true,
                echoCancellation: true,
                noiseSuppression: true,
            }
        }).then((stream) => {
            microphoneStream = stream;
            // 512 samples at 16 kHz is a 32 ms buffer per audio chunk.
            // Note: createScriptProcessor is deprecated; an AudioWorklet
            // would be the modern replacement, but it is kept here for
            // simplicity.
            scriptProcessor = audioContext.createScriptProcessor(512, 1, 1);
            source = audioContext.createMediaStreamSource(stream);
            source.connect(scriptProcessor);
            scriptProcessor.connect(audioContext.destination);
            scriptProcessor.onaudioprocess = (event) => {
                // Only send audio once the socket is open and the protobuf
                // schema has loaded.
                if (!ws || ws.readyState !== WebSocket.OPEN || !Frame) {
                    return;
                }
                const audioData = event.inputBuffer.getChannelData(0);
                const pcmS16Array = convertFloat32ToS16PCM(audioData);
                const pcmByteArray = new Uint8Array(pcmS16Array.buffer);
                const frame = Frame.create({
                    audio: {
                        audio: Array.from(pcmByteArray),
                        sampleRate: SAMPLE_RATE,
                        numChannels: NUM_CHANNELS
                    }
                });
                const encodedFrame = new Uint8Array(Frame.encode(frame).finish());
                ws.send(encodedFrame);
            };
        }).catch((error) => console.error('Error accessing microphone:', error));
    }
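    // Creating the AudioContext inside the click handler satisfies browser
    // autoplay policies, which require a user gesture before audio starts.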
    function stopAudio(closeWebsocket) {
        playTime = 0;
        isPlaying = false;
        startBtn.disabled = false;
        stopBtn.disabled = true;
        if (ws && closeWebsocket) {
            ws.close();
            ws = null;
        }
        if (scriptProcessor) {
            scriptProcessor.disconnect();
        }
        if (source) {
            source.disconnect();
        }
        // Release the microphone so the browser's recording indicator
        // turns off.
        if (microphoneStream) {
            microphoneStream.getTracks().forEach((track) => track.stop());
            microphoneStream = null;
        }
    }
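    // stopAudio(false) is used by the socket 'close' handler above, since
    // the connection is already closed at that point.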
    function stopAudioBtnHandler() {
        stopAudio(true);
    }
    startBtn.addEventListener('click', startAudioBtnHandler);
    stopBtn.addEventListener('click', stopAudioBtnHandler);
    // Both buttons stay disabled until the protobuf schema has loaded.
    startBtn.disabled = true;
    stopBtn.disabled = true;
    </script>
</body>
</html>