fciannella's picture
Working with service run on 7860
53ea588
<!-- # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD 2-Clause License -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="https://cdn.jsdelivr.net/npm/protobufjs@7.X.X/dist/protobuf.min.js"></script>
<title>ACE Controller WebUI</title>
</head>
<body>
<h1>ACE Controller WebUI</h1>
<h3><div id="progressText">Loading, wait...</div></h2>
<button id="startAudioBtn">Start Audio</button>
<button id="stopAudioBtn">Stop Audio</button>
<script>
const SAMPLE_RATE = 16000;
const NUM_CHANNELS = 1;
const PLAY_TIME_RESET_THRESHOLD_MS = 1.0;
// The protobuf type. We will load it later.
let Frame = null;
// The websocket connection.
let ws = null;
// The audio context
let audioContext = null;
// The audio context media stream source
let source = null;
// The microphone stream from getUserMedia. Should be sampled to the
// proper sample rate.
let microphoneStream = null;
// Script processor to get data from microphone.
let scriptProcessor = null;
// AudioContext play time.
let playTime = 0;
// Last time we received a websocket message.
let lastMessageTime = 0;
// Whether we should be playing audio.
let isPlaying = false;
let startBtn = document.getElementById('startAudioBtn');
let stopBtn = document.getElementById('stopAudioBtn');
const proto = protobuf.load("frames.proto", (err, root) => {
if (err) {
throw err;
}
Frame = root.lookupType("pipecat.Frame");
const progressText = document.getElementById("progressText");
progressText.textContent = "We are ready! Make sure to run the server and then click `Start Audio`.";
startBtn.disabled = false;
stopBtn.disabled = true;
});
function initWebSocket() {
//ws = new WebSocket('ws://localhost:8100/ws/test1');
// Generate a UUID for the WebSocket connection
const uuid = crypto.randomUUID();
// Get the host IP address from the current URL
const host = window.location.hostname;
// Construct the WebSocket URL using the host IP address and UUID
const wsUrl = `ws://${host}:8100/ws/${uuid}`
// Create a new WebSocket connection
ws = new WebSocket(wsUrl);
ws.addEventListener('open', () => console.log('WebSocket connection established.'));
ws.addEventListener('message', handleWebSocketMessage);
ws.addEventListener('close', (event) => {
console.log("WebSocket connection closed.", event.code, event.reason);
stopAudio(false);
});
ws.addEventListener('error', (event) => console.error('WebSocket error:', event));
}
async function handleWebSocketMessage(event) {
try{
if (JSON.parse(event.data)){
console.log(event.data)
}
} catch{}
const arrayBuffer = await event.data.arrayBuffer();
if (isPlaying) {
enqueueAudioFromProto(arrayBuffer);
}
}
let activeSources = [];
function enqueueAudioFromProto(arrayBuffer) {
const parsedFrame = Frame.decode(new Uint8Array(arrayBuffer));
if(parsedFrame?.text){
// Stop all active audio buffer source nodes
activeSources.forEach(source => {
source.stop();
console.log("Stopped an active audio playback due to interrupt signal.");
});
activeSources = []; // Clear the list of active sources
playTime = 0;
}
if (!parsedFrame?.audio) {
return false;
}
// Reset play time if it's been a while we haven't played anything.
const diffTime = audioContext.currentTime - lastMessageTime;
if ((playTime == 0) || (diffTime > PLAY_TIME_RESET_THRESHOLD_MS)) {
playTime = audioContext.currentTime;
}
lastMessageTime = audioContext.currentTime;
// We should be able to use parsedFrame.audio.audio.buffer but for
// some reason that contains all the bytes from the protobuf message.
const audioVector = Array.from(parsedFrame.audio.audio);
const audioArray = new Uint8Array(audioVector);
audioContext.decodeAudioData(audioArray.buffer, function(buffer) {
const source = new AudioBufferSourceNode(audioContext);
source.buffer = buffer;
source.start(playTime);
source.connect(audioContext.destination);
playTime = playTime + buffer.duration;
activeSources.push(source);
console.log("New audio playback started.");
});
}
function convertFloat32ToS16PCM(float32Array) {
let int16Array = new Int16Array(float32Array.length);
for (let i = 0; i < float32Array.length; i++) {
let clampedValue = Math.max(-1, Math.min(1, float32Array[i]));
int16Array[i] = clampedValue < 0 ? clampedValue * 32768 : clampedValue * 32767;
}
return int16Array;
}
function startAudioBtnHandler() {
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
alert('getUserMedia is not supported in your browser.');
return;
}
startBtn.disabled = true;
stopBtn.disabled = false;
audioContext = new (window.AudioContext || window.webkitAudioContext)({
latencyHint: "interactive",
sampleRate: SAMPLE_RATE
});
isPlaying = true;
initWebSocket();
navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: SAMPLE_RATE,
channelCount: NUM_CHANNELS,
autoGainControl: true,
echoCancellation: true,
noiseSuppression: true,
}
}).then((stream) => {
microphoneStream = stream;
// 512 is closest thing to 200ms.
scriptProcessor = audioContext.createScriptProcessor(512, 1, 1);
source = audioContext.createMediaStreamSource(stream);
source.connect(scriptProcessor);
scriptProcessor.connect(audioContext.destination);
scriptProcessor.onaudioprocess = (event) => {
if (!ws) {
return;
}
const audioData = event.inputBuffer.getChannelData(0);
const pcmS16Array = convertFloat32ToS16PCM(audioData);
const pcmByteArray = new Uint8Array(pcmS16Array.buffer);
const frame = Frame.create({
audio: {
audio: Array.from(pcmByteArray),
sampleRate: SAMPLE_RATE,
numChannels: NUM_CHANNELS
}
});
const encodedFrame = new Uint8Array(Frame.encode(frame).finish());
ws.send(encodedFrame);
};
}).catch((error) => console.error('Error accessing microphone:', error));
}
function stopAudio(closeWebsocket) {
playTime = 0;
isPlaying = false;
startBtn.disabled = false;
stopBtn.disabled = true;
if (ws && closeWebsocket) {
ws.close();
ws = null;
}
if (scriptProcessor) {
scriptProcessor.disconnect();
}
if (source) {
source.disconnect();
}
}
function stopAudioBtnHandler() {
stopAudio(true);
}
startBtn.addEventListener('click', startAudioBtnHandler);
stopBtn.addEventListener('click', stopAudioBtnHandler);
startBtn.disabled = true;
stopBtn.disabled = true;
</script>
</body>
</html>