/**
 * Example provided by https://github.com/Gan-Xing in
 * https://github.com/fedirz/faster-whisper-server/issues/26
 */
import 'dotenv/config';
import fs from 'node:fs';
import path from 'node:path';
import WebSocket from 'ws';
import ffmpeg from 'fluent-ffmpeg';

// Point fluent-ffmpeg at the ffmpeg binary; override via FFMPEG_PATH.
const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
ffmpeg.setFfmpegPath(ffmpegPath);
/**
 * Transcribe an audio file using the HTTP endpoint.
 * Supported file types include wav, mp3, webm, and other types supported
 * by the OpenAI-compatible API.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON transcription response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  // Native fetch/FormData (Node >= 18) requires a Blob/File part. An
  // fs.createReadStream() would be stringified to '[object Object]'
  // instead of uploading the file contents.
  const audioBlob = new Blob([await fs.promises.readFile(filePath)]);
  formData.append('file', audioBlob, path.basename(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);
  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }
  const transcription = await response.json();
  console.log('Transcription Response:', transcription);
  return transcription;
}
/**
 * Translate an audio file using the HTTP endpoint.
 * Only English is supported as the translation target.
 * (Original author's note: they pair this with GLM-4-9b-int8 via vllm for
 * custom translation, deployed together in one Docker container.)
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON translation response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  // Native fetch/FormData (Node >= 18) requires a Blob/File part. An
  // fs.createReadStream() would be stringified to '[object Object]'
  // instead of uploading the file contents.
  const audioBlob = new Blob([await fs.promises.readFile(filePath)]);
  formData.append('file', audioBlob, path.basename(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);
  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }
  const translation = await response.json();
  console.log('Translation Response:', translation);
  return translation;
}
/**
 * Send audio data over WebSocket for transcription.
 * The server currently expects raw PCM audio; other formats are untested.
 *
 * @param {string} filePath - Path to the audio file (PCM)
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Derive the ws:// endpoint from the same env var the HTTP helpers use,
  // rather than a hard-coded LAN address (kept only as a fallback).
  const httpBase = process.env.TRANSCRIPTION_API_BASE_URL || 'http://100.105.162.69:8000';
  const wsUrl = new URL('/v1/audio/transcriptions', httpBase.replace(/^http/, 'ws'));
  wsUrl.searchParams.set('model', model);
  wsUrl.searchParams.set('language', language);
  wsUrl.searchParams.set('response_format', responseFormat);
  wsUrl.searchParams.set('temperature', temperature);
  const ws = new WebSocket(wsUrl.toString());
  ws.on('open', () => {
    // Send the entire file as one binary frame.
    ws.send(fs.readFileSync(filePath));
  });
  ws.on('message', (message) => {
    // Guard the parse: a malformed frame must not crash the process.
    try {
      console.log('WebSocket Response:', JSON.parse(message));
    } catch (error) {
      console.error('Failed to parse WebSocket message:', error);
    }
  });
  ws.on('close', () => {
    console.log('WebSocket connection closed');
  });
  ws.on('error', (error) => {
    console.error('WebSocket error:', error);
  });
}
/**
 * Convert an audio file to raw PCM (16 kHz, mono, signed 16-bit LE).
 *
 * @param {string} filePath - Path to the source audio file
 * @returns {Promise<string>} Path to the converted PCM file
 * @throws {Error} If ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Strip only the FINAL extension. The previous
  // filePath.replace(path.extname(filePath), '.pcm') replaced the first
  // occurrence of the extension anywhere in the path (wrong for paths like
  // './my.webm.files/audio.webm') and, when extname() returned '', the
  // empty-pattern replace inserted '.pcm' at the start of the string.
  const ext = path.extname(filePath);
  const pcmFilePath = `${filePath.slice(0, filePath.length - ext.length)}.pcm`;
  // new Promise is legitimate here: it adapts fluent-ffmpeg's event/callback API.
  await new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)
      .audioFrequency(16000)
      .audioCodec('pcm_s16le')
      .toFormat('s16le')
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });
  return pcmFilePath;
}
/**
 * Demo driver: convert a sample file to PCM, then exercise the HTTP
 * transcription/translation endpoints and the WebSocket endpoint in turn.
 */
async function main() {
  const model = 'Systran/faster-whisper-large-v3';
  const language = 'en';
  const responseFormat = 'json';
  const temperature = '0';
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // Produce the PCM file first; every call below reuses it.
  const pcmFilePath = await convertToPcm(filePath);

  // HTTP transcription endpoint.
  await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);
  // HTTP translation endpoint.
  await translateFile(pcmFilePath, model, responseFormat, temperature);
  // WebSocket transcription endpoint.
  await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
}

// Make sure to use ffmpeg version 7 or above. The default apt-get install only
// provides 4.x, and Ubuntu 22.04 or newer is required for 7.x packages.
main().catch(console.error);

// Project URL: https://github.com/Gan-Xing/whisper