Fedir Zadniprovskyi committed
Commit a5d79bf · 1 Parent(s): f58fddb
chore: update volume names and mount points
Files changed:
- Dockerfile (+1 -1)
- README.md (+2 -2)
- audio.wav (+0 -0)
- compose.cpu.yaml (+2 -2)
- compose.cuda-cdi.yaml (+2 -2)
- compose.cuda.yaml (+2 -2)
- docs/installation.md (+9 -9)
- examples/live-audio/script.sh (+2 -2)
- examples/youtube/script.sh (+2 -2)
Dockerfile
CHANGED
@@ -29,7 +29,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Creating a directory for the cache to avoid the following error:
 # PermissionError: [Errno 13] Permission denied: '/home/ubuntu/.cache/huggingface/hub'
 # This error occurs because the volume is mounted as root and the `ubuntu` user doesn't have permission to write to it. Pre-creating the directory solves this issue.
-RUN mkdir -p $HOME/.cache/huggingface
+RUN mkdir -p $HOME/.cache/huggingface/hub
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
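A note on why the `/hub` suffix matters: when Docker first mounts a named volume at a path, it seeds the volume with that path's contents and ownership from the image, so pre-creating the exact mount point inside the image keeps the mounted cache writable by `ubuntu`. A quick sanity check, as a sketch (assumes the published `latest-cpu` tag and a standard Ubuntu userland):

```bash
# Sketch: check that the pre-created cache directory is owned by the non-root
# user inside the image, so a fresh named volume mounted there inherits
# writable ownership. Assumes `ls` is available in the image.
docker run --rm --entrypoint ls fedirz/faster-whisper-server:latest-cpu \
  -ld /home/ubuntu/.cache/huggingface/hub
```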
README.md
CHANGED
@@ -49,9 +49,9 @@ docker compose --file compose.cpu.yaml up --detach
 
 ```bash
 # for GPU support
-docker run --gpus=all --publish 8000:8000 --volume
+docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach fedirz/faster-whisper-server:latest-cuda
 # for CPU only (use this if you don't have a GPU, as the image is much smaller)
-docker run --publish 8000:8000 --volume
+docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 ### Using Kubernetes
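Since `hf-hub-cache` is a named volume, downloaded models persist across container restarts and can be shared by the CUDA and CPU images. A sketch for inspecting the cache after one of the commands above has run (assumes the `latest-cpu` tag):

```bash
# Sketch: confirm the named volume exists and see where Docker stores it.
docker volume inspect hf-hub-cache
# Peek at the cached model snapshots through a throwaway container.
docker run --rm --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
  --entrypoint ls fedirz/faster-whisper-server:latest-cpu \
  /home/ubuntu/.cache/huggingface/hub
```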
audio.wav
CHANGED
Binary files a/audio.wav and b/audio.wav differ
compose.cpu.yaml
CHANGED
@@ -12,6 +12,6 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-small
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
 volumes:
-
+  hf-hub-cache:
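The top-level `volumes:` entry makes Compose create `hf-hub-cache` on first `up`; by default the volume name is prefixed with the Compose project name. A minimal sketch:

```bash
# Sketch: start the CPU service and confirm the cache volume was created.
docker compose --file compose.cpu.yaml up --detach
docker volume ls --filter name=hf-hub-cache
```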
compose.cuda-cdi.yaml
CHANGED
@@ -9,7 +9,7 @@ services:
       file: compose.cuda.yaml
       service: faster-whisper-server
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
@@ -21,4 +21,4 @@ services:
           device_ids:
             - nvidia.com/gpu=all
 volumes:
-
+  hf-hub-cache:
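The `nvidia.com/gpu=all` device ID in this file relies on Docker's CDI support, which needs a CDI spec generated by the NVIDIA Container Toolkit. A hedged sketch following the toolkit's documented workflow (the output path is the conventional one; distributions may differ):

```bash
# Sketch: generate a CDI spec so nvidia.com/gpu device IDs resolve,
# then list the devices CDI now exposes. Requires the NVIDIA Container Toolkit.
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
nvidia-ctk cdi list
```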
compose.cuda.yaml
CHANGED
@@ -12,11 +12,11 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-large-v3
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
           devices:
             - capabilities: ["gpu"]
 volumes:
-
+  hf-hub-cache:
docs/installation.md
CHANGED
@@ -13,14 +13,14 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
               devices:
                 - capabilities: ["gpu"]
     volumes:
-
+      hf-hub-cache:
     ```
 
 === "CUDA (with CDI feature enabled)"
@@ -35,7 +35,7 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
@@ -46,7 +46,7 @@ TODO: just reference the existing compose file in the repo
               device_ids:
                 - nvidia.com/gpu=all
     volumes:
-
+      hf-hub-cache:
     ```
 
 === "CPU"
@@ -60,9 +60,9 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     volumes:
-
+      hf-hub-cache:
     ```
 
 ## Docker
@@ -70,19 +70,19 @@ TODO: just reference the existing compose file in the repo
 === "CUDA"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --gpus=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CUDA (with CDI feature enabled)"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CPU"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub fedirz/faster-whisper-server:latest-cpu
    ```
 
 ## Kubernetes
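Whichever variant is started, the first run downloads the model into the mounted cache before the server answers requests; the repo's example scripts poll the health endpoint for readiness. A sketch:

```bash
# Sketch: block until the server reports healthy (endpoint cited by the
# example scripts in this repo); after that it is ready to accept requests.
until curl --silent --fail localhost:8000/health; do sleep 1; done
```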
examples/live-audio/script.sh
CHANGED
@@ -10,9 +10,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # `pv` is used to limit the rate at which the audio is streamed to the server. Audio is streamed at 32 kB/s (16000 samples per second * 16-bit samples / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le -`
 # shellcheck disable=SC2002
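The 32 kB/s figure in the comment above follows from the PCM format: 16000 samples/s * 2 bytes/sample * 1 channel. An illustrative, stand-alone sketch of just the throttling step (the file name is a placeholder; the real script pipes into the server):

```bash
# Illustrative sketch: convert a WAV file to raw 16 kHz 16-bit mono PCM and
# throttle it to 32 kB/s with pv, emulating a live microphone feed.
ffmpeg -loglevel quiet -i audio.wav -ac 1 -ar 16000 -f s16le - \
  | pv --rate-limit 32000 > /dev/null
```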
examples/youtube/script.sh
CHANGED
@@ -6,9 +6,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # Download the audio from a YouTube video. In this example I'm downloading "The Evolution of the Operating System" by the Asianometry YouTube channel. I highly recommend checking this channel out; the guy produces very high-quality content. If you don't have `youtube-dl`, you'll have to install it. https://github.com/ytdl-org/youtube-dl
 youtube-dl --extract-audio --audio-format mp3 -o the-evolution-of-the-operating-system.mp3 'https://www.youtube.com/watch?v=1lG7lFLXBIs'
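Once the audio is downloaded, it can be sent for transcription. The server speaks an OpenAI-compatible API, so a request along these lines should work; a hedged sketch (verify the route and form fields against the server's own docs):

```bash
# Sketch: transcribe the downloaded file via the OpenAI-compatible endpoint.
curl localhost:8000/v1/audio/transcriptions \
  -F "file=@the-evolution-of-the-operating-system.mp3" \
  -F "model=$WHISPER__MODEL"
```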