Fedir Zadniprovskyi committed
Commit a5d79bf · 1 Parent(s): f58fddb
chore: update volume names and mount points
Files changed:
- Dockerfile (+1 -1)
- README.md (+2 -2)
- audio.wav (+0 -0)
- compose.cpu.yaml (+2 -2)
- compose.cuda-cdi.yaml (+2 -2)
- compose.cuda.yaml (+2 -2)
- docs/installation.md (+9 -9)
- examples/live-audio/script.sh (+2 -2)
- examples/youtube/script.sh (+2 -2)
Dockerfile
CHANGED
@@ -29,7 +29,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Creating a directory for the cache to avoid the following error:
 # PermissionError: [Errno 13] Permission denied: '/home/ubuntu/.cache/huggingface/hub'
 # This error occurs because the volume is mounted as root and the `ubuntu` user doesn't have permission to write to it. Pre-creating the directory solves this issue.
-RUN mkdir -p $HOME/.cache/huggingface
+RUN mkdir -p $HOME/.cache/huggingface/hub
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
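A note on why the `/hub` suffix matters: when Docker first mounts a named volume at a path, it seeds the volume with that path's contents and ownership from the image, so pre-creating the exact mount point inside the image keeps the mounted cache writable by `ubuntu`. A quick sanity check, as a sketch (assumes the published `latest-cpu` tag and a standard Ubuntu userland):

```bash
# Sketch: check that the pre-created cache directory is owned by the non-root
# user inside the image, so a fresh named volume mounted there inherits
# writable ownership. Assumes `ls` is available in the image.
docker run --rm --entrypoint ls fedirz/faster-whisper-server:latest-cpu \
  -ld /home/ubuntu/.cache/huggingface/hub
```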
README.md
CHANGED
@@ -49,9 +49,9 @@ docker compose --file compose.cpu.yaml up --detach
 
 ```bash
 # for GPU support
-docker run --gpus=all --publish 8000:8000 --volume
+docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach fedirz/faster-whisper-server:latest-cuda
 # for CPU only (use this if you don't have a GPU, as the image is much smaller)
-docker run --publish 8000:8000 --volume
+docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 ### Using Kubernetes
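Since `hf-hub-cache` is a named volume, downloaded models persist across container restarts and can be shared by the CUDA and CPU images. A sketch for inspecting the cache after one of the commands above has run (assumes the `latest-cpu` tag):

```bash
# Sketch: confirm the named volume exists and see where Docker stores it.
docker volume inspect hf-hub-cache
# Peek at the cached model snapshots through a throwaway container.
docker run --rm --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
  --entrypoint ls fedirz/faster-whisper-server:latest-cpu \
  /home/ubuntu/.cache/huggingface/hub
```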
audio.wav
CHANGED
Binary files a/audio.wav and b/audio.wav differ
compose.cpu.yaml
CHANGED
@@ -12,6 +12,6 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-small
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
 volumes:
-
+  hf-hub-cache:
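The top-level `volumes:` entry makes Compose create `hf-hub-cache` on first `up`; by default the volume name is prefixed with the Compose project name. A minimal sketch:

```bash
# Sketch: start the CPU service and confirm the cache volume was created.
docker compose --file compose.cpu.yaml up --detach
docker volume ls --filter name=hf-hub-cache
```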
compose.cuda-cdi.yaml
CHANGED
@@ -9,7 +9,7 @@ services:
       file: compose.cuda.yaml
       service: faster-whisper-server
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
@@ -21,4 +21,4 @@ services:
           device_ids:
             - nvidia.com/gpu=all
 volumes:
-
+  hf-hub-cache:
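The `nvidia.com/gpu=all` device ID in this file relies on Docker's CDI support, which needs a CDI spec generated by the NVIDIA Container Toolkit. A hedged sketch following the toolkit's documented workflow (the output path is the conventional one; distributions may differ):

```bash
# Sketch: generate a CDI spec so nvidia.com/gpu device IDs resolve,
# then list the devices CDI now exposes. Requires the NVIDIA Container Toolkit.
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
nvidia-ctk cdi list
```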
compose.cuda.yaml
CHANGED
@@ -12,11 +12,11 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-large-v3
     volumes:
-      -
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
           devices:
             - capabilities: ["gpu"]
 volumes:
-
+  hf-hub-cache:
docs/installation.md
CHANGED
@@ -13,14 +13,14 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
               devices:
                 - capabilities: ["gpu"]
     volumes:
-
+      hf-hub-cache:
     ```
 
 === "CUDA (with CDI feature enabled)"
@@ -35,7 +35,7 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
@@ -46,7 +46,7 @@ TODO: just reference the existing compose file in the repo
               device_ids:
                 - nvidia.com/gpu=all
     volumes:
-
+      hf-hub-cache:
     ```
 
 === "CPU"
@@ -60,9 +60,9 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          -
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     volumes:
-
+      hf-hub-cache:
     ```
 
 ## Docker
@@ -70,19 +70,19 @@ TODO: just reference the existing compose file in the repo
 === "CUDA"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --gpus=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CUDA (with CDI feature enabled)"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CPU"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub fedirz/faster-whisper-server:latest-cpu
    ```
 
 ## Kubernetes
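Whichever variant is started, the first run downloads the model into the mounted cache before the server answers requests; the repo's example scripts poll the health endpoint for readiness. A sketch:

```bash
# Sketch: block until the server reports healthy (endpoint cited by the
# example scripts in this repo); after that it is ready to accept requests.
until curl --silent --fail localhost:8000/health; do sleep 1; done
```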
examples/live-audio/script.sh
CHANGED
@@ -10,9 +10,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # `pv` is used to limit the rate at which the audio is streamed to the server. Audio is streamed at 32 kB/s (16000 samples per second * 16-bit samples / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le -`
 # shellcheck disable=SC2002
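The 32 kB/s figure in the comment above follows from the PCM format: 16000 samples/s * 2 bytes/sample * 1 channel. An illustrative, stand-alone sketch of just the throttling step (the file name is a placeholder; the real script pipes into the server):

```bash
# Illustrative sketch: convert a WAV file to raw 16 kHz 16-bit mono PCM and
# throttle it to 32 kB/s with pv, emulating a live microphone feed.
ffmpeg -loglevel quiet -i audio.wav -ac 1 -ar 16000 -f s16le - \
  | pv --rate-limit 32000 > /dev/null
```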
examples/youtube/script.sh
CHANGED
@@ -6,9 +6,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # Download the audio from a YouTube video. In this example I'm downloading "The Evolution of the Operating System" by the Asianometry YouTube channel. I highly recommend checking this channel out; the guy produces very high-quality content. If you don't have `youtube-dl`, you'll have to install it. https://github.com/ytdl-org/youtube-dl
 youtube-dl --extract-audio --audio-format mp3 -o the-evolution-of-the-operating-system.mp3 'https://www.youtube.com/watch?v=1lG7lFLXBIs'
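Once the audio is downloaded, it can be sent for transcription. The server speaks an OpenAI-compatible API, so a request along these lines should work; a hedged sketch (verify the route and form fields against the server's own docs):

```bash
# Sketch: transcribe the downloaded file via the OpenAI-compatible endpoint.
curl localhost:8000/v1/audio/transcriptions \
  -F "file=@the-evolution-of-the-operating-system.mp3" \
  -F "model=$WHISPER__MODEL"
```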