# Hugging Face Space status: Running on Zero
# NOTE: `spaces` is deliberately kept first; on ZeroGPU it is expected to be
# imported before anything initializes CUDA, so do not re-sort these imports.
import spaces
import gradio as gr
import torch
from transformers import pipeline
import librosa
# Model configuration
model_id = "kotoba-tech/kotoba-whisper-v1.0"

# Half precision, the first CUDA device and SDPA attention are selected only
# when CUDA is available; otherwise fall back to float32 on the CPU with no
# extra model keyword arguments.
if torch.cuda.is_available():
    torch_dtype = torch.float16
    device = "cuda:0"
    model_kwargs = {"attn_implementation": "sdpa"}
else:
    torch_dtype = torch.float32
    device = "cpu"
    model_kwargs = {}

# Options forwarded to generation on every transcription request
generate_kwargs = {"language": "japanese", "task": "transcribe"}

# Load the model once at import time so that all requests share it
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
    model_kwargs=model_kwargs,
)
@spaces.GPU
def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio: Path to the audio file. The Gradio input component is
            configured with ``type="filepath"``, so this is a path string,
            or ``None`` when the user submits without providing audio.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Fail with a readable message instead of an opaque librosa traceback
    # when the form is submitted empty.
    if not audio:
        raise gr.Error("No audio was provided. Please upload an audio file.")

    # Decode and resample in one step; the raw array handed to the pipeline
    # carries no sampling-rate metadata, so it must already be at 16 kHz.
    target_sr = 16000
    waveform, _ = librosa.load(audio, sr=target_sr)

    # Run inference
    result = pipe(waveform, generate_kwargs=generate_kwargs)
    return result["text"]
# HTML snippet (centered banner image) passed to the interface as its description
description = """
<p align="center">
<img src="https://huggingface.co/datasets/MakiAi/IconAssets/resolve/main/KotobaTranscriber.png" width="70%">
<br>
</p>
"""
theme = "JohnSmith9982/small_and_pretty"

# Gradio interface definition: one audio input delivered to `transcribe` as a
# file path, one plain-text output.
audio_input = gr.Audio(type="filepath", label="Upload Audio (MP3 or MP4)")
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="KotobaTranscriber",
    description=description,
    theme=theme,
)

# Launch the app, listening on all interfaces on port 7860
iface.launch(server_name="0.0.0.0", server_port=7860, share=True)