Marti Umbert
committed on
Commit
·
936478d
1
Parent(s):
777c863
whisperlivekit/core.py: load models for translation in class WhisperLiveKit
Browse files
- whisperlivekit/core.py +9 -0
whisperlivekit/core.py
CHANGED
|
@@ -4,6 +4,10 @@ except ImportError:
|
|
| 4 |
from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
|
| 5 |
from argparse import Namespace, ArgumentParser
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
def parse_args():
|
| 8 |
parser = ArgumentParser(description="Whisper FastAPI Online Server")
|
| 9 |
parser.add_argument(
|
|
@@ -167,6 +171,11 @@ class WhisperLiveKit:
|
|
| 167 |
self.asr, self.tokenizer = backend_factory(self.args)
|
| 168 |
warmup_asr(self.asr, self.args.warmup_file)
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
if self.args.diarization:
|
| 171 |
from whisperlivekit.diarization.diarization_online import DiartDiarization
|
| 172 |
self.diarization = DiartDiarization()
|
|
|
|
| 4 |
from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
|
| 5 |
from argparse import Namespace, ArgumentParser
|
| 6 |
|
| 7 |
+
import ctranslate2
|
| 8 |
+
import pyonmttok
|
| 9 |
+
from huggingface_hub import snapshot_download
|
| 10 |
+
|
| 11 |
def parse_args():
|
| 12 |
parser = ArgumentParser(description="Whisper FastAPI Online Server")
|
| 13 |
parser.add_argument(
|
|
|
|
| 171 |
self.asr, self.tokenizer = backend_factory(self.args)
|
| 172 |
warmup_asr(self.asr, self.args.warmup_file)
|
| 173 |
|
| 174 |
+
# translate from transcription
|
| 175 |
+
model_dir = snapshot_download(repo_id="projecte-aina/aina-translator-ca-es", revision="main")
|
| 176 |
+
self.translation_tokenizer = pyonmttok.Tokenizer(mode="none", sp_model_path=model_dir + "/spm.model")
|
| 177 |
+
self.translator = ctranslate2.Translator(model_dir)
|
| 178 |
+
|
| 179 |
if self.args.diarization:
|
| 180 |
from whisperlivekit.diarization.diarization_online import DiartDiarization
|
| 181 |
self.diarization = DiartDiarization()
|