Marti Umbert committed on
Commit
936478d
·
1 Parent(s): 777c863

whisperlivekit/core.py: load models for translation in class WhisperLiveKit

Browse files
Files changed (1) hide show
  1. whisperlivekit/core.py +9 -0
whisperlivekit/core.py CHANGED
@@ -4,6 +4,10 @@ except ImportError:
4
  from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
5
  from argparse import Namespace, ArgumentParser
6
 
 
 
 
 
7
  def parse_args():
8
  parser = ArgumentParser(description="Whisper FastAPI Online Server")
9
  parser.add_argument(
@@ -167,6 +171,11 @@ class WhisperLiveKit:
167
  self.asr, self.tokenizer = backend_factory(self.args)
168
  warmup_asr(self.asr, self.args.warmup_file)
169
 
 
 
 
 
 
170
  if self.args.diarization:
171
  from whisperlivekit.diarization.diarization_online import DiartDiarization
172
  self.diarization = DiartDiarization()
 
4
  from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
5
  from argparse import Namespace, ArgumentParser
6
 
7
+ import ctranslate2
8
+ import pyonmttok
9
+ from huggingface_hub import snapshot_download
10
+
11
  def parse_args():
12
  parser = ArgumentParser(description="Whisper FastAPI Online Server")
13
  parser.add_argument(
 
171
  self.asr, self.tokenizer = backend_factory(self.args)
172
  warmup_asr(self.asr, self.args.warmup_file)
173
 
174
+ # translate from transcription
175
+ model_dir = snapshot_download(repo_id="projecte-aina/aina-translator-ca-es", revision="main")
176
+ self.translation_tokenizer = pyonmttok.Tokenizer(mode="none", sp_model_path=model_dir + "/spm.model")
177
+ self.translator = ctranslate2.Translator(model_dir)
178
+
179
  if self.args.diarization:
180
  from whisperlivekit.diarization.diarization_online import DiartDiarization
181
  self.diarization = DiartDiarization()