Spaces:
Running
on
Zero
Running
on
Zero
Update whisper2.py
Browse files- whisper2.py +7 -1
whisper2.py
CHANGED
|
@@ -10,7 +10,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
| 10 |
|
| 11 |
print("[ INFO ] Device: ", device)
|
| 12 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 13 |
-
|
| 14 |
|
| 15 |
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
|
| 16 |
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
|
@@ -26,7 +26,13 @@ def convert_forced_to_tokens(forced_decoder_ids):
|
|
| 26 |
return forced_decoder_tokens
|
| 27 |
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def generate(audio):
|
|
|
|
| 30 |
input_audio, sample_rate = torchaudio.load(audio)
|
| 31 |
|
| 32 |
#metadata = torchaudio.info(audio)
|
|
|
|
| 10 |
|
| 11 |
print("[ INFO ] Device: ", device)
|
| 12 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 13 |
+
|
| 14 |
|
| 15 |
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
|
| 16 |
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
|
|
|
| 26 |
return forced_decoder_tokens
|
| 27 |
|
| 28 |
|
| 29 |
+
def change_formate(input_file):
    """Re-encode ``input_file`` with the audio sample rate set to 16000.

    The converted file is written next to the input, with ``16_`` prefixed
    to the file name, and any existing file of that name is overwritten.

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        Path of the converted file (``<dir>/16_<name>``).
    """
    import os  # local import: the file's import block is outside this view

    # Prefix only the file name. Prepending to the whole path would turn
    # "/tmp/x/a.wav" into "16_/tmp/x/a.wav", which points into a directory
    # named "16_" instead of next to the input file. For a bare file name
    # the result is unchanged ("a.wav" -> "16_a.wav").
    directory, filename = os.path.split(input_file)
    output_file = os.path.join(directory, "16_" + filename)

    # NOTE(review): `ffmpeg` is presumably the ffmpeg-python package imported
    # at the top of the file - confirm. 'ar' is passed through as the output
    # option for the audio sample rate.
    ffmpeg.input(input_file).output(output_file, loglevel='quiet', **{'ar': '16000'}).run(overwrite_output=True)
    return output_file
|
| 32 |
+
|
| 33 |
+
|
| 34 |
def generate(audio):
|
| 35 |
+
audio = change_formate(audio)
|
| 36 |
input_audio, sample_rate = torchaudio.load(audio)
|
| 37 |
|
| 38 |
#metadata = torchaudio.info(audio)
|