Upload 5 files
Browse files
- README.md +13 -12
- app.py +53 -0
- packages.txt +1 -0
- pre-requirements.txt +1 -0
- requirements.txt +51 -0
README.md
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: test nemo
|
| 3 |
+
emoji: 🙄
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
if os.environ.get("SPACES_ZERO_GPU") is not None:
|
| 3 |
+
import spaces
|
| 4 |
+
else:
|
| 5 |
+
class spaces:
|
| 6 |
+
@staticmethod
|
| 7 |
+
def GPU(func):
|
| 8 |
+
def wrapper(*args, **kwargs):
|
| 9 |
+
return func(*args, **kwargs)
|
| 10 |
+
return wrapper
|
| 11 |
+
import gradio as gr
import subprocess

# NOTE(review): the AI4Bharat NeMo fork was previously installed at runtime by
# this shell command; presumably it is now installed via requirements.txt.
#subprocess.run("git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh", shell=True)

import torch
import nemo.collections.asr as nemo_asr

from pathlib import Path

# Malayalam hybrid CTC/RNNT Conformer ASR model from AI4Bharat.
# NOTE(review): from_pretrained downloads the checkpoint at module import time.
model = nemo_asr.models.ASRModel.from_pretrained("ai4bharat/indicconformer_stt_ml_hybrid_rnnt_large")

# Prefer GPU when available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.freeze() # inference mode
model = model.to(device) # transfer model to device
@spaces.GPU
def infer(srcfile: str):
    """Transcribe an audio file in Malayalam with both decoder heads.

    Args:
        srcfile: Path to the user-supplied input audio (any format ffmpeg
            can read).

    Returns:
        Tuple of (ctc_text, rnnt_text) transcriptions.

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails to convert the input.
    """
    import tempfile

    # Unique temp file per call: the previous fixed filename collided across
    # concurrent requests, and ffmpeg refuses to overwrite an existing file.
    fd, tmpfile = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # Convert to 16 kHz mono WAV, which the ASR model expects.
        # List-form argv with shell=False prevents shell injection through the
        # user-controlled file path; check=True surfaces conversion failures
        # instead of silently transcribing a missing/stale file.
        subprocess.run(
            ["ffmpeg", "-y", "-i", srcfile, "-ac", "1", "-ar", "16000", tmpfile],
            check=True,
        )

        # First pass: CTC decoder head.
        model.cur_decoder = "ctc"
        ctc_text = model.transcribe([tmpfile], batch_size=1, logprobs=False, language_id='ml')[0]
        print(ctc_text)

        # Second pass: RNNT decoder head on the same converted audio.
        model.cur_decoder = "rnnt"
        rnnt_text = model.transcribe([tmpfile], batch_size=1, language_id='ml')[0]
        print(rnnt_text)
    finally:
        # Always remove the temp file, even when conversion/transcription fails.
        if Path(tmpfile).exists():
            Path(tmpfile).unlink()

    return ctc_text, rnnt_text
| 43 |
+
|
| 44 |
+
# Minimal Gradio UI: one audio input, one Run button, two output textboxes
# (one per decoder head).
with gr.Blocks() as demo:
    # Accepts an uploaded file or a microphone recording; Gradio hands the
    # callback a filesystem path ("type=filepath") to a WAV file.
    input_audio = gr.Audio(label="Input", type="filepath", sources=["upload", "microphone"], format="wav")
    run_button = gr.Button("Run", variant="primary")
    with gr.Row():
        # Side-by-side transcriptions from the CTC and RNNT decoders.
        ctc_text = gr.Textbox(label="CTC", value="", show_copy_button=True)
        rnnt_text = gr.Textbox(label="RNNT", value="", show_copy_button=True)

    # infer returns (ctc_text, rnnt_text), mapped onto the two textboxes.
    run_button.click(infer, [input_audio], [ctc_text, rnnt_text])

demo.launch()
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
pre-requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pip>=24.1
|
requirements.txt
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
huggingface_hub===0.20.3
|
| 2 |
+
torch
|
| 3 |
+
git+https://github.com/AI4Bharat/NeMo
|
| 4 |
+
numba
|
| 5 |
+
numpy<2
|
| 6 |
+
onnx>=1.7.0
|
| 7 |
+
python-dateutil
|
| 8 |
+
ruamel.yaml
|
| 9 |
+
scikit-learn
|
| 10 |
+
setuptools>=65.5.1
|
| 11 |
+
tensorboard
|
| 12 |
+
text-unidecode
|
| 13 |
+
torch
|
| 14 |
+
tqdm>=4.41.0
|
| 15 |
+
triton
|
| 16 |
+
wget
|
| 17 |
+
wrapt
|
| 18 |
+
datasets
|
| 19 |
+
inflect
|
| 20 |
+
pandas
|
| 21 |
+
sacremoses>=0.0.43
|
| 22 |
+
sentencepiece<1.0.0
|
| 23 |
+
braceexpand
|
| 24 |
+
editdistance
|
| 25 |
+
g2p_en
|
| 26 |
+
ipywidgets
|
| 27 |
+
jiwer
|
| 28 |
+
kaldi-python-io
|
| 29 |
+
kaldiio
|
| 30 |
+
lhotse>=1.20.0
|
| 31 |
+
librosa>=0.10.0
|
| 32 |
+
marshmallow
|
| 33 |
+
matplotlib
|
| 34 |
+
packaging
|
| 35 |
+
pyannote.core
|
| 36 |
+
pyannote.metrics
|
| 37 |
+
pydub
|
| 38 |
+
pyloudnorm
|
| 39 |
+
resampy
|
| 40 |
+
ruamel.yaml
|
| 41 |
+
scipy>=0.14
|
| 42 |
+
soundfile
|
| 43 |
+
sox
|
| 44 |
+
texterrors
|
| 45 |
+
hydra-core>1.3,<=1.3.2
|
| 46 |
+
omegaconf<=2.3
|
| 47 |
+
pytorch-lightning>=2.2.1
|
| 48 |
+
torchmetrics>=0.11.0
|
| 49 |
+
transformers>=4.36.0
|
| 50 |
+
wandb
|
| 51 |
+
webdataset>=0.2.86
|