RED

Sleeping

App Files Files Community

nabla-w committed on Sep 15, 2024

Commit

5da6c3d

1 Parent(s): af8f854

set up everything

Browse files

Files changed (5) hide show

Makefile +17 -0
README.md +29 -0
app.py +96 -0
requirements.txt +18 -0
resemble-enhance +1 -0

Makefile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Every target here names a task, not a file to build; declaring them phony
+# keeps a stray file or directory called "test", "install", etc. from making
+# `make` skip the recipe.
+.PHONY: install test debug format lint all
+# Upgrade pip, then install the pinned dependencies.
+install:
+	pip install --upgrade pip &&\
+		pip install -r requirements.txt
+# Runs app.py directly; no pytest suite is wired up yet (see `debug`).
+test:
+	python app.py
+# The recipes below are placeholders: their commands are commented out.
+debug:
+	#python -m pytest -vv --pdb	#Debugger is invoked
+format:
+	#black *.py
+lint:
+	#pylint --disable=R,C *.py
+# Prerequisites are listed in the order install, lint, test, format.
+all: install lint test format

README.md CHANGED Viewed

@@ -1,2 +1,31 @@
 # resemble-enhance-hf-demo
 Demo of Resemble Enhance, an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement.

 # resemble-enhance-hf-demo
 Demo of Resemble Enhance, an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement.
+---
+title: Demo
+emoji: 🌖
+colorFrom: purple
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: cc
+---
+[![Sync to Hugging Face hub](https://github.com/nogibjj/hugging-face/actions/workflows/main.yml/badge.svg)](https://github.com/nogibjj/hugging-face/actions/workflows/main.yml)
+[Try Demo Text Summarization Here](https://huggingface.co/spaces/noahgift/demo)
+![mlops-hugging-face](https://user-images.githubusercontent.com/58792/170845235-7f00d61c-ea36-4d28-82d0-3a9b8c0f1769.png)
+## References
+[Watch YouTube Walkthrough](https://youtu.be/VYSGjUa5sc4)

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import os
+import sys
+import argparse
+from functools import partial
+import gradio as gr
+import torch
+import torchaudio
+# Add the directory containing resemble-enhance to the Python path
+sys.path.append(os.path.abspath('resemble-enhance'))
+from resemble_enhance.enhancer.inference import denoise, enhance
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+def _fn(path, solver, nfe, tau, denoising, unlimited):
+    if path is None:
+        gr.Warning("Please upload an audio file.")
+        return None, None
+    info = torchaudio.info(path)
+    if not unlimited and (info.num_frames / info.sample_rate > 60):
+        gr.Warning("Only audio files shorter than 60 seconds are supported.")
+        return None, None
+    solver = solver.lower()
+    nfe = int(nfe)
+    lambd = 0.9 if denoising else 0.1
+    dwav, sr = torchaudio.load(path)
+    dwav = dwav.mean(dim=0)
+    wav1, new_sr = denoise(dwav, sr, device)
+    wav2, new_sr = enhance(dwav, sr, device, nfe=nfe, solver=solver, lambd=lambd, tau=tau)
+    wav1 = wav1.cpu().numpy()
+    wav2 = wav2.cpu().numpy()
+    return (new_sr, wav1), (new_sr, wav2)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--unlimited", action="store_true")
+    args = parser.parse_args()
+    inputs: list = [
+        gr.Audio(type="filepath", label="Input Audio"),
+        gr.Dropdown(
+            choices=["Midpoint", "RK4", "Euler"],
+            value="Midpoint",
+            label="CFM ODE Solver (Midpoint is recommended)",
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=128,
+            value=64,
+            step=1,
+            label="CFM Number of Function Evaluations (higher values in general yield better quality but may be slower)",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=1,
+            value=0.5,
+            step=0.01,
+            label="CFM Prior Temperature (higher values can improve quality but can reduce stability)",
+        ),
+        gr.Checkbox(
+            value=False,
+            label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
+        ),
+    ]
+    outputs: list = [
+        gr.Audio(label="Output Denoised Audio"),
+        gr.Audio(label="Output Enhanced Audio"),
+    ]
+    interface = gr.Interface(
+        fn=partial(_fn, unlimited=args.unlimited),
+        title="Resemble Enhance",
+        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
+        inputs=inputs,
+        outputs=outputs,
+    )
+    interface.launch()
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+celluloid==0.2.0
+deepspeed==0.15.1
+librosa==0.10.2.post1
+matplotlib==3.9.2
+numpy==2.0.2
+omegaconf==2.3.0
+pandas==2.2.2
+ptflops==0.7.3
+resampy==0.4.3
+rich==13.8.1
+scipy==1.14.1
+soundfile==0.12.1
+tabulate==0.9.0
+torch==2.4.0
+torchaudio==2.4.0
+torchvision==0.19.0
+tqdm==4.66.5
+gradio==4.44.0

resemble-enhance ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit bd713fae892212e0ae3bf76eabf4f5665e95b370