R-Kentaren commited on
Commit
cb511f0
·
verified ·
1 Parent(s): 4051b07
Files changed (1) hide show
  1. app.py +545 -246
app.py CHANGED
@@ -20,15 +20,38 @@ from pedalboard.io import AudioFile
20
  from pydub import AudioSegment
21
  import noisereduce as nr
22
  import edge_tts
23
- from huggingface_hub import hf_hub_download, HfApi # For robust HF link handling
24
-
25
- # Suppress logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
27
 
28
- # Initialize converter
29
- converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
 
 
 
 
 
 
 
 
 
30
 
31
- # Theme & Title
32
  title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
33
  description = """
34
  <div style="text-align: center; font-size: 1.1em; color: #aaa; margin: 10px 0;">
@@ -37,253 +60,458 @@ Misuse of voice conversion technology is unethical. Use responsibly.<br>
37
  Authors are not liable for inappropriate usage.
38
  </div>
39
  """
40
- theme = "Thatguy099/Sonix" # Maintained as requested
41
 
42
- # Global constants
43
- PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
44
- MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB
45
- DOWNLOAD_DIR = "downloads"
46
- OUTPUT_DIR = "output"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- os.makedirs(DOWNLOAD_DIR, exist_ok=True)
49
- os.makedirs(OUTPUT_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # --- Utility Functions ---
52
- def find_files(directory, exts=(".pth", ".index", ".zip")):
53
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(exts)]
54
-
55
- def unzip_in_folder(zip_path, extract_to):
56
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
57
- for member in zip_ref.infolist():
58
- if not member.is_dir():
59
- # Preserve filename, avoid path traversal
60
- member.filename = os.path.basename(member.filename)
61
- zip_ref.extract(member, extract_to)
62
-
63
- def get_file_size(url):
64
- """Check file size for Hugging Face URLs, supporting resolve/main links."""
65
- if "huggingface" not in url.lower():
66
- raise ValueError("❌ Only Hugging Face links are allowed.")
67
- try:
68
- # The Hugging Face Hub API can handle various link types
69
- api = HfApi()
70
- # Extract repo_id and filename from the URL
71
- if "/resolve/main/" in url:
72
- parts = url.split("/resolve/main/")
73
- elif "/resolve/" in url:
74
- # Handle specific branches
75
- parts = url.split("/resolve/")
76
- parts[1] = parts[1].split("/", 1)[1] # Remove branch name
77
- else:
78
- # Assume it's a blob link or direct file link
79
- parts = url.rstrip("/").rsplit("/", 2)
80
- if len(parts) == 3:
81
- repo_parts = "/".join(parts[0].split("/")[-2:])
82
- filename = parts[2]
83
- repo_id = f"{parts[0].split('/')[-2]}/{parts[0].split('/')[-1]}"
84
- file_info = api.repo_info(repo_id=repo_id, repo_type="model")
85
- file_entry = next((f for f in file_info.siblings if f.rfilename == filename), None)
86
- if not file_entry:
87
- raise ValueError(f" File '{filename}' not found in repository '{repo_id}'.")
88
- file_size = file_entry.size
89
- if file_size > MAX_FILE_SIZE:
90
- raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
91
- return file_size
92
- else:
93
- raise ValueError("❌ Unable to parse Hugging Face URL.")
94
-
95
- repo_parts = parts[0].split("/")[-2:]
96
- repo_id = f"{repo_parts[0]}/{repo_parts[1]}"
97
- filename = parts[1]
98
-
99
- file_info = api.repo_info(repo_id=repo_id, repo_type="model")
100
- file_entry = next((f for f in file_info.siblings if f.rfilename == filename), None)
101
- if not file_entry:
102
- raise ValueError(f"❌ File '{filename}' not found in repository '{repo_id}'.")
103
 
104
- file_size = file_entry.size
105
- if file_size > MAX_FILE_SIZE:
106
- raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
107
- return file_size
108
- except Exception as e:
109
- raise RuntimeError(f" Failed to fetch file info: {str(e)}")
110
-
111
- def clear_directory_later(directory, delay=30):
112
- """Clear temp directory after delay in a background thread."""
113
- def _clear():
114
- time.sleep(delay)
115
- if os.path.exists(directory):
116
- shutil.rmtree(directory, ignore_errors=True)
117
- print(f"🧹 Cleaned up: {directory}")
118
- threading.Thread(target=_clear, daemon=True).start()
119
-
120
- def find_model_and_index(directory):
121
- files = find_files(directory)
122
- model = next((f for f in files if f.endswith(".pth")), None)
123
- index = next((f for f in files if f.endswith(".index")), None)
124
- return model, index
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- # --- Model Download Handler ---
127
- @spaces.GPU(duration=60)
128
- def download_model(url_data):
129
- if not url_data.strip():
130
- raise ValueError("❌ No URL provided.")
131
 
132
- urls = [u.strip() for u in url_data.split(",") if u.strip()]
133
- if len(urls) > 2:
134
- raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- # Validate size first
137
- for url in urls:
138
- get_file_size(url)
139
 
140
- folder_name = f"model_{random.randint(1000, 9999)}"
141
- directory = os.path.join(DOWNLOAD_DIR, folder_name)
142
- os.makedirs(directory, exist_ok=True)
143
 
144
- try:
145
- downloaded_files = []
146
- for url in urls:
147
- # Use the robust Hugging Face Hub library for download
148
- # This handles resolve/main, blob, and other link types seamlessly
149
- parsed_url = urllib.parse.urlparse(url)
150
- path_parts = parsed_url.path.strip("/").split("/")
151
- if len(path_parts) < 4:
152
- raise ValueError("❌ Invalid Hugging Face URL structure.")
153
- repo_id = f"{path_parts[0]}/{path_parts[1]}"
154
- revision = "main"
155
- if "resolve" in path_parts:
156
- resolve_idx = path_parts.index("resolve")
157
- if resolve_idx + 1 < len(path_parts):
158
- revision = path_parts[resolve_idx + 1]
159
- filename = "/".join(path_parts[resolve_idx + 2:])
160
  else:
161
- # Assume it's a blob link pointing to a file
162
- filename = path_parts[-1]
163
- # Download the file
164
- local_path = hf_hub_download(
165
- repo_id=repo_id,
166
- filename=filename,
167
- revision=revision,
168
- cache_dir=directory,
169
- local_dir=directory,
170
- local_dir_use_symlinks=False
171
- )
172
- downloaded_files.append(local_path)
173
 
174
- # Unzip if needed
175
- for f in find_files(directory, (".zip",)):
176
- unzip_in_folder(f, directory)
 
 
177
 
178
- model, index = find_model_and_index(directory)
179
 
180
- if not model:
181
- raise ValueError("❌ .pth model file not found in downloaded content.")
182
- gr.Info(f" Model loaded: {os.path.basename(model)}")
183
- if index:
184
- gr.Info(f"📌 Index loaded: {os.path.basename(index)}")
185
- else:
186
- gr.Warning("⚠️ Index file not found – conversion may be less accurate.")
187
-
188
- # Schedule cleanup
189
- clear_directory_later(directory, delay=30)
190
-
191
- return os.path.abspath(model), os.path.abspath(index) if index else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
- except Exception as e:
194
- shutil.rmtree(directory, ignore_errors=True)
195
- raise gr.Error(f"❌ Download failed: {str(e)}")
196
-
197
- # --- Audio Processing ---
198
- def apply_noisereduce(audio_paths):
199
- results = []
200
- for path in audio_paths:
201
- out_path = f"{os.path.splitext(path)[0]}_denoised.wav"
202
  try:
203
- audio = AudioSegment.from_file(path)
204
- samples = np.array(audio.get_array_of_samples())
205
- sr = audio.frame_rate
206
- reduced = nr.reduce_noise(y=samples.astype(np.float32), sr=sr, prop_decrease=0.6)
207
- reduced_audio = AudioSegment(
208
- reduced.tobytes(),
209
- frame_rate=sr,
210
- sample_width=audio.sample_width,
211
- channels=audio.channels
212
- )
213
- reduced_audio.export(out_path, format="wav")
214
- results.append(out_path)
215
- gr.Info("🔊 Noise reduction applied.")
216
  except Exception as e:
217
- print(f"Noise reduction failed: {e}")
218
- results.append(path)
219
- return results
220
-
221
- def apply_audio_effects(audio_paths):
222
- results = []
223
- board = Pedalboard([
224
- HighpassFilter(cutoff_frequency_hz=80),
225
- Compressor(ratio=4, threshold_db=-15),
226
- Reverb(room_size=0.15, damping=0.7, wet_level=0.15, dry_level=0.85)
227
- ])
228
- for path in audio_paths:
229
- out_path = f"{os.path.splitext(path)[0]}_reverb.wav"
230
  try:
231
- with AudioFile(path) as f:
232
- with AudioFile(out_path, 'w', f.samplerate, f.num_channels) as o:
233
- while f.tell() < f.frames:
234
- chunk = f.read(int(f.samplerate))
235
- effected = board(chunk, f.samplerate)
236
- o.write(effected)
237
- results.append(out_path)
238
- gr.Info("🎛️ Audio effects applied.")
239
  except Exception as e:
240
- print(f"Effects failed: {e}")
241
- results.append(path)
242
- return results
243
-
244
- # --- TTS Handler ---
245
- async def generate_tts(text, voice, output_path):
246
- communicate = edge_tts.Communicate(text, voice.split("-")[0])
247
- await communicate.save(output_path)
248
-
249
- def infer_tts(tts_voice, tts_text, play_tts):
250
- if not tts_text.strip():
251
- raise ValueError("❌ Text is empty.")
252
- folder = f"tts_{random.randint(10000, 99999)}"
253
- out_dir = os.path.join(OUTPUT_DIR, folder)
254
- os.makedirs(out_dir, exist_ok=True)
255
- out_path = os.path.join(out_dir, "tts_output.mp3")
256
-
257
- try:
258
- asyncio.run(generate_tts(tts_text, tts_voice, out_path))
259
- if play_tts:
260
- return [out_path], out_path
261
- return [out_path], None
262
- except Exception as e:
263
- raise gr.Error(f"TTS generation failed: {str(e)}")
264
 
265
- # --- Main Conversion Function ---
266
  @spaces.GPU()
267
  def run_conversion(
268
- audio_files,
269
- model_path,
270
- pitch_algo,
271
- pitch_level,
272
- index_path,
273
- index_rate,
274
- filter_radius,
275
- rms_mix_rate,
276
- protect,
277
- denoise,
278
- effects,
279
- ):
 
 
280
  if not audio_files:
281
  raise ValueError("❌ Please upload at least one audio file.")
 
 
 
 
 
282
 
283
  random_tag = f"USER_{random.randint(10000000, 99999999)}"
284
 
285
  # Configure converter
286
- converter.apply_conf(
287
  tag=random_tag,
288
  file_model=model_path,
289
  pitch_algo=pitch_algo,
@@ -298,21 +526,26 @@ def run_conversion(
298
 
299
  # Run conversion
300
  try:
301
- results = converter(audio_files, random_tag, overwrite=False, parallel_workers=8)
302
  except Exception as e:
 
303
  raise gr.Error(f"❌ Conversion failed: {str(e)}")
304
 
305
  # Post-processing
306
  if denoise:
307
- results = apply_noisereduce(results)
308
  if effects:
309
- results = apply_audio_effects(results)
310
 
311
  return results
312
 
313
- # --- Gradio UI Builder ---
314
  def create_ui():
315
- with gr.Blocks(theme=theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200)) as app:
 
 
 
 
316
  gr.HTML(title)
317
  gr.HTML(description)
318
 
@@ -323,7 +556,7 @@ def create_ui():
323
  with gr.Column(scale=1):
324
  gr.Markdown("### 🔊 Upload Audio")
325
  audio_input = gr.File(
326
- label="Audio Files (WAV, MP3, OGG)",
327
  file_count="multiple",
328
  type="filepath"
329
  )
@@ -332,6 +565,19 @@ def create_ui():
332
  model_file = gr.File(label="Upload .pth Model", type="filepath")
333
  index_file = gr.File(label="Upload .index File (Optional)", type="filepath")
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  use_url = gr.Checkbox(label="🌐 Download from Hugging Face URL", value=False)
336
 
337
  with gr.Group(visible=False) as url_group:
@@ -354,12 +600,15 @@ def create_ui():
354
  )
355
 
356
  download_btn.click(
357
- download_model,
358
  inputs=[model_url],
359
  outputs=[model_file, index_file]
360
  ).then(
361
- lambda: gr.update(visible=False), # Hide URL group after download
362
  outputs=[url_group]
 
 
 
363
  )
364
 
365
  with gr.Column(scale=1):
@@ -376,9 +625,24 @@ def create_ui():
376
 
377
  denoise = gr.Checkbox(False, label="🔇 Denoise Output")
378
  reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
 
 
 
 
 
 
 
 
 
 
 
 
379
 
380
  convert_btn = gr.Button("🚀 Convert Voice", variant="primary", size="lg")
381
  output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
 
 
 
382
 
383
  convert_btn.click(
384
  run_conversion,
@@ -394,6 +658,7 @@ def create_ui():
394
  protect,
395
  denoise,
396
  reverb,
 
397
  ],
398
  outputs=output_files,
399
  )
@@ -402,13 +667,8 @@ def create_ui():
402
  with gr.Tab("🗣️ Text-to-Speech", id=1):
403
  gr.Markdown("### Convert text to speech using Edge TTS.")
404
 
405
- # Pre-fetch voices outside the main event loop if possible, or handle async properly
406
- try:
407
- tts_voice_list = sorted(
408
- [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
409
- )
410
- except:
411
- tts_voice_list = ["en-US-JennyNeural-Female"] # Fallback
412
 
413
  with gr.Row():
414
  with gr.Column(scale=1):
@@ -417,7 +677,11 @@ def create_ui():
417
  label="Text Input",
418
  lines=5
419
  )
420
- tts_voice = gr.Dropdown(tts_voice_list, value=tts_voice_list[0] if tts_voice_list else None, label="Voice")
 
 
 
 
421
  tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
422
  tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
423
 
@@ -426,7 +690,7 @@ def create_ui():
426
  tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
427
 
428
  tts_btn.click(
429
- infer_tts,
430
  inputs=[tts_voice, tts_text, tts_play],
431
  outputs=[tts_output_audio, tts_preview],
432
  ).then(
@@ -434,8 +698,43 @@ def create_ui():
434
  inputs=[tts_preview],
435
  outputs=[tts_preview]
436
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
- # Examples (Moved to be more prominent)
439
  gr.Markdown("### 📚 Examples")
440
  gr.Examples(
441
  examples=[
@@ -447,13 +746,13 @@ def create_ui():
447
  index_rate, filter_radius, rms_mix_rate, protect, denoise, reverb
448
  ],
449
  outputs=output_files,
450
- fn=run_conversion,
451
  cache_examples=False,
452
  )
453
 
454
  return app
455
 
456
- # --- Launch App ---
457
  if __name__ == "__main__":
458
  app = create_ui()
459
  app.queue(default_concurrency_limit=10)
 
20
  from pydub import AudioSegment
21
  import noisereduce as nr
22
  import edge_tts
23
+ from huggingface_hub import hf_hub_download, HfApi
24
+ from typing import List, Tuple, Optional, Dict, Any
25
+ import json
26
+ from pathlib import Path
27
+
28
+ # Configure logging
29
+ logging.basicConfig(
30
+ level=logging.INFO,
31
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
32
+ handlers=[
33
+ logging.FileHandler("rvc_app.log"),
34
+ logging.StreamHandler()
35
+ ]
36
+ )
37
+ logger = logging.getLogger("RVC_APP")
38
+
39
+ # Suppress third-party logging
40
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
41
 
42
+ # Constants
43
+ PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
44
+ MAX_FILE_SIZE = 500 * 1024 * 1024 # 500 MB
45
+ DOWNLOAD_DIR = "downloads"
46
+ OUTPUT_DIR = "output"
47
+ CONFIG_FILE = "rvc_config.json"
48
+ SUPPORTED_AUDIO_FORMATS = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
49
+
50
+ # Create necessary directories
51
+ os.makedirs(DOWNLOAD_DIR, exist_ok=True)
52
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
53
 
54
+ # Theme and UI Configuration
55
  title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
56
  description = """
57
  <div style="text-align: center; font-size: 1.1em; color: #aaa; margin: 10px 0;">
 
60
  Authors are not liable for inappropriate usage.
61
  </div>
62
  """
 
63
 
64
+ # Custom Gradio theme: Soft base theme with an OrangeRed secondary palette
65
+ from gradio.themes import Soft
66
+ from gradio.themes.utils import colors, fonts, sizes
67
+
68
+ # Define the new OrangeRed color palette
69
+ colors.orange_red = colors.Color(
70
+ name="orange_red",
71
+ c50="#FFF0E5",
72
+ c100="#FFE0CC",
73
+ c200="#FFC299",
74
+ c300="#FFA366",
75
+ c400="#FF8533",
76
+ c500="#FF4500", # OrangeRed base color
77
+ c600="#E63E00",
78
+ c700="#CC3700",
79
+ c800="#B33000",
80
+ c900="#992900",
81
+ c950="#802200",
82
+ )
83
+
84
class OrangeRedTheme(Soft):
    """Gradio ``Soft`` theme variant using gray as primary and OrangeRed as secondary hue.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; afterwards a fixed set of style variables (backgrounds,
    button gradients, slider colour, block borders/shadows) is applied via
    ``set``.
    """

    def __init__(
        self,
        *,
        # Annotations are written as strings so they are never evaluated at
        # class-definition time: ``Iterable`` is not among the names the
        # visible import block provides, and ``X | Y`` between classes is a
        # runtime error on interpreters older than 3.10.
        primary_hue: "colors.Color | str" = colors.gray,
        secondary_hue: "colors.Color | str" = colors.orange_red,
        neutral_hue: "colors.Color | str" = colors.slate,
        text_size: "sizes.Size | str" = sizes.text_lg,
        font: "fonts.Font | str | Iterable[fonts.Font | str]" = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: "fonts.Font | str | Iterable[fonts.Font | str]" = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        """Build the theme.

        Args:
            primary_hue: Primary palette (default ``colors.gray``).
            secondary_hue: Accent palette (default ``colors.orange_red``,
                which must have been registered on ``colors`` beforehand).
            neutral_hue: Neutral palette (default ``colors.slate``).
            text_size: Base text size (default ``sizes.text_lg``).
            font: Font stack for regular text.
            font_mono: Font stack for monospaced text.
        """
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        # "*name" values are references to other theme variables, resolved by Gradio.
        super().set(
            background_fill_primary="*primary_50",
            background_fill_primary_dark="*primary_900",
            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
            button_primary_text_color="white",
            button_primary_text_color_hover="white",
            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_secondary_text_color="black",
            button_secondary_text_color_hover="white",
            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
            slider_color="*secondary_500",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_primary_shadow="*shadow_drop_lg",
            button_large_padding="11px",
            color_accent_soft="*primary_100",
            block_label_background_fill="*primary_200",
        )
134
 
135
+ # Instantiate the theme
136
+ orange_red_theme = OrangeRedTheme()
137
+
138
+ # Custom CSS overrides for the UI
139
+ css = """
140
+ #main-title h1 {
141
+ font-size: 2.3em !important;
142
+ }
143
+ #output-title h2 {
144
+ font-size: 2.1em !important;
145
+ }
146
+ :root {
147
+ --color-grey-50: #f9fafb;
148
+ --banner-background: var(--secondary-400);
149
+ --banner-text-color: var(--primary-100);
150
+ --banner-background-dark: var(--secondary-800);
151
+ --banner-text-color-dark: var(--primary-100);
152
+ --banner-chrome-height: calc(16px + 43px);
153
+ --chat-chrome-height-wide-no-banner: 320px;
154
+ --chat-chrome-height-narrow-no-banner: 450px;
155
+ --chat-chrome-height-wide: calc(var(--chat-chrome-height-wide-no-banner) + var(--banner-chrome-height));
156
+ --chat-chrome-height-narrow: calc(var(--chat-chrome-height-narrow-no-banner) + var(--banner-chrome-height));
157
+ }
158
+ .banner-message { background-color: var(--banner-background); padding: 5px; margin: 0; border-radius: 5px; border: none; }
159
+ .banner-message-text { font-size: 13px; font-weight: bolder; color: var(--banner-text-color) !important; }
160
+ body.dark .banner-message { background-color: var(--banner-background-dark) !important; }
161
+ body.dark .gradio-container .contain .banner-message .banner-message-text { color: var(--banner-text-color-dark) !important; }
162
+ .toast-body { background-color: var(--color-grey-50); }
163
+ .html-container:has(.css-styles) { padding: 0; margin: 0; }
164
+ .css-styles { height: 0; }
165
+ .model-message { text-align: end; }
166
+ .model-dropdown-container { display: flex; align-items: center; gap: 10px; padding: 0; }
167
+ .user-input-container .multimodal-textbox{ border: none !important; }
168
+ .control-button { height: 51px; }
169
+ button.cancel { border: var(--button-border-width) solid var(--button-cancel-border-color); background: var(--button-cancel-background-fill); color: var(--button-cancel-text-color); box-shadow: var(--button-cancel-shadow); }
170
+ button.cancel:hover, .cancel[disabled] { background: var(--button-cancel-background-fill-hover); color: var(--button-cancel-text-color-hover); }
171
+ .opt-out-message { top: 8px; }
172
+ .opt-out-message .html-container, .opt-out-checkbox label { font-size: 14px !important; padding: 0 !important; margin: 0 !important; color: var(--neutral-400) !important; }
173
+ div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; max-height: 900px !important; }
174
+ div.no-padding { padding: 0 !important; }
175
+ @media (max-width: 1280px) { div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; } }
176
+ @media (max-width: 1024px) {
177
+ .responsive-row { flex-direction: column; }
178
+ .model-message { text-align: start; font-size: 10px !important; }
179
+ .model-dropdown-container { flex-direction: column; align-items: flex-start; }
180
+ div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-narrow)) !important; }
181
+ }
182
+ @media (max-width: 400px) {
183
+ .responsive-row { flex-direction: column; }
184
+ .model-message { text-align: start; font-size: 10px !important; }
185
+ .model-dropdown-container { flex-direction: column; align-items: flex-start; }
186
+ div.block.chatbot { max-height: 360px !important; }
187
+ }
188
+ @media (max-height: 932px) { .chatbot { max-height: 500px !important; } }
189
+ @media (max-height: 1280px) { div.block.chatbot { max-height: 800px !important; } }
190
+ """
191
 
192
+ # Model Management Class
193
class ModelManager:
    """Own the RVC converter and handle model download, discovery and bookkeeping.

    Attributes:
        converter: ``BaseLoader`` created with ``only_cpu=False`` and default
            HuBERT / RMVPE paths.
        loaded_models: Dict reserved as a model cache; nothing in this class
            populates it.
        config: Settings persisted to ``CONFIG_FILE`` (``recent_models`` list
            and ``default_settings`` dict).
    """

    def __init__(self):
        self.converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
        self.loaded_models = {}  # Cache for loaded models
        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Return the stored configuration merged over the built-in defaults.

        A missing, unreadable or non-dict config file yields the defaults, so
        the ``recent_models`` and ``default_settings`` keys always exist.
        """
        config: Dict[str, Any] = {"recent_models": [], "default_settings": {}}
        if os.path.exists(CONFIG_FILE):
            try:
                with open(CONFIG_FILE, 'r', encoding="utf-8") as f:
                    loaded = json.load(f)
            except Exception as e:
                logger.error(f"Failed to load config: {e}")
            else:
                if isinstance(loaded, dict):
                    config.update(loaded)
                else:
                    logger.error("Failed to load config: top-level JSON value is not an object")
        return config

    def save_config(self):
        """Write ``self.config`` to ``CONFIG_FILE``; failures are logged, not raised."""
        try:
            with open(CONFIG_FILE, 'w', encoding="utf-8") as f:
                json.dump(self.config, f)
        except Exception as e:
            logger.error(f"Failed to save config: {e}")

    def add_recent_model(self, model_path: str):
        """Record ``model_path`` in the recent-models list (max 5 entries) and persist it."""
        recent = self.config.get("recent_models")
        if not isinstance(recent, list):
            # Tolerate a hand-edited or older config file without a usable list.
            recent = []
        if model_path not in recent:
            recent.append(model_path)
            # Keep only the 5 most recent models
            self.config["recent_models"] = recent[-5:]
            self.save_config()

    def find_files(self, directory: str, exts: Tuple[str, ...] = (".pth", ".index", ".zip")) -> List[str]:
        """Return paths of entries directly inside ``directory`` whose name ends with one of ``exts``."""
        return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(exts)]

    def unzip_in_folder(self, zip_path: str, extract_to: str):
        """Extract every file of ``zip_path`` flat into ``extract_to``.

        Archive directory structure is discarded: each member is renamed to
        its basename before extraction, which also prevents path traversal.
        """
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for member in zip_ref.infolist():
                if member.is_dir():
                    continue
                flat_name = os.path.basename(member.filename)
                if not flat_name:
                    continue
                member.filename = flat_name
                zip_ref.extract(member, extract_to)

    @staticmethod
    def _parse_hf_url(url: str) -> Tuple[str, str, str]:
        """Split a Hugging Face file URL into ``(repo_id, revision, filename)``.

        Recognised layout: ``/<owner>/<repo>/<resolve|blob|raw>/<revision>/<path...>``.
        Query strings are ignored and percent-encoding is decoded. Any other
        path with at least four segments falls back to the last segment as the
        filename on revision ``main``.

        Raises:
            ValueError: If the path has fewer than four segments.
        """
        parsed = urllib.parse.urlparse(url.strip())
        segments = [urllib.parse.unquote(part) for part in parsed.path.split("/") if part]
        if len(segments) < 4:
            raise ValueError("❌ Invalid Hugging Face URL structure.")
        repo_id = f"{segments[0]}/{segments[1]}"
        if len(segments) >= 5 and segments[2] in ("resolve", "blob", "raw"):
            return repo_id, segments[3], "/".join(segments[4:])
        return repo_id, "main", segments[-1]

    def get_file_size(self, url: str) -> int:
        """Return the size in bytes of the file behind a Hugging Face URL.

        Raises:
            ValueError: If ``url`` does not contain "huggingface".
            RuntimeError: If the URL cannot be parsed, the file is not in the
                repository, its size is unknown, or it exceeds ``MAX_FILE_SIZE``.
        """
        if "huggingface" not in url.lower():
            raise ValueError("❌ Only Hugging Face links are allowed.")

        try:
            repo_id, revision, filename = self._parse_hf_url(url)
            # files_metadata=True is required: without it the siblings carry
            # no size and the comparison below would fail on None.
            file_info = HfApi().repo_info(
                repo_id=repo_id,
                revision=revision,
                repo_type="model",
                files_metadata=True,
            )
            file_entry = next(
                (f for f in (file_info.siblings or []) if f.rfilename == filename),
                None,
            )
            if not file_entry:
                raise ValueError(f"❌ File '{filename}' not found in repository '{repo_id}'.")

            file_size = file_entry.size
            if file_size is None:
                raise ValueError(f"❌ Size of '{filename}' is not available.")
            if file_size > MAX_FILE_SIZE:
                raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
            return file_size
        except Exception as e:
            raise RuntimeError(f"❌ Failed to fetch file info: {str(e)}") from e

    def clear_directory_later(self, directory: str, delay: int = 30):
        """Delete ``directory`` after ``delay`` seconds from a daemon thread."""
        def _clear():
            time.sleep(delay)
            if os.path.exists(directory):
                shutil.rmtree(directory, ignore_errors=True)
                logger.info(f"🧹 Cleaned up: {directory}")
        threading.Thread(target=_clear, daemon=True).start()

    def find_model_and_index(self, directory: str) -> Tuple[Optional[str], Optional[str]]:
        """Return the first ``.pth`` and first ``.index`` path found in ``directory`` (``None`` if absent)."""
        files = self.find_files(directory)
        model = next((f for f in files if f.endswith(".pth")), None)
        index = next((f for f in files if f.endswith(".index")), None)
        return model, index

    @spaces.GPU(duration=60)
    def download_model(self, url_data: str) -> Tuple[str, Optional[str]]:
        """Download a model (and optional index) from Hugging Face.

        Args:
            url_data: One or two comma-separated Hugging Face file URLs
                (``.pth``, ``.index`` or a ``.zip`` containing them).

        Returns:
            Absolute path of the ``.pth`` file and of the ``.index`` file
            (``None`` when no index was found). The download directory is
            scheduled for deletion 30 seconds after a successful download.

        Raises:
            ValueError: If no URL or more than two URLs are given, or a URL is
                not a Hugging Face link.
            RuntimeError: If the size pre-check fails.
            gr.Error: If downloading, unzipping or locating the model fails.
        """
        if not url_data or not url_data.strip():
            raise ValueError("❌ No URL provided.")

        urls = [u.strip() for u in url_data.split(",") if u.strip()]
        if len(urls) > 2:
            raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")

        # Validate size first
        for url in urls:
            self.get_file_size(url)

        folder_name = f"model_{random.randint(1000, 9999)}"
        directory = os.path.join(DOWNLOAD_DIR, folder_name)
        os.makedirs(directory, exist_ok=True)

        try:
            downloaded_files = []
            for url in urls:
                repo_id, revision, filename = self._parse_hf_url(url)
                local_path = hf_hub_download(
                    repo_id=repo_id,
                    filename=filename,
                    revision=revision,
                    cache_dir=directory,
                    local_dir=directory,
                    local_dir_use_symlinks=False
                )
                downloaded_files.append(local_path)

            # Unzip if needed. Files from a repository sub-folder are stored in
            # a matching sub-folder, so also consider the downloaded paths.
            archives = set(self.find_files(directory, (".zip",)))
            archives.update(p for p in downloaded_files if p.endswith(".zip"))
            for archive in sorted(archives):
                self.unzip_in_folder(archive, directory)

            model, index = self.find_model_and_index(directory)
            if model is None:
                model = next((p for p in downloaded_files if p.endswith(".pth")), None)
            if index is None:
                index = next((p for p in downloaded_files if p.endswith(".index")), None)

            if not model:
                raise ValueError("❌ .pth model file not found in downloaded content.")

            gr.Info(f"✅ Model loaded: {os.path.basename(model)}")
            if index:
                gr.Info(f"📌 Index loaded: {os.path.basename(index)}")
            else:
                gr.Warning("⚠️ Index file not found – conversion may be less accurate.")

            # Schedule cleanup
            self.clear_directory_later(directory, delay=30)

            # Add to recent models
            self.add_recent_model(os.path.abspath(model))

            return os.path.abspath(model), os.path.abspath(index) if index else None

        except Exception as e:
            shutil.rmtree(directory, ignore_errors=True)
            logger.error(f"Download failed: {e}")
            raise gr.Error(f"❌ Download failed: {str(e)}") from e
379
+
380
+ # Audio Processing Class
381
class AudioProcessor:
    """Post-processing helpers: noise reduction, effects and input validation."""

    @staticmethod
    def _float_to_pcm(samples, dtype):
        """Convert float samples back to the integer PCM ``dtype``.

        Values are rounded to the nearest integer and clipped to the range of
        ``dtype``; NaNs become 0. The work is done in float64 so the 32-bit
        integer limits are represented exactly.
        """
        limits = np.iinfo(dtype)
        values = np.nan_to_num(np.asarray(samples, dtype=np.float64))
        return np.clip(np.rint(values), limits.min, limits.max).astype(dtype)

    @staticmethod
    def apply_noisereduce(audio_paths: List[str]) -> List[str]:
        """Denoise each file and write the result next to it as ``*_denoised.wav``.

        Returns one path per input: the denoised file on success, or the
        original path when processing that file failed (the error is logged).
        """
        results = []
        for path in audio_paths:
            out_path = f"{os.path.splitext(path)[0]}_denoised.wav"
            try:
                audio = AudioSegment.from_file(path)
                samples = np.array(audio.get_array_of_samples())
                sr = audio.frame_rate
                channels = audio.channels

                signal = samples.astype(np.float32)
                if channels > 1:
                    # pydub interleaves channels sample by sample; split them
                    # into (channels, frames) so each channel is denoised on
                    # its own instead of as one scrambled mono stream.
                    signal = signal.reshape(-1, channels).T
                reduced = nr.reduce_noise(y=signal, sr=sr, prop_decrease=0.6)
                if channels > 1:
                    reduced = reduced.T.reshape(-1)  # back to interleaved order

                # The segment is rebuilt with the original sample width, so the
                # bytes must be integer PCM of the original dtype, not float32.
                pcm = AudioProcessor._float_to_pcm(reduced, samples.dtype)
                reduced_audio = AudioSegment(
                    pcm.tobytes(),
                    frame_rate=sr,
                    sample_width=audio.sample_width,
                    channels=channels
                )
                reduced_audio.export(out_path, format="wav")
                results.append(out_path)
                gr.Info("🔊 Noise reduction applied.")
            except Exception as e:
                logger.error(f"Noise reduction failed: {e}")
                results.append(path)
        return results

    @staticmethod
    def apply_audio_effects(audio_paths: List[str]) -> List[str]:
        """Run high-pass, compressor and reverb over each file, writing ``*_reverb.wav``.

        Returns one path per input: the processed file on success, or the
        original path when processing that file failed (the error is logged).
        """
        results = []
        board = Pedalboard([
            HighpassFilter(cutoff_frequency_hz=80),
            Compressor(ratio=4, threshold_db=-15),
            Reverb(room_size=0.15, damping=0.7, wet_level=0.15, dry_level=0.85)
        ])
        for path in audio_paths:
            out_path = f"{os.path.splitext(path)[0]}_reverb.wav"
            try:
                with AudioFile(path) as f:
                    with AudioFile(out_path, 'w', f.samplerate, f.num_channels) as o:
                        # Reset effect state once per file, then keep it across
                        # the one-second chunks so reverb tails and compressor
                        # envelopes are not cut at every chunk boundary.
                        first_chunk = True
                        while f.tell() < f.frames:
                            chunk = f.read(int(f.samplerate))
                            effected = board(chunk, f.samplerate, reset=first_chunk)
                            first_chunk = False
                            o.write(effected)
                results.append(out_path)
                gr.Info("🎛️ Audio effects applied.")
            except Exception as e:
                logger.error(f"Effects failed: {e}")
                results.append(path)
        return results

    @staticmethod
    def validate_audio_files(file_paths: List[str]) -> List[str]:
        """Return the paths whose extension is in ``SUPPORTED_AUDIO_FORMATS``.

        The check is by file extension only (case-insensitive); a UI warning
        is shown for every skipped file. ``None`` is treated as an empty list.
        """
        valid_files = []
        for path in file_paths or []:
            if os.path.splitext(path)[1].lower() in SUPPORTED_AUDIO_FORMATS:
                valid_files.append(path)
            else:
                gr.Warning(f"⚠️ Skipping unsupported file: {os.path.basename(path)}")
        return valid_files
444
+
445
+ # TTS Handler Class
446
class TTSHandler:
    """Handles text-to-speech functionality backed by Edge TTS."""

    # Gender suffixes that get_voice_list() appends to a voice ShortName.
    _GENDER_SUFFIXES = ("Male", "Female")

    @staticmethod
    def _voice_short_name(voice: str) -> str:
        """Return the Edge TTS voice name for a ``ShortName-Gender`` label.

        Only a trailing ``-Male`` / ``-Female`` component is removed, so a
        bare short name such as ``"en-US-JennyNeural"`` is returned as is.
        """
        short_name, _, suffix = voice.rpartition("-")
        if short_name and suffix in TTSHandler._GENDER_SUFFIXES:
            return short_name
        return voice

    @staticmethod
    async def generate_tts(text: str, voice: str, output_path: str):
        """Synthesize ``text`` with Edge TTS and save it to ``output_path``.

        Args:
            text: Text to speak.
            voice: Voice label as produced by ``get_voice_list`` (for
                example ``"en-US-JennyNeural-Female"``) or a bare short name.
            output_path: Destination file for the generated audio.
        """
        # The label carries a gender suffix for display; Edge TTS expects
        # the ShortName itself ("en-US-JennyNeural"), not just "en".
        communicate = edge_tts.Communicate(text, TTSHandler._voice_short_name(voice))
        await communicate.save(output_path)

    @staticmethod
    def infer_tts(tts_voice: str, tts_text: str, play_tts: bool) -> Tuple[List[str], Optional[str]]:
        """Generate TTS audio with the specified voice.

        Args:
            tts_voice: Voice label selected in the UI.
            tts_text: Text to synthesize.
            play_tts: Whether the generated file should also be returned
                as the preview value.

        Returns:
            A tuple ``(files, preview)``: ``files`` is a one-element list
            with the generated MP3 path, ``preview`` is that same path when
            ``play_tts`` is true and ``None`` otherwise.

        Raises:
            ValueError: If ``tts_text`` is missing or only whitespace.
            gr.Error: If speech generation fails.
        """
        if not tts_text or not tts_text.strip():
            raise ValueError("❌ Text is empty.")

        folder = f"tts_{random.randint(10000, 99999)}"
        out_dir = os.path.join(OUTPUT_DIR, folder)
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, "tts_output.mp3")

        try:
            asyncio.run(TTSHandler.generate_tts(tts_text, tts_voice, out_path))
        except Exception as e:
            logger.error(f"TTS generation failed: {e}")
            raise gr.Error(f"TTS generation failed: {str(e)}") from e

        return [out_path], (out_path if play_tts else None)

    @staticmethod
    def get_voice_list() -> List[str]:
        """Get the sorted list of available TTS voices.

        Returns:
            Labels of the form ``"<ShortName>-<Gender>"``. If the voice
            catalogue cannot be fetched, a single default voice is returned
            so the caller still has a choice to show.
        """
        try:
            return sorted(
                [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
            )
        except Exception as e:
            logger.error(f"Failed to get voice list: {e}")
            return ["en-US-JennyNeural-Female"]  # Fallback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
 
486
+ # Main Conversion Function
487
  @spaces.GPU()
488
  def run_conversion(
489
+ audio_files: List[str],
490
+ model_path: str,
491
+ pitch_algo: str,
492
+ pitch_level: int,
493
+ index_path: Optional[str],
494
+ index_rate: float,
495
+ filter_radius: int,
496
+ rms_mix_rate: float,
497
+ protect: float,
498
+ denoise: bool,
499
+ effects: bool,
500
+ model_manager: ModelManager
501
+ ) -> List[str]:
502
+ """Run voice conversion on the provided audio files."""
503
  if not audio_files:
504
  raise ValueError("❌ Please upload at least one audio file.")
505
+
506
+ # Validate audio files
507
+ audio_files = AudioProcessor.validate_audio_files(audio_files)
508
+ if not audio_files:
509
+ raise ValueError("❌ No valid audio files provided.")
510
 
511
  random_tag = f"USER_{random.randint(10000000, 99999999)}"
512
 
513
  # Configure converter
514
+ model_manager.converter.apply_conf(
515
  tag=random_tag,
516
  file_model=model_path,
517
  pitch_algo=pitch_algo,
 
526
 
527
  # Run conversion
528
  try:
529
+ results = model_manager.converter(audio_files, random_tag, overwrite=False, parallel_workers=8)
530
  except Exception as e:
531
+ logger.error(f"Conversion failed: {e}")
532
  raise gr.Error(f"❌ Conversion failed: {str(e)}")
533
 
534
  # Post-processing
535
  if denoise:
536
+ results = AudioProcessor.apply_noisereduce(results)
537
  if effects:
538
+ results = AudioProcessor.apply_audio_effects(results)
539
 
540
  return results
541
 
542
+ # Gradio UI Builder
543
  def create_ui():
544
+ """Create and configure the Gradio UI."""
545
+ # Initialize model manager
546
+ model_manager = ModelManager()
547
+
548
+ with gr.Blocks(theme=orange_red_theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200), css=css) as app:
549
  gr.HTML(title)
550
  gr.HTML(description)
551
 
 
556
  with gr.Column(scale=1):
557
  gr.Markdown("### 🔊 Upload Audio")
558
  audio_input = gr.File(
559
+ label="Audio Files (WAV, MP3, OGG, FLAC, M4A)",
560
  file_count="multiple",
561
  type="filepath"
562
  )
 
565
  model_file = gr.File(label="Upload .pth Model", type="filepath")
566
  index_file = gr.File(label="Upload .index File (Optional)", type="filepath")
567
 
568
+ # Recent models dropdown
569
+ recent_models = gr.Dropdown(
570
+ label="Recent Models",
571
+ choices=model_manager.config["recent_models"],
572
+ value=None,
573
+ interactive=True
574
+ )
575
+ recent_models.change(
576
+ lambda x: x if x else None,
577
+ inputs=[recent_models],
578
+ outputs=[model_file]
579
+ )
580
+
581
  use_url = gr.Checkbox(label="🌐 Download from Hugging Face URL", value=False)
582
 
583
  with gr.Group(visible=False) as url_group:
 
600
  )
601
 
602
  download_btn.click(
603
+ model_manager.download_model,
604
  inputs=[model_url],
605
  outputs=[model_file, index_file]
606
  ).then(
607
+ lambda: gr.update(visible=False), # Hide URL group after download
608
  outputs=[url_group]
609
+ ).then(
610
+ lambda: gr.update(choices=model_manager.config["recent_models"]),
611
+ outputs=[recent_models]
612
  )
613
 
614
  with gr.Column(scale=1):
 
625
 
626
  denoise = gr.Checkbox(False, label="🔇 Denoise Output")
627
  reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
628
+
629
+ # Save settings button
630
+ save_settings_btn = gr.Button("💾 Save as Default", size="sm")
631
+ save_settings_btn.click(
632
+ lambda *args: model_manager.config.update({"default_settings": {
633
+ "pitch_algo": args[0], "pitch_level": args[1], "index_rate": args[2],
634
+ "filter_radius": args[3], "rms_mix_rate": args[4], "protect": args[5],
635
+ "denoise": args[6], "reverb": args[7]
636
+ }}) or model_manager.save_config(),
637
+ inputs=[pitch_algo, pitch_level, index_rate, filter_radius,
638
+ rms_mix_rate, protect, denoise, reverb]
639
+ )
640
 
641
  convert_btn = gr.Button("🚀 Convert Voice", variant="primary", size="lg")
642
  output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
643
+
644
+ # Progress indicator
645
+ progress = gr.Progress()
646
 
647
  convert_btn.click(
648
  run_conversion,
 
658
  protect,
659
  denoise,
660
  reverb,
661
+ gr.State(model_manager) # Pass model manager as state
662
  ],
663
  outputs=output_files,
664
  )
 
667
  with gr.Tab("🗣️ Text-to-Speech", id=1):
668
  gr.Markdown("### Convert text to speech using Edge TTS.")
669
 
670
+ # Get voice list
671
+ tts_voice_list = TTSHandler.get_voice_list()
 
 
 
 
 
672
 
673
  with gr.Row():
674
  with gr.Column(scale=1):
 
677
  label="Text Input",
678
  lines=5
679
  )
680
+ tts_voice = gr.Dropdown(
681
+ tts_voice_list,
682
+ value=tts_voice_list[0] if tts_voice_list else None,
683
+ label="Voice"
684
+ )
685
  tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
686
  tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
687
 
 
690
  tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
691
 
692
  tts_btn.click(
693
+ TTSHandler.infer_tts,
694
  inputs=[tts_voice, tts_text, tts_play],
695
  outputs=[tts_output_audio, tts_preview],
696
  ).then(
 
698
  inputs=[tts_preview],
699
  outputs=[tts_preview]
700
  )
701
+
702
+ # ============= TAB 3: Settings =============
703
+ with gr.Tab("⚙️ Settings", id=2):
704
+ gr.Markdown("### Application Settings")
705
+
706
+ with gr.Row():
707
+ with gr.Column():
708
+ gr.Markdown("#### Model Management")
709
+ clear_cache_btn = gr.Button("🗑️ Clear Model Cache", variant="secondary")
710
+ clear_cache_btn.click(
711
+ lambda: shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True) or gr.Info("Cache cleared"),
712
+ outputs=[]
713
+ )
714
+
715
+ gr.Markdown("#### Recent Models")
716
+ recent_models_list = gr.DataFrame(
717
+ value=[[model] for model in model_manager.config["recent_models"]],
718
+ headers=["Model Path"],
719
+ datatype=["str"],
720
+ interactive=False
721
+ )
722
+
723
+ with gr.Row():
724
+ with gr.Column():
725
+ gr.Markdown("#### System Information")
726
+ system_info = gr.HTML(
727
+ f"""
728
+ <div>
729
+ <p><strong>Python Version:</strong> {os.sys.version}</p>
730
+ <p><strong>Platform:</strong> {os.sys.platform}</p>
731
+ <p><strong>Download Directory:</strong> {os.path.abspath(DOWNLOAD_DIR)}</p>
732
+ <p><strong>Output Directory:</strong> {os.path.abspath(OUTPUT_DIR)}</p>
733
+ </div>
734
+ """
735
+ )
736
 
737
+ # Examples
738
  gr.Markdown("### 📚 Examples")
739
  gr.Examples(
740
  examples=[
 
746
  index_rate, filter_radius, rms_mix_rate, protect, denoise, reverb
747
  ],
748
  outputs=output_files,
749
+ fn=lambda *args: run_conversion(*args, model_manager),
750
  cache_examples=False,
751
  )
752
 
753
  return app
754
 
755
+ # Launch App
756
  if __name__ == "__main__":
757
  app = create_ui()
758
  app.queue(default_concurrency_limit=10)