#!/usr/bin/env python3
"""

MiloMusic - Hugging Face Spaces Version

AI-powered music generation platform optimized for cloud deployment with high-performance configuration.

"""

import multiprocessing
import os
import sys
import subprocess
import tempfile
import gradio as gr
import soundfile as sf
from dataclasses import dataclass, field
from typing import Any
import xxhash
import numpy as np
import spaces
import groq

# Import environment setup for Spaces
def setup_spaces_environment():
    """Setup environment variables and paths for Hugging Face Spaces"""
    # Set HuggingFace cache directory
    os.environ["HF_HOME"] = "/tmp/hf_cache"
    os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
    os.environ["HF_HUB_CACHE"] = "/tmp/hf_hub_cache"
    
    # PyTorch CUDA memory optimization: expandable memory segments improve GPU
    # memory utilization and reduce fragmentation
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
    
    # Set temp directory for audio files
    os.environ["TMPDIR"] = "/tmp"
    
    print("๐Ÿš€ Environment setup complete for Spaces")

# Install flash-attn if not already installed
def install_flash_attn() -> bool:
    """Install flash-attn from source with proper compilation flags"""
    try:
        import flash_attn
        print("โœ… flash-attn already installed")
        return True
    except ImportError:
        print("๐Ÿ“ฆ Installing flash-attn from source...")
        try:
            # Install with optimized settings for Spaces
            cmd = [
                sys.executable, "-m", "pip", "install", 
                "--no-build-isolation", 
                "--no-cache-dir",
                "flash-attn",
                "--verbose"
            ]
            
            # Use more parallel jobs for faster compilation in Spaces
            env = os.environ.copy()
            max_jobs = min(4, multiprocessing.cpu_count())  # Utilize more CPU cores
            env["MAX_JOBS"] = str(max_jobs)
            env["NVCC_PREPEND_FLAGS"] = "-ccbin /usr/bin/gcc"
            
            result = subprocess.run(cmd, env=env, capture_output=True, text=True, timeout=1800)  # 30 min timeout
            
            if result.returncode == 0:
                print("โœ… flash-attn installed successfully")
                return True
            else:
                print(f"โŒ flash-attn installation failed: {result.stderr}")
                return False
                
        except subprocess.TimeoutExpired:
            print("โฐ flash-attn installation timed out")
            return False
        except Exception as e:
            print(f"โŒ Error installing flash-attn: {e}")
            return False

# Setup environment first
setup_spaces_environment()

# Download required models for YuEGP inference
def download_required_models():
    """Download required model files at startup"""
    try:
        from download_models import ensure_model_availability
        print("๐Ÿš€ Checking and downloading required models...")
        success = ensure_model_availability()
        if success:
            print("โœ… Model setup completed successfully")
        else:
            print("โš ๏ธ Some models may be missing - continuing with available resources")
        return success
    except ImportError as e:
        print(f"โš ๏ธ Model download script not found: {e}")
        return False
    except Exception as e:
        print(f"โŒ Error during model download: {e}")
        return False

# Download models before other setup
models_ready = download_required_models()

# Install flash-attn if needed
flash_attn_available = install_flash_attn()

# Apply transformers patches for performance optimization
def apply_transformers_patch():
    """

    Apply YuEGP transformers patches for high-performance generation.

    

    This function applies optimized transformers patches that provide:

    - 2x speed improvement for low VRAM profiles

    - 3x speed improvement for Stage 1 generation (16GB+ VRAM)

    - 2x speed improvement for Stage 2 generation (all profiles)

    

    The patches replace two key files in the transformers library:

    - models/llama/modeling_llama.py (LLaMA model optimizations)

    - generation/utils.py (generation utilities optimizations)

    

    Includes smart detection to avoid re-applying patches on restart.

    """
    try:
        import shutil
        import site
        import hashlib
        
        # Define source and target directories
        source_dir = os.path.join(project_root, "YuEGP", "transformers")
        
        # Get the site-packages directory where transformers is installed
        site_packages = site.getsitepackages()
        if not site_packages:
            # Fallback for some environments
            import transformers
            transformers_path = os.path.dirname(transformers.__file__)
            target_base = os.path.dirname(transformers_path)
        else:
            target_base = site_packages[0]
        
        target_dir = os.path.join(target_base, "transformers")
        
        # Check if source patches exist
        if not os.path.exists(source_dir):
            print("โš ๏ธ  YuEGP transformers patches not found, skipping optimization")
            return False
        
        if not os.path.exists(target_dir):
            print("โš ๏ธ  Transformers library not found, skipping patches")
            return False
        
        # Check if patches are already applied by comparing file hashes
        def get_file_hash(filepath):
            """Get MD5 hash of file content"""
            if not os.path.exists(filepath):
                return None
            with open(filepath, 'rb') as f:
                return hashlib.md5(f.read()).hexdigest()
        
        # Key files to check for patch status
        key_patches = [
            "models/llama/modeling_llama.py",
            "generation/utils.py"
        ]
        
        patches_needed = False
        for patch_file in key_patches:
            source_file = os.path.join(source_dir, patch_file)
            target_file = os.path.join(target_dir, patch_file)
            
            if os.path.exists(source_file):
                source_hash = get_file_hash(source_file)
                target_hash = get_file_hash(target_file)
                
                if source_hash != target_hash:
                    patches_needed = True
                    break
        
        if not patches_needed:
            print("โœ… YuEGP transformers patches already applied, skipping re-installation")
            print("  ๐Ÿ“ˆ High-performance optimizations are active:")
            print("    โ€ข Stage 1 generation: 3x faster (16GB+ VRAM)")
            print("    โ€ข Stage 2 generation: 2x faster (all profiles)")
            return True
        
        # Apply patches by copying optimized files
        print("๐Ÿ”ง Applying YuEGP transformers patches for high-performance generation...")
        
        # Copy the patched files, preserving directory structure
        for root, dirs, files in os.walk(source_dir):
            # Calculate relative path from source_dir
            rel_path = os.path.relpath(root, source_dir)
            target_subdir = os.path.join(target_dir, rel_path) if rel_path != '.' else target_dir
            
            # Ensure target subdirectory exists
            os.makedirs(target_subdir, exist_ok=True)
            
            # Copy all Python files in this directory
            for file in files:
                if file.endswith('.py'):
                    src_file = os.path.join(root, file)
                    dst_file = os.path.join(target_subdir, file)
                    
                    shutil.copy2(src_file, dst_file)
                    print(f"  โœ… Patched: {os.path.relpath(dst_file, target_base)}")
        
        print("๐Ÿš€ Transformers patches applied successfully!")
        print("  ๐Ÿ“ˆ Expected performance gains:")
        print("    โ€ข Stage 1 generation: 3x faster (16GB+ VRAM)")
        print("    โ€ข Stage 2 generation: 2x faster (all profiles)")
        return True
        
    except Exception as e:
        print(f"โŒ Error applying transformers patches: {e}")
        print("   Continuing without patches - performance may be reduced")
        return False

# Now import the rest of the dependencies
# Add project root to Python path for imports
project_root = os.path.dirname(os.path.abspath(__file__))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from tools.groq_client import client as groq_client
from openai import OpenAI
from tools.generate_lyrics import generate_structured_lyrics, format_lyrics

# Apply patches after all imports are set up
patch_applied = apply_transformers_patch()

# Import CUDA info after flash-attn setup
import torch
if torch.cuda.is_available():
    print(f"๐ŸŽฎ GPU: {torch.cuda.get_device_name(0)}")
    print(f"๐Ÿ’พ VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
else:
    print("โš ๏ธ  No CUDA GPU detected")

@dataclass
class AppState:
    """

    Maintains the application state throughout user interactions.

    """
    conversation: list = field(default_factory=list)
    stopped: bool = False
    model_outs: Any = None
    lyrics: str = ""
    genre: str = "pop"
    mood: str = "upbeat"
    theme: str = "love"

def validate_api_keys():
    """Validate required API keys for Spaces deployment"""
    required_keys = ["GROQ_API_KEY",  "GEMINI_API_KEY"]
    missing_keys = []
    
    for key in required_keys:
        if not os.getenv(key):
            missing_keys.append(key)
    
    if missing_keys:
        print(f"โš ๏ธ  Missing API keys: {missing_keys}")
        return False
    
    print("โœ… All API keys validated")
    return True

def validate_file_structure():
    """Validate that required files and directories exist"""
    required_paths = [
        "YuEGP/inference/infer.py",
        "YuEGP/inference/codecmanipulator.py", 
        "YuEGP/inference/mmtokenizer.py",
        "tools/generate_lyrics.py",
        "tools/groq_client.py",
        "schemas/lyrics.py"  # Required for lyrics structure models
    ]
    
    missing_files = []
    for path in required_paths:
        if not os.path.exists(path):
            missing_files.append(path)
    
    if missing_files:
        print(f"โš ๏ธ  Missing required files: {missing_files}")
        return False
    
    print("โœ… All required files found")
    return True

@spaces.GPU(duration=1200)  # 20-minute allocation: fits ZeroGPU's free 25-minute H200 window and stays compatible with A10G large and L40S
def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progress()) -> str:
    """

    Generate music using YuE model with high-performance Spaces configuration

    """
    if not lyrics.strip():
        return "Please provide lyrics to generate music."
    
    try:
        progress(0.1, desc="Preparing lyrics...")
        
        # Use lyrics directly (already formatted from chat interface)
        formatted_lyrics = lyrics
        
        # Create temporary files
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as genre_file:
            genre_file.write(f"instrumental,{genre},{mood},male vocals")
            genre_file_path = genre_file.name
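        # With the default dropdown values, the prompt written above reads:
        #   "instrumental,pop,upbeat,male vocals"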
        
        # Convert lyrics format for YuEGP compatibility
        # YuEGP expects [VERSE], [CHORUS] format, but our AI generates **VERSE**, **CHORUS**
        import re
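        # Illustrative transformation performed by the regexes below
        # (hypothetical input on the left, YuEGP-ready output on the right):
        #   **VERSE 1**              [VERSE 1]
        #   City lights at dusk  ->  City lights at dusk
        #   **CHORUS**               [CHORUS]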
        
        # Extract only the actual lyrics content, removing AI commentary
        formatted_lyrics_for_yue = formatted_lyrics
        
        # Convert **VERSE 1** to [VERSE], **CHORUS** to [CHORUS], etc.
        formatted_lyrics_for_yue = re.sub(r'\*\*(VERSE\s*\d*)\*\*', r'[\1]', formatted_lyrics_for_yue)
        formatted_lyrics_for_yue = re.sub(r'\*\*(CHORUS)\*\*', r'[\1]', formatted_lyrics_for_yue)
        formatted_lyrics_for_yue = re.sub(r'\*\*(BRIDGE)\*\*', r'[\1]', formatted_lyrics_for_yue)
        formatted_lyrics_for_yue = re.sub(r'\*\*(OUTRO)\*\*', r'[\1]', formatted_lyrics_for_yue)
        
        # Remove AI commentary (lines that don't contain actual lyrics)
        lines = formatted_lyrics_for_yue.split('\n')
        clean_lines = []
        in_song = False
        
        for line in lines:
            line = line.strip()
            # Start collecting from first section marker
            if re.match(r'\[(VERSE|CHORUS|BRIDGE|OUTRO)', line):
                in_song = True
            # Stop at AI commentary
            if in_song and line and not line.startswith('[') and any(phrase in line.lower() for phrase in ['how do you like', 'would you like', 'let me know', 'take a look']):
                break
            if in_song:
                clean_lines.append(line)
        
        formatted_lyrics_for_yue = '\n'.join(clean_lines).strip()
        
        print(f"๐Ÿ› DEBUG - Original lyrics length: {len(formatted_lyrics)}")
        print(f"๐Ÿ› DEBUG - Converted lyrics for YuE: '{formatted_lyrics_for_yue}'")
        print(f"๐Ÿ› DEBUG - Converted lyrics length: {len(formatted_lyrics_for_yue)}")
        
        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as lyrics_file:
            lyrics_file.write(formatted_lyrics_for_yue)
            lyrics_file_path = lyrics_file.name
        
        progress(0.2, desc="Setting up generation...")
        
        # Generate music with high-performance Spaces configuration
        output_dir = tempfile.mkdtemp()
        
        # High-performance command based on Spaces GPU resources
        # In Spaces, working directory is /app
        infer_script_path = os.path.join(os.getcwd(), "YuEGP", "inference", "infer.py")
        cmd = [
            sys.executable,
            infer_script_path,
            "--cuda_idx", "0",
            "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
            "--stage2_model", "m-a-p/YuE-s2-1B-general",
            "--genre_txt", genre_file_path,
            "--lyrics_txt", lyrics_file_path,
            "--run_n_segments", "2",        # Full segments for better quality
            "--stage2_batch_size", "4",     # Higher batch size for speed
            "--output_dir", output_dir,
            "--max_new_tokens", "3000",     # Full token count
            "--profile", "1",               # Highest performance profile
            "--verbose", "3",
            "--rescale",                    # Enable audio rescaling to proper volume
            "--prompt_start_time", "0",
            "--prompt_end_time", "30",      # Full 30-second clips
        ]
        
        # Use flash attention if available, otherwise fallback
        if not flash_attn_available:
            cmd.append("--sdpa")
        
        # More detailed progress updates (keep values above the 0.2 reported
        # earlier so the bar never moves backwards)
        progress(0.21, desc="🚀 Initializing models...")
        progress(0.22, desc="📝 Processing lyrics...")
        progress(0.23, desc="🎵 Starting Stage 1 (7B model generation)...")
        
        # Extract parameters from cmd for logging
        run_n_segments = cmd[cmd.index("--run_n_segments") + 1] if "--run_n_segments" in cmd else "2"
        max_new_tokens = cmd[cmd.index("--max_new_tokens") + 1] if "--max_new_tokens" in cmd else "3000"
        
        print("๐ŸŽต Starting high-quality music generation...")
        print(f"๐Ÿ“Š Generation settings: {run_n_segments} segments, {max_new_tokens} tokens, 30s audio")
        print(f"โฑ๏ธ Estimated time: 8-9 minutes for high-quality generation")
        print(f"Working directory: {os.getcwd()}")
        print(f"Command: {' '.join(cmd)}")
        
        # Change to YuEGP/inference directory for execution
        original_cwd = os.getcwd()
        inference_dir = os.path.join(os.getcwd(), "YuEGP", "inference")
        
        try:
            os.chdir(inference_dir)
            print(f"Changed to inference directory: {inference_dir}")
            cmd[1] = "infer.py"
            
            progress(0.25, desc="๐Ÿ”ฅ Stage 1: Running 7B parameter model...")
            
            # Start the subprocess
            import threading
            import time
            
            def parse_output_and_update_progress(process):
                """Parse subprocess output in real-time and update progress accordingly"""
                stage1_messages = [
                    "๐Ÿง  Stage 1: Generating musical concepts...",
                    "๐ŸŽผ Stage 1: Creating melody patterns...",
                    "๐ŸŽน Stage 1: Composing harmony structure..."
                ]
                stage2_messages = [
                    "โšก Starting Stage 2: Refining with 1B model...",
                    "๐ŸŽต Stage 2: Adding musical details...",
                    "๐ŸŽถ Stage 2: Finalizing composition..."
                ]
                
                stage1_progress = [0.3, 0.45, 0.6]
                stage2_progress = [0.7, 0.8, 0.85]
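                # Approximate progress map across the Gradio bar: Stage 1 output
                # lines advance 0.30 -> 0.60, Stage 2 lines advance 0.70 -> 0.85,
                # decoding is reported at 0.90, and the caller finishes at 0.95/1.0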
                
                current_stage = 1
                stage1_step = 0
                stage2_step = 0
                
                output_lines = []
                
                try:
                    while True:
                        line = process.stdout.readline()
                        if not line:
                            break
                            
                        line = line.strip()
                        output_lines.append(line)
                        print(line)  # Still print for debugging
                        
                        # Check for stage transitions based on actual output
                        if "Stage 2 inference..." in line:
                            current_stage = 2
                            stage2_step = 0
                            progress(0.7, desc=stage2_messages[0])
                            print(f"โณ {stage2_messages[0]}")
                        
                        elif "Stage 2 DONE" in line:
                            progress(0.9, desc="๐Ÿ”Š Decoding to audio format...")
                            print("โณ ๐Ÿ”Š Decoding to audio format...")
                        
                        # Advance Stage 1 progress one step per output line (an
                        # approximation; the script does not report true progress)
                        elif current_stage == 1 and stage1_step < len(stage1_messages):
                            if stage1_step < len(stage1_progress):
                                progress(stage1_progress[stage1_step], desc=stage1_messages[stage1_step])
                                print(f"โณ {stage1_messages[stage1_step]}")
                                stage1_step += 1
                        
                        # Update Stage 2 progress periodically
                        elif current_stage == 2 and stage2_step < len(stage2_messages) - 1:
                            stage2_step += 1
                            if stage2_step < len(stage2_progress):
                                progress(stage2_progress[stage2_step], desc=stage2_messages[stage2_step])
                                print(f"โณ {stage2_messages[stage2_step]}")
                
                except Exception as e:
                    print(f"Progress parsing error: {e}")
                
                return '\n'.join(output_lines)
            
            print(f"๐Ÿš€ Executing command: {' '.join(cmd)}")
            
            # Use Popen for real-time output processing
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, 
                                     text=True, bufsize=1, universal_newlines=True)
            
            # Parse output in real-time
            stdout_output = parse_output_and_update_progress(process)
            
            # Wait for the process to exit; the 20-minute cap matches the GPU
            # allocation and makes the TimeoutExpired handler below reachable
            return_code = process.wait(timeout=1200)
            
            # Create result object similar to subprocess.run
            class Result:
                def __init__(self, returncode, stdout, stderr=""):
                    self.returncode = returncode
                    self.stdout = stdout
                    self.stderr = stderr
            
            result = Result(return_code, stdout_output)
            
            # Print stdout and stderr for debugging
            if result.stdout:
                print(f"โœ… Command output:\n{result.stdout}")
            if result.stderr:
                print(f"โš ๏ธ Command stderr:\n{result.stderr}")
            print(f"๐Ÿ“Š Return code: {result.returncode}")
            
        finally:
            os.chdir(original_cwd)
        
        progress(0.95, desc="๐ŸŽ‰ Processing completed, finalizing output...")
        
        # Clean up input files
        os.unlink(genre_file_path)
        os.unlink(lyrics_file_path)
        
        if result.returncode == 0:
            # Find the generated audio file: check root-level mixed audio first,
            # then vocoder/mix, then fall back to any MP3
            import glob

            final_files = glob.glob(os.path.join(output_dir, "*_mixed.mp3"))
            if final_files:
                progress(1.0, desc="Finish music generation")
                print(f"โœ… Found audio file at root: {final_files[0]}")
                return final_files[0]

            # Next, look for the final mixed audio in vocoder/mix
            mixed_files = glob.glob(os.path.join(output_dir, "vocoder/mix/*_mixed.mp3"))
            if mixed_files:
                progress(1.0, desc="Music generation complete!")
                print(f"โœ… Found mixed audio file: {mixed_files[0]}")
                return mixed_files[0]

            # Fallback to any MP3 file
            audio_files = glob.glob(os.path.join(output_dir, "**/*.mp3"), recursive=True)
            if audio_files:
                progress(1.0, desc="Music generation complete!")
                print(f"โœ… Found audio file: {audio_files[0]}")
                return audio_files[0]  # Return path to generated audio
            else:
                print(f"โŒ No audio files found in {output_dir}")
                print(f"Directory contents: {os.listdir(output_dir) if os.path.exists(output_dir) else 'Directory not found'}")
                return "Music generation completed but no audio file found."
        else:
            error_msg = f"Return code: {result.returncode}\n"
            if result.stderr:
                error_msg += f"Error: {result.stderr[-1000:]}\n"
            if result.stdout:
                error_msg += f"Output: {result.stdout[-1000:]}"
            return f"Music generation failed:\n{error_msg}"
            
    except subprocess.TimeoutExpired:
        return "Music generation timed out after 20 minutes. Please try again."
    except Exception as e:
        return f"Error during music generation: {str(e)}"

def respond(message, state):
    """Enhanced response function for conversational lyrics generation"""
    try:
        # Add user message to conversation
        state.conversation.append({"role": "user", "content": message})
        
        # Use conversational generation logic (same as voice input)
        response = generate_chat_completion(groq_client, state.conversation, state.genre, state.mood, state.theme)
        
        # Add assistant response
        state.conversation.append({"role": "assistant", "content": response})
        
        # Update lyrics with improved format recognition - extract only segments
        if any(marker in response.lower() for marker in ["[verse", "[chorus", "[bridge", "**verse", "**chorus", "sectiontype.verse", "verse:"]):
            state.lyrics = extract_lyrics_segments_only(response)
        
        # Format conversation for display
        return "", [{"role": msg["role"], "content": msg["content"]} for msg in state.conversation], state
        
    except Exception as e:
        error_response = f"Sorry, I encountered an error: {str(e)}"
        state.conversation.append({"role": "assistant", "content": error_response})
        return "", [{"role": msg["role"], "content": msg["content"]} for msg in state.conversation], state

def build_interface():
    """Build the Gradio interface optimized for Spaces with high performance"""
    
    with gr.Blocks(
        title="MiloMusic - AI Music Generation", 
        theme=gr.themes.Soft(),
        css="""

        .container { max-width: 1400px; margin: auto; }

        .performance-notice { background-color: #d4edda; padding: 15px; border-radius: 5px; margin: 10px 0; }

        .generation-status { background-color: #f8f9fa; padding: 10px; border-radius: 5px; }

        """
    ) as demo:
        
        # Header
        gr.Markdown("""

        # ๐ŸŽต MiloMusic - AI Music Generation

        ### Professional AI-powered music creation from natural language

        """)
        
        # Performance notice for Spaces
        gr.Markdown("""

        <div class="performance-notice">

        ๐Ÿš€ <strong>High-Performance Mode:</strong> Running on Spaces GPU with optimized settings for best quality.

        Generation time: ~8-9 minutes for professional-grade music with vocals and instruments.

        </div>

        """)
        
        state = gr.State(AppState())
        
        with gr.Row():
            with gr.Column(scale=2):
                # Input controls
                with gr.Group():
                    gr.Markdown("### ๐ŸŽ›๏ธ Music Settings")
                    with gr.Row():
                        genre = gr.Dropdown(
                            choices=["pop", "rock", "jazz", "classical", "electronic", "folk", "r&b", "country", "hip-hop"],
                            value="pop", label="Genre"
                        )
                        mood = gr.Dropdown(
                            choices=["upbeat", "melancholic", "energetic", "calm", "romantic", "dark", "mysterious", "joyful"],
                            value="upbeat", label="Mood"
                        )
                        theme = gr.Dropdown(
                            choices=["love", "friendship", "adventure", "nostalgia", "freedom", "hope", "dreams", "nature"],
                            value="love", label="Theme"
                        )
                
                # Voice Input
                with gr.Group():
                    gr.Markdown("### ๐ŸŽค Voice Input")
                    input_audio = gr.Audio(
                        label="Speak Your Musical Ideas",
                        sources=["microphone"],
                        type="numpy",
                        streaming=False,
                        waveform_options=gr.WaveformOptions(waveform_color="#B83A4B"),
                    )
                
                # Chat interface
                with gr.Group():
                    gr.Markdown("### ๐Ÿ’ฌ Lyrics Creation Chat")
                    chatbot = gr.Chatbot(height=400, label="AI Lyrics Assistant", show_copy_button=True, type="messages")
                    
                    with gr.Row():
                        text_input = gr.Textbox(
                            placeholder="Or type your song idea here...", 
                            show_label=False, 
                            scale=4,
                            lines=2
                        )
                        send_btn = gr.Button("Send", scale=1, variant="primary")
            
            with gr.Column(scale=1):
                # Output controls
                with gr.Group():
                    gr.Markdown("### ๐ŸŽต Music Generation")
                    lyrics_display = gr.Textbox(
                        label="Current Lyrics", 
                        lines=12, 
                        interactive=True,
                        placeholder="Your generated lyrics will appear here..."
                    )
                    
                    generate_btn = gr.Button("๐ŸŽผ Generate High-Quality Music", variant="primary", size="lg")
                    
                    with gr.Column():
                        music_output = gr.Audio(label="Generated Music", type="filepath", show_download_button=True)
                        
                        gr.Markdown("""

                        <div class="generation-status">

                        <strong>Generation Features:</strong><br>

                        โ€ข Full 30-second clips<br>

                        โ€ข Professional vocals<br>

                        โ€ข Rich instrumentation<br>

                        โ€ข High-fidelity audio

                        </div>

                        """)
                
                # Controls
                with gr.Group():
                    gr.Markdown("### ๐Ÿ”ง Controls")
                    new_song_btn = gr.Button("๐Ÿ†• Start New Song")
                    clear_btn = gr.Button("๐Ÿงน Clear Chat")
        
        # Event handlers
        def update_state_settings(genre_val, mood_val, theme_val, state):
            state.genre = genre_val
            state.mood = mood_val  
            state.theme = theme_val
            return state
        
        # Update state when settings change
        for component in [genre, mood, theme]:
            component.change(
                fn=update_state_settings,
                inputs=[genre, mood, theme, state],
                outputs=[state]
            )
        
        # Voice recording functionality (from app.py)
        stream = input_audio.start_recording(
            process_audio,
            [input_audio, state],
            [input_audio, state],
        )

        respond_audio = input_audio.stop_recording(
            response_audio, [state, input_audio, genre, mood, theme], [state, chatbot, lyrics_display]
        )

        restart = respond_audio.then(start_recording_user, [state], [input_audio]).then(
            lambda state: state, state, state, js=js_reset
        )
        
        # Text chat functionality with lyrics update
        def respond_with_lyrics_update(message, state):
            text_output, chat_output, updated_state = respond(message, state)
            return text_output, chat_output, updated_state, updated_state.lyrics
        
        send_btn.click(
            fn=respond_with_lyrics_update,
            inputs=[text_input, state],
            outputs=[text_input, chatbot, state, lyrics_display],
            queue=True
        )
        
        text_input.submit(
            fn=respond_with_lyrics_update,
            inputs=[text_input, state],
            outputs=[text_input, chatbot, state, lyrics_display],
            queue=True
        )
        
        # Music generation with progress
        generate_btn.click(
            fn=generate_music_spaces,
            inputs=[lyrics_display, genre, mood],
            outputs=[music_output],
            queue=True,
            show_progress=True
        )
        
        # Control buttons
        new_song_btn.click(
            fn=lambda: (AppState(), [], "", None, gr.Audio(recording=False)),
            outputs=[state, chatbot, lyrics_display, music_output, input_audio],
            cancels=[respond_audio, restart]
        )
        
        clear_btn.click(
            fn=lambda: [],
            outputs=[chatbot]
        )
        
        # Auto-update lyrics display when state changes
        state.change(
            fn=lambda s: s.lyrics,
            inputs=[state],
            outputs=[lyrics_display]
        )
        
        # Instructions
        gr.Markdown("""

        ### ๐Ÿ“– How to create your music:

        1. **Set your preferences**: Choose genre, mood, and theme

        2. **Voice or chat**: Either speak your ideas or type them in the chat

        3. **Refine the lyrics**: Ask for changes, different verses, or style adjustments  

        4. **Generate music**: Click the generate button for professional-quality output

        5. **Download & enjoy**: Your high-fidelity music with vocals and instruments

        

        **Tips**: Be specific about your vision - mention instruments, vocal style, or song structure!

        """)
        
        # Footer
        gr.Markdown("""

        ---

        <center>

        Made with โค๏ธ by the MiloMusic Team | Powered by YuE (ไน) Model | ๐Ÿค— Hugging Face Spaces

        </center>

        """)
    
    return demo

# Audio transcription functions (from app.py)
def process_whisper_response(completion):
    """

    Process Whisper transcription response and filter out silence.

    """
    if completion.segments and len(completion.segments) > 0:
        no_speech_prob = completion.segments[0].get('no_speech_prob', 0)
        print("No speech prob:", no_speech_prob)

        if no_speech_prob > 0.7:
            print("No speech detected")
            return None

        return completion.text.strip()

    return None

def transcribe_audio(client, file_name):
    """

    Transcribe an audio file using the Whisper model via the Groq API.

    """
    if file_name is None:
        return None

    try:
        with open(file_name, "rb") as audio_file:
            with open("audio.wav", "wb") as f:
                f.write(audio_file.read())

            # Rewind: read() above left the file pointer at EOF, so without this
            # the API call below would receive an empty byte stream
            audio_file.seek(0)

            response = client.audio.transcriptions.create(
                model="whisper-large-v3-turbo",
                file=("audio.wav", audio_file),
                response_format="text",
                language="en",
            )
            
            # Process the response to filter out silence
            # For text response format, we need to check if response is meaningful
            if response and len(response.strip()) > 0:
                return response.strip()
            else:
                return None
                
    except Exception as e:
        print(f"Transcription error: {e}")
        return f"Error in audio transcription: {str(e)}"

def start_recording_user(state: AppState):
    """

    Reset the audio recording component for a new user input.

    """
    return None

def process_audio(audio: tuple, state: AppState):
    """

    Process recorded audio in real-time during recording.

    """
    return audio, state

@spaces.GPU(duration=40)
def response_audio(state: AppState, audio: tuple, genre_value, mood_value, theme_value):
    """

    Process recorded audio and generate a response based on transcription.

    """
    if not audio:
        # Return three values to match the wired outputs: state, chatbot, lyrics
        return state, [], state.lyrics

    # Update state with current dropdown values
    state.genre, state.mood, state.theme = genre_value, mood_value, theme_value

    temp_dir = tempfile.gettempdir()
    file_name = os.path.join(temp_dir, f"{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav")

    sf.write(file_name, audio[1], audio[0], format="wav")

    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("Please set the GROQ_API_KEY environment variable.")
    client = groq.Client(api_key=api_key)

    # Transcribe the audio file
    transcription = transcribe_audio(client, file_name)
    if transcription:
        if isinstance(transcription, str) and transcription.startswith("Error"):
            transcription = "Error in audio transcription."

        state.conversation.append({"role": "user", "content": transcription})

        assistant_message = generate_chat_completion(client, state.conversation, state.genre, state.mood, state.theme)

        state.conversation.append({"role": "assistant", "content": assistant_message})

        # Update lyrics using same logic as text input for consistency - extract only segments
        if any(marker in assistant_message.lower() for marker in ["[verse", "[chorus", "[bridge", "**verse", "**chorus", "sectiontype.verse", "verse:"]):
            state.lyrics = extract_lyrics_segments_only(assistant_message)

        os.remove(file_name)

    # Format conversation for display in messages format
    conversation_display = []
    for msg in state.conversation:
        conversation_display.append({"role": msg["role"], "content": msg["content"]})
    
    return state, conversation_display, state.lyrics

def extract_lyrics_segments_only(content):
    """

    Extract only the lyrics segments (VERSE, CHORUS, etc.) from AI response,

    removing any AI commentary or explanation text.

    """
    import re

    if not content:
        return ""

    lines = content.split('\n')
    lyrics_lines = []
    in_lyrics_section = False

    for line in lines:
        line = line.strip()

        # Check if this line is a section header (VERSE, CHORUS, etc.)
        if re.match(r'^\*\*(VERSE|CHORUS|BRIDGE|OUTRO).*\*\*$', line) or re.match(r'^\[(VERSE|CHORUS|BRIDGE|OUTRO).*\]$', line):
            in_lyrics_section = True
            lyrics_lines.append(line)
            continue

        # If we're in a lyrics section
        if in_lyrics_section:
            # Stop if we hit AI commentary
            if line and any(phrase in line.lower() for phrase in [
                'how do you like', 'would you like', 'let me know',
                'what do you think', 'any changes', 'take a look',
                'here are the lyrics', 'i\'ve created', 'feel free to'
            ]):
                break

            # Add lyrics line (including empty lines for formatting)
            lyrics_lines.append(line)

    return '\n'.join(lyrics_lines).strip()
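
# Example behavior (hypothetical AI response, shown for illustration only):
#   Input:  "Here are the lyrics!\n**VERSE 1**\nCity lights at dusk\n**CHORUS**\nWe sing it loud\nHow do you like it?"
#   Output: "**VERSE 1**\nCity lights at dusk\n**CHORUS**\nWe sing it loud"
# The leading remark is skipped (no section header seen yet) and the trailing
# question matches a stop phrase, so only the tagged sections are kept.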

def extract_lyrics_from_conversation(conversation):
    """

    Extract lyrics from conversation history with cross-platform compatibility.

    """
    lyrics = ""
    for message in reversed(conversation):
        if message["role"] == "assistant":
            content_lower = message["content"].lower()
            # ๅ…ˆๅฐ่ฏ•ไธฅๆ ผๅŒน้…๏ผˆไฟๆŒๅŽŸ้€ป่พ‘๏ผ‰
            if "verse" in content_lower and "chorus" in content_lower:
                lyrics = extract_lyrics_segments_only(message["content"])
                break
            # ๅฆ‚ๆžœๆฒกๆ‰พๅˆฐ๏ผŒๅ†็”จๅฎฝๆณ›ๅŒน้…๏ผˆๅ…ผๅฎนๆ€งๅค‡้€‰๏ผ‰
            elif any(marker in content_lower for marker in ["[verse", "[chorus", "**verse", "**chorus"]):
                lyrics = extract_lyrics_segments_only(message["content"])
                break
    return lyrics

def generate_chat_completion(client, history, genre, mood, theme):
    """

    Generate an AI assistant response based on conversation history and song parameters.

    """
    messages = []
    system_prompt = f"""You are a creative AI music generator assistant. Help users create song lyrics in the {genre} genre with a {mood} mood about {theme}.

When generating lyrics, create a chorus and at least one verse. Format lyrics clearly with VERSE and CHORUS labels.

Ask if they like the lyrics or want changes. Be conversational, friendly, and creative.

Keep the lyrics appropriate for the selected genre, mood, and theme unless the user specifically requests changes."""

    messages.append({
        "role": "system",
        "content": system_prompt,
    })

    for message in history:
        messages.append(message)

    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=messages,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in generating chat completion: {str(e)}"

# JavaScript for frontend enhancements
js_reset = """

() => {

  var record = document.querySelector('.record-button');

  if (record) {

    record.textContent = "Just Start Talking!"

    record.style = "width: fit-content; padding-right: 0.5vw;"

  }

}

"""

# Build the interface
demo = build_interface()

if __name__ == "__main__":
    """

    Spaces entry point - optimized for high-performance deployment

    """
    print("๐Ÿš€ Starting MiloMusic High-Performance Mode on Hugging Face Spaces...")
    print(f"๐Ÿ“ Working directory: {os.getcwd()}")
    print(f"๐Ÿ“‚ Directory contents: {os.listdir('.')}")
    
    # Validate file structure
    if not validate_file_structure():
        print("โŒ Required files missing - please check your upload")
        sys.exit(1)
    
    # Validate environment
    if not validate_api_keys():
        print("โš ๏ธ  Some API keys missing - functionality may be limited")
    
    # Launch with optimized settings for Spaces
    demo.queue(
        default_concurrency_limit=5,  # Allow more concurrent users
        max_size=20
    ).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,  # Spaces handles sharing
        show_error=True,
        quiet=False,
        favicon_path=None,
        ssl_verify=False
    )