Khushi Dahiya committed
Commit f568365 · 1 Parent(s): af4f55f

updating predict to handle batch processing of concurrent requests

Files changed (2)
  1. demos/melodyflow_app.py +314 -50
  2. requirements.txt +1 -0
demos/melodyflow_app.py CHANGED
@@ -16,6 +16,11 @@ import time
import typing as tp
import warnings
import base64
+ import asyncio
+ import threading
+ from concurrent.futures import ThreadPoolExecutor
+ from queue import Queue, Empty
+ import uuid

import torch
import gradio as gr
@@ -26,6 +31,11 @@ from audiocraft.models import MelodyFlow


MODEL = None # Last used model
+ MODEL_LOCK = threading.Lock() # Thread lock for model access
+ REQUEST_QUEUE = Queue() # Queue for batch processing
+ BATCH_PROCESSOR = None # Background batch processor
+ BATCH_SIZE = 4 # Maximum batch size for concurrent processing
+ BATCH_TIMEOUT = 2.0 # Maximum wait time to form a batch (seconds)
SPACE_ID = os.environ.get('SPACE_ID', '')
MODEL_PREFIX = os.environ.get('MODEL_PREFIX', 'facebook/')
IS_HF_SPACE = (MODEL_PREFIX + "MelodyFlow") in SPACE_ID
@@ -68,6 +78,220 @@ class FileCleaner:
file_cleaner = FileCleaner()


+ class RequestBatch:
+     """Represents a batch of requests to process together"""
+     def __init__(self):
+         self.requests = []
+         self.futures = []
+         self.created_at = time.time()
+
+     def add_request(self, request_data, future):
+         self.requests.append(request_data)
+         self.futures.append(future)
+
+     def is_full(self):
+         return len(self.requests) >= BATCH_SIZE
+
+     def is_expired(self):
+         return time.time() - self.created_at > BATCH_TIMEOUT
+
+     def should_process(self):
+         return self.is_full() or self.is_expired() or len(self.requests) > 0
+
+
+ class BatchProcessor:
+     """Handles batched processing of requests"""
+     def __init__(self):
+         self.current_batch = RequestBatch()
+         self.processing = False
+         self.stop_event = threading.Event()
+
+     def start(self):
+         """Start the background batch processing thread"""
+         self.thread = threading.Thread(target=self._process_loop, daemon=True)
+         self.thread.start()
+
+     def stop(self):
+         """Stop the background batch processing"""
+         self.stop_event.set()
+
+     def add_request(self, request_data):
+         """Add a request to the batch and return a future for the result"""
+         from concurrent.futures import Future
+         future = Future()
+
+         # Add to current batch
+         self.current_batch.add_request(request_data, future)
+
+         # Signal that we have a new request
+         REQUEST_QUEUE.put("new_request")
+
+         return future
+
+     def _process_loop(self):
+         """Main processing loop that runs in background thread"""
+         while not self.stop_event.is_set():
+             try:
+                 # Wait for a signal or timeout
+                 REQUEST_QUEUE.get(timeout=0.5)
+
+                 # Check if we should process current batch
+                 if self.current_batch.should_process() and not self.processing:
+                     self._process_current_batch()
+
+             except Empty:
+                 # Timeout - check if we have an expired batch
+                 if self.current_batch.should_process() and not self.processing:
+                     self._process_current_batch()
+                 continue
+             except Exception as e:
+                 print(f"Error in batch processing loop: {e}")
+
+     @spaces.GPU(duration=45) # Increased duration for batch processing
+     def _process_current_batch(self):
+         """Process the current batch of requests"""
+         if len(self.current_batch.requests) == 0:
+             return
+
+         self.processing = True
+         batch = self.current_batch
+         self.current_batch = RequestBatch() # Start new batch
+
+         try:
+             # Extract batch data
+             texts = []
+             melodies = []
+             params_list = []
+
+             for request_data in batch.requests:
+                 texts.append(request_data['text'])
+                 melodies.append(request_data['melody'])
+                 params_list.append({
+                     'solver': request_data['solver'],
+                     'steps': request_data['steps'],
+                     'target_flowstep': request_data['target_flowstep'],
+                     'regularize': request_data['regularize'],
+                     'regularization_strength': request_data['regularization_strength'],
+                     'duration': request_data['duration'],
+                     'model': request_data['model']
+                 })
+
+             # Load model if needed (use the first request's model)
+             model_version = params_list[0]['model']
+             load_model(model_version)
+
+             # Process batch with unified parameters (use first request's params)
+             params = params_list[0]
+             results = _do_predictions_batch(
+                 texts=texts,
+                 melodies=melodies,
+                 solver=params['solver'],
+                 steps=params['steps'],
+                 target_flowstep=params['target_flowstep'],
+                 regularize=params['regularize'],
+                 regularization_strength=params['regularization_strength'],
+                 duration=params['duration'],
+                 progress=False
+             )
+
+             # Set results for each future
+             for i, future in enumerate(batch.futures):
+                 if i < len(results):
+                     future.set_result(results[i])
+                 else:
+                     future.set_exception(Exception("Batch processing failed"))
+
+         except Exception as e:
+             # Set exception for all futures in batch
+             for future in batch.futures:
+                 future.set_exception(e)
+         finally:
+             self.processing = False
+
+
+ def _do_predictions_batch(texts, melodies, solver, steps, target_flowstep,
+                           regularize, regularization_strength, duration, progress=False):
+     """Process a batch of predictions efficiently"""
+     with MODEL_LOCK:
+         MODEL.set_generation_params(solver=solver, steps=steps, duration=duration)
+         MODEL.set_editing_params(
+             solver=solver,
+             steps=steps,
+             target_flowstep=target_flowstep,
+             regularize=regularize,
+             lambda_kl=regularization_strength
+         )
+
+         print(f"Processing batch: {len(texts)} requests")
+         be = time.time()
+
+         processed_melodies = []
+         target_sr = 48000
+         target_ac = 2
+
+         for melody in melodies:
+             if melody is None:
+                 processed_melodies.append(None)
+             else:
+                 melody, sr = audio_read(melody)
+                 if melody.dim() == 2:
+                     melody = melody[None]
+                 if melody.shape[-1] > int(sr * MODEL.duration):
+                     melody = melody[..., :int(sr * MODEL.duration)]
+                 melody = convert_audio(melody, sr, target_sr, target_ac)
+                 melody = MODEL.encode_audio(melody.to(MODEL.device))
+                 processed_melodies.append(melody)
+
+         try:
+             # Process all requests in the batch together
+             if any(m is not None for m in processed_melodies):
+                 # For editing mode, process each request individually due to melody constraints
+                 outputs_list = []
+                 for i, (text, melody) in enumerate(zip(texts, processed_melodies)):
+                     if melody is not None:
+                         output = MODEL.edit(
+                             prompt_tokens=melody.repeat(1, 1, 1),
+                             descriptions=[text],
+                             src_descriptions=[""],
+                             progress=progress,
+                             return_tokens=False,
+                         )
+                     else:
+                         output = MODEL.generate([text], progress=progress, return_tokens=False)
+                     outputs_list.append(output[0])
+                 outputs = torch.stack(outputs_list)
+             else:
+                 # For generation mode, we can batch all requests
+                 outputs = MODEL.generate(texts, progress=progress, return_tokens=False)
+
+         except RuntimeError as e:
+             raise gr.Error("Error while generating " + e.args[0])
+
+         outputs = outputs.detach().cpu().float()
+         results = []
+
+         for output in outputs:
+             with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+                 audio_write(
+                     file.name, output, MODEL.sample_rate, strategy="loudness",
+                     loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+
+                 # Read and encode audio
+                 with open(file.name, 'rb') as f:
+                     audio_bytes = f.read()
+                 audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+
+                 results.append({
+                     "audio": audio_base64,
+                     "format": "wav"
+                 })
+
+             file_cleaner.add(file.name)
+
+         print(f"Batch finished: {len(texts)} requests in {time.time() - be:.2f}s")
+         return results
+
+
def make_waveform(*args, **kwargs):
    # Further remove some warnings.
    be = time.time()
@@ -80,14 +304,16 @@ def make_waveform(*args, **kwargs):

def load_model(version=(MODEL_PREFIX + "melodyflow-t24-30secs")):
    global MODEL
-     print("Loading model", version)
-     if MODEL is None or MODEL.name != version:
-         # Clear PyTorch CUDA cache and delete model
-         del MODEL
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-         MODEL = None # in case loading would crash
-         MODEL = MelodyFlow.get_pretrained(version)
+     with MODEL_LOCK:
+         print("Loading model", version)
+         if MODEL is None or MODEL.name != version:
+             # Clear PyTorch CUDA cache and delete model
+             del MODEL
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             MODEL = None # in case loading would crash
+             MODEL = MelodyFlow.get_pretrained(version)
+             print(f"Model {version} loaded successfully")


def _do_predictions(texts,
@@ -153,24 +379,32 @@ def _do_predictions(texts,
    return out_wavs


- @spaces.GPU(duration=30)
def predict(model, text,
-             solver, steps, target_flowstep,
-             regularize,
-             regularization_strength,
-             duration,
-             melody=None,
-             model_path=None,
-             progress=gr.Progress()):
+             solver, steps, target_flowstep,
+             regularize,
+             regularization_strength,
+             duration,
+             melody=None,
+             model_path=None,
+             progress=gr.Progress()):
+     """Non-blocking predict function that uses batch processing"""
+
    if melody is not None:
        if solver == MIDPOINT:
            steps = steps//2
        else:
            steps = steps//5

-     global INTERRUPTING
+     global INTERRUPTING, BATCH_PROCESSOR
    INTERRUPTING = False
-     progress(0, desc="Loading model...")
+
+     # Initialize batch processor if not already running
+     if BATCH_PROCESSOR is None:
+         BATCH_PROCESSOR = BatchProcessor()
+         BATCH_PROCESSOR.start()
+
+     progress(0, desc="Queuing request...")
+
    if model_path:
        model_path = model_path.strip()
        if not Path(model_path).exists():
@@ -180,40 +414,51 @@ def predict(model, text,
                        "state_dict.bin and compression_state_dict_.bin.")
        model = model_path

-     load_model(model)
-
-     max_generated = 0
-
-     def _progress(generated, to_generate):
-         nonlocal max_generated
-         max_generated = max(generated, max_generated)
-         progress((min(max_generated, to_generate), to_generate))
+     # Prepare request data
+     request_data = {
+         'text': text,
+         'melody': melody,
+         'solver': solver,
+         'steps': steps,
+         'target_flowstep': target_flowstep,
+         'regularize': regularize,
+         'regularization_strength': regularization_strength,
+         'duration': duration,
+         'model': model,
+         'request_id': str(uuid.uuid4())
+     }
+
+     # Add to batch processor
+     future = BATCH_PROCESSOR.add_request(request_data)
+
+     progress(0.3, desc="Waiting for GPU...")
+
+     # Wait for result with progress updates
+     max_wait = 60 # Maximum wait time in seconds
+     wait_start = time.time()
+
+     while not future.done():
+         elapsed = time.time() - wait_start
+         if elapsed > max_wait:
+             raise gr.Error("Request timeout")
+
+         # Update progress based on wait time
+         progress_val = min(0.9, 0.3 + (elapsed / max_wait) * 0.6)
+         progress(progress_val, desc="Processing...")
+
        if INTERRUPTING:
            raise gr.Error("Interrupted.")
-     MODEL.set_custom_progress_callback(_progress)
-
-     wavs = _do_predictions(
-         [text] * N_REPEATS, [melody],
-         solver=solver,
-         steps=steps,
-         target_flowstep=target_flowstep,
-         regularize=regularize,
-         regularization_strength=regularization_strength,
-         duration=duration,
-         progress=True,)
-
-     # Read the audio file and convert to base64
-     wav_path = wavs[0]
-     with open(wav_path, 'rb') as f:
-         audio_bytes = f.read()
+
+         time.sleep(0.1)

-     audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
+     progress(1.0, desc="Complete!")

-     # Return as a dictionary with base64 data
-     return {
-         "audio": audio_base64,
-         "format": "wav"
-     }
+     # Get result
+     try:
+         result = future.result()
+         return result
+     except Exception as e:
+         raise gr.Error(f"Generation failed: {str(e)}")


def toggle_audio_src(choice):
@@ -353,7 +598,11 @@ def ui_local(launch_kwargs):
        """
    )

-     interface.queue().launch(**launch_kwargs)
+     interface.queue(
+         concurrency_count=8, # Allow up to 8 concurrent requests
+         max_size=50, # Queue up to 50 requests
+         api_open=True # Enable API access
+     ).launch(**launch_kwargs)

def ui_hf(launch_kwargs):
    with gr.Blocks() as interface:
@@ -470,7 +719,19 @@ def ui_hf(launch_kwargs):
        for more details.
        """)

-     interface.queue().launch(**launch_kwargs)
+     interface.queue(
+         concurrency_count=8, # Allow up to 8 concurrent requests
+         max_size=50, # Queue up to 50 requests
+         api_open=True # Enable API access
+     ).launch(**launch_kwargs)
+
+
+ def cleanup():
+     """Cleanup function for graceful shutdown"""
+     global BATCH_PROCESSOR
+     if BATCH_PROCESSOR:
+         BATCH_PROCESSOR.stop()
+     print("Cleanup completed")


if __name__ == "__main__":
@@ -514,6 +775,9 @@ if __name__ == "__main__":
    if args.share:
        launch_kwargs['share'] = args.share

+     import atexit
+     atexit.register(cleanup)
+
    logging.basicConfig(level=logging.INFO, stream=sys.stderr)

    # Show the interface
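
Note that with this change predict() no longer returns a file path: it returns the generated audio as a base64 payload of the form {"audio": <base64 string>, "format": "wav"}. A minimal client-side sketch for decoding such a response follows; the save_response_audio helper and the output.wav path are illustrative and not part of the commit.

import base64

def save_response_audio(result: dict, path: str = "output.wav") -> str:
    # Reverse the b64encode performed in _do_predictions_batch and write a .wav file.
    audio_bytes = base64.b64decode(result["audio"])
    with open(path, "wb") as f:
        f.write(audio_bytes)
    return path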
requirements.txt CHANGED
@@ -26,3 +26,4 @@ torchvision
torchtext
pesq
pystoi
+ spaces
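
The BATCH_SIZE / BATCH_TIMEOUT constants and the Future returned by BatchProcessor.add_request implement a general accumulate-and-flush pattern: callers enqueue a request and block on a Future, while a background thread flushes everything pending once the batch is large enough or the oldest entry has waited long enough. Below is a self-contained sketch of that pattern with a dummy batch_worker standing in for the GPU call; all names are illustrative, not the app's actual classes.

import threading
import time
from concurrent.futures import Future
from queue import Queue, Empty

BATCH_SIZE = 4       # flush once this many requests are pending
BATCH_TIMEOUT = 2.0  # or once the oldest pending request has waited this long

def batch_worker(prompts):
    # Stand-in for the real GPU call: handle all queued prompts in one pass.
    return [f"result for {p}" for p in prompts]

pending = []                     # (prompt, Future) pairs waiting to be flushed
wakeup = Queue()                 # signals the flush loop that a request arrived
pending_lock = threading.Lock()

def submit(prompt):
    fut = Future()
    with pending_lock:
        pending.append((prompt, fut))
    wakeup.put(None)
    return fut

def flush_loop():
    oldest = None
    while True:
        try:
            wakeup.get(timeout=0.5)
        except Empty:
            pass
        with pending_lock:
            if not pending:
                oldest = None
                continue
            if oldest is None:
                oldest = time.time()
            if len(pending) < BATCH_SIZE and time.time() - oldest < BATCH_TIMEOUT:
                continue
            batch = pending[:]
            pending.clear()
            oldest = None
        results = batch_worker([p for p, _ in batch])
        for (_, fut), res in zip(batch, results):
            fut.set_result(res)

threading.Thread(target=flush_loop, daemon=True).start()
futures = [submit(f"prompt {i}") for i in range(6)]
print([f.result(timeout=10) for f in futures])

In the commit itself, _process_current_batch plays the role of batch_worker, and predict() waits on the returned Future (polling future.done() with a timeout) much as the final line here blocks on f.result().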