Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR commited on Aug 28

Commit

a099dfd

1 Parent(s): 3c1a16a

Create loaders/sam2_loader.py

Browse files

Files changed (1) hide show

models/loaders/sam2_loader.py +219 -0

models/loaders/sam2_loader.py ADDED Viewed

	@@ -0,0 +1,219 @@

+#!/usr/bin/env python3
+"""
+SAM2 Model Loader
+Handles all SAM2 loading strategies with proper fallbacks
+"""
+import os
+import time
+import logging
+import traceback
+from pathlib import Path
+from typing import Optional, Dict, Any
+import torch
+import numpy as np
+logger = logging.getLogger(__name__)
+class SAM2Loader:
+    """Dedicated loader for SAM2 models"""
+    def __init__(self, device: str = "cuda", cache_dir: str = "./checkpoints/sam2_cache"):
+        self.device = device
+        self.cache_dir = cache_dir
+        os.makedirs(self.cache_dir, exist_ok=True)
+        # Configure HF hub for spaces
+        os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"
+        os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
+        self.model = None
+        self.model_id = None
+        self.load_time = 0.0
+    def load(self, model_size: str = "auto") -> Optional[Any]:
+        """
+        Load SAM2 model with specified size
+        Args:
+            model_size: "tiny", "small", "base", "large", or "auto"
+        Returns:
+            Loaded model or None
+        """
+        if model_size == "auto":
+            model_size = self._determine_optimal_size()
+        model_map = {
+            "tiny": "facebook/sam2.1-hiera-tiny",
+            "small": "facebook/sam2.1-hiera-small",
+            "base": "facebook/sam2.1-hiera-base-plus",
+            "large": "facebook/sam2.1-hiera-large",
+        }
+        self.model_id = model_map.get(model_size, model_map["tiny"])
+        logger.info(f"Loading SAM2 model: {self.model_id}")
+        # Try loading strategies in order
+        strategies = [
+            ("official", self._load_official),
+            ("transformers", self._load_transformers),
+            ("fallback", self._load_fallback)
+        ]
+        for strategy_name, strategy_func in strategies:
+            try:
+                logger.info(f"Trying SAM2 loading strategy: {strategy_name}")
+                start_time = time.time()
+                model = strategy_func()
+                if model:
+                    self.load_time = time.time() - start_time
+                    self.model = model
+                    logger.info(f"SAM2 loaded successfully via {strategy_name} in {self.load_time:.2f}s")
+                    return model
+            except Exception as e:
+                logger.error(f"SAM2 {strategy_name} strategy failed: {e}")
+                logger.debug(traceback.format_exc())
+                continue
+        logger.error("All SAM2 loading strategies failed")
+        return None
+    def _determine_optimal_size(self) -> str:
+        """Determine optimal model size based on available memory"""
+        try:
+            if torch.cuda.is_available():
+                props = torch.cuda.get_device_properties(0)
+                vram_gb = props.total_memory / (1024**3)
+                if vram_gb < 4:
+                    return "tiny"
+                elif vram_gb < 8:
+                    return "small"
+                elif vram_gb < 12:
+                    return "base"
+                else:
+                    return "large"
+        except:
+            pass
+        return "tiny"  # Conservative default
+    def _load_official(self) -> Optional[Any]:
+        """Load using official SAM2 API"""
+        from sam2.sam2_image_predictor import SAM2ImagePredictor
+        predictor = SAM2ImagePredictor.from_pretrained(
+            self.model_id,
+            cache_dir=self.cache_dir,
+            local_files_only=False,
+            trust_remote_code=True,
+        )
+        # Move to device and set to eval mode
+        if hasattr(predictor, "model"):
+            predictor.model = predictor.model.to(self.device)
+            predictor.model.eval()
+        # Set device attribute for the predictor
+        if hasattr(predictor, "device"):
+            predictor.device = self.device
+        return predictor
+    def _load_transformers(self) -> Optional[Any]:
+        """Load using transformers library"""
+        from transformers import AutoModel, AutoProcessor
+        dtype = torch.float16 if "cuda" in self.device else torch.float32
+        model = AutoModel.from_pretrained(
+            self.model_id,
+            trust_remote_code=True,
+            torch_dtype=dtype,
+            cache_dir=self.cache_dir
+        )
+        model = model.to(self.device)
+        model.eval()
+        try:
+            processor = AutoProcessor.from_pretrained(
+                self.model_id,
+                cache_dir=self.cache_dir
+            )
+        except:
+            processor = None
+        # Wrap to match expected API
+        class SAM2TransformersWrapper:
+            def __init__(self, model, processor, device):
+                self.model = model
+                self.processor = processor
+                self.device = device
+                self.current_image = None
+            def set_image(self, image):
+                """Store image for processing"""
+                self.current_image = image
+                # TODO: Actually encode image with model here
+            def predict(self, point_coords=None, point_labels=None, box=None, **kwargs):
+                """Generate masks from prompts"""
+                # TODO: Implement actual prediction
+                if self.current_image is not None:
+                    h, w = self.current_image.shape[:2]
+                else:
+                    h, w = 512, 512
+                # For now, return dummy mask
+                return {
+                    "masks": np.ones((1, h, w), dtype=np.float32),
+                    "scores": np.array([0.9]),
+                    "logits": np.ones((1, h, w), dtype=np.float32),
+                }
+        return SAM2TransformersWrapper(model, processor, self.device)
+    def _load_fallback(self) -> Optional[Any]:
+        """Create fallback predictor for testing"""
+        class FallbackSAM2:
+            def __init__(self, device):
+                self.device = device
+                self.current_image = None
+            def set_image(self, image):
+                self.current_image = image
+            def predict(self, point_coords=None, point_labels=None, box=None, **kwargs):
+                """Return full mask as fallback"""
+                if self.current_image is not None:
+                    h, w = self.current_image.shape[:2]
+                else:
+                    h, w = 512, 512
+                return {
+                    "masks": np.ones((1, h, w), dtype=np.float32),
+                    "scores": np.array([0.5]),
+                    "logits": np.ones((1, h, w), dtype=np.float32),
+                }
+        logger.warning("Using fallback SAM2 (no real segmentation)")
+        return FallbackSAM2(self.device)
+    def cleanup(self):
+        """Clean up resources"""
+        if self.model:
+            del self.model
+            self.model = None
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    def get_info(self) -> Dict[str, Any]:
+        """Get loader information"""
+        return {
+            "loaded": self.model is not None,
+            "model_id": self.model_id,
+            "device": self.device,
+            "load_time": self.load_time,
+            "model_type": type(self.model).__name__ if self.model else None
+        }