Spaces:

algoryn
/

yolo-e-idcard

Sleeping

App Files Files Community

tommulder committed on Sep 9

Commit

ef82d3b

1 Parent(s): 94a9860

Enable GPU runtime: switch to PyTorch CUDA base, prefer CUDA device, warm-up pass, and detailed GPU logs; update README; remove CPU torch wheels from requirements

Browse files

Files changed (4) hide show

Dockerfile +1 -1
README.md +23 -0
app.py +56 -5
requirements.txt +1 -2

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.11-slim
 # Install system dependencies as root first
 RUN apt-get update && apt-get install -y \

+FROM pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime
 # Install system dependencies as root first
 RUN apt-get update && apt-get install -y \

README.md CHANGED Viewed

@@ -216,6 +216,29 @@ The system uses `config/labels.json` for class mapping:
 3. Configure environment variables if needed
 4. Deploy and test the endpoints
 ### Local Development
 ```bash

 3. Configure environment variables if needed
 4. Deploy and test the endpoints
+### GPU Docker Runtime
+- Ensure the host has a recent NVIDIA driver installed
+- Install NVIDIA Container Toolkit on the host
+- Run the container with GPU access enabled:
+```bash
+# Build image
+docker build -t kybtech-yolo-e-idcard:gpu .
+# Run with all GPUs exposed and host IPC (shared memory) enabled
+docker run --rm \
+  --gpus all \
+  --ipc=host \
+  -p 7860:7860 \
+  kybtech-yolo-e-idcard:gpu
+```
+Notes:
+- The Dockerfile uses `pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime` as base (CUDA included).
+- The app auto-selects GPU if available and performs a warm-up pass.
+- Verify GPU is visible inside the container with `python -c "import torch; print(torch.cuda.is_available())"`.
 ### Local Development
 ```bash

app.py CHANGED Viewed

@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)
 yolo_model = None
 orientation_classifier = None
 class_mapping = {}
 # Load class mapping from config
 def load_class_mapping():
@@ -739,21 +741,70 @@ def get_class_name(class_id: int) -> str:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Application lifespan manager for model loading."""
-    global yolo_model, orientation_classifier
     logger.info("Loading YOLO-E model and initializing components...")
     try:
         # Load class mapping
         load_class_mapping()
         # Load YOLO-E model (yolo11 variant)
         yolo_model = YOLOE("yolo11n.pt")  # Use nano for faster inference
         logger.info("YOLO-E model loaded successfully")
         # Initialize orientation classifier with YOLO model
         orientation_classifier = OrientationClassifier(yolo_model)
         logger.info("Orientation classifier initialized")
     except Exception as e:
         logger.error(f"Failed to load models: {e}")
         raise
@@ -797,8 +848,8 @@ async def detect_documents(
         image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         img_height, img_width = image_cv.shape[:2]
-        # Run YOLO-E detection
-        results = yolo_model(image_cv, conf=min_confidence)
         detections = []
         for result in results:
@@ -909,8 +960,8 @@ async def detect_documents_video(
         for frame_idx, (frame, timestamp) in enumerate(frames):
             frame_detections = []
-            # Run YOLO-E detection on frame
-            results = yolo_model(frame, conf=min_confidence)
             for result in results:
                 if result.boxes is not None:

 yolo_model = None
 orientation_classifier = None
 class_mapping = {}
+# Selected inference device string: 'cuda:0' when CUDA is available, otherwise 'cpu'
+yolo_device: str = "cpu"
 # Load class mapping from config
 def load_class_mapping():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Application lifespan manager for model loading."""
+    global yolo_model, orientation_classifier, yolo_device
     logger.info("Loading YOLO-E model and initializing components...")
     try:
         # Load class mapping
         load_class_mapping()
+        # Select device (prefer CUDA on HF GPU instances; otherwise CPU)
+        # Why: deployment targets Linux GPU; macOS MPS is not relevant here.
+        yolo_device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Selected device: {yolo_device} (cuda_available={torch.cuda.is_available()})")
+        # Log detailed device/runtime information for observability
+        try:
+            if yolo_device.startswith("cuda"):
+                # Query active CUDA device details to confirm GPU runtime
+                device_index = torch.cuda.current_device()
+                device_name = torch.cuda.get_device_name(device_index)
+                cc_major, cc_minor = torch.cuda.get_device_capability(device_index)
+                logger.info(
+                    "CUDA device info: name=%s index=%s capability=%s.%s torch=%s cuda_runtime=%s",
+                    device_name,
+                    device_index,
+                    cc_major,
+                    cc_minor,
+                    torch.__version__,
+                    getattr(torch.version, "cuda", "unknown"),
+                )
+            else:
+                logger.info("CPU runtime active: torch=%s", torch.__version__)
+        except Exception as device_log_err:
+            # Avoid startup failure if device metadata is unavailable
+            logger.warning(f"Device info logging failed: {device_log_err}")
         # Load YOLO-E model (yolo11 variant)
         yolo_model = YOLOE("yolo11n.pt")  # Use nano for faster inference
+        # Move model to device when API is available. Fallback to underlying .model.
+        try:
+            # Preferred: Ultralytics model interface
+            _ = yolo_model.to(yolo_device)
+        except Exception:
+            try:
+                # Fallback: underlying PyTorch module
+                _ = yolo_model.model.to(yolo_device)  # type: ignore[attr-defined]
+            except Exception:
+                # If neither works, we'll rely on per-call device selection below
+                logger.warning("Could not move model to device at load time; will set device per call")
         logger.info("YOLO-E model loaded successfully")
         # Initialize orientation classifier with YOLO model
         orientation_classifier = OrientationClassifier(yolo_model)
         logger.info("Orientation classifier initialized")
+        # Optional warm-up on GPU to trigger lazy CUDA init and JITs
+        try:
+            dummy = np.zeros((640, 640, 3), dtype=np.uint8)
+            # Use a very low confidence and no verbose to minimize overhead
+            _ = yolo_model(dummy, conf=0.01, verbose=False, device=yolo_device)
+            if yolo_device.startswith("cuda"):
+                torch.cuda.synchronize()
+            logger.info("Warm-up inference completed")
+        except Exception as warmup_err:
+            logger.warning(f"Warm-up skipped due to: {warmup_err}")
     except Exception as e:
         logger.error(f"Failed to load models: {e}")
         raise
         image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         img_height, img_width = image_cv.shape[:2]
+        # Run YOLO-E detection on the selected device
+        results = yolo_model(image_cv, conf=min_confidence, device=yolo_device, verbose=False)
         detections = []
         for result in results:
         for frame_idx, (frame, timestamp) in enumerate(frames):
             frame_detections = []
+            # Run YOLO-E detection on the selected device
+            results = yolo_model(frame, conf=min_confidence, device=yolo_device, verbose=False)
             for result in results:
                 if result.boxes is not None:

requirements.txt CHANGED Viewed

@@ -8,5 +8,4 @@ ultralytics>=8.3.50
 opencv-python-headless>=4.9.0.80
 numpy>=1.26.0
 pillow>=10.3.0
-torch>=2.2.0
-torchvision>=0.17.0

 opencv-python-headless>=4.9.0.80
 numpy>=1.26.0
 pillow>=10.3.0
+# Torch and TorchVision are not listed here: they are expected to come from the CUDA-enabled Docker base image (pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime, CUDA 12.6)