tommulder committed on
Commit
ef82d3b
·
1 Parent(s): 94a9860

Enable GPU runtime: switch to PyTorch CUDA base, prefer CUDA device, warm-up pass, and detailed GPU logs; update README; remove CPU torch wheels from requirements

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. README.md +23 -0
  3. app.py +56 -5
  4. requirements.txt +1 -2
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.11-slim
2
 
3
  # Install system dependencies as root first
4
  RUN apt-get update && apt-get install -y \
 
1
+ FROM pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime
2
 
3
  # Install system dependencies as root first
4
  RUN apt-get update && apt-get install -y \
README.md CHANGED
@@ -216,6 +216,29 @@ The system uses `config/labels.json` for class mapping:
216
  3. Configure environment variables if needed
217
  4. Deploy and test the endpoints
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  ### Local Development
220
 
221
  ```bash
 
216
  3. Configure environment variables if needed
217
  4. Deploy and test the endpoints
218
 
219
+ ### GPU Docker Runtime
220
+
221
+ - Ensure the host has a recent NVIDIA driver installed
222
+ - Install NVIDIA Container Toolkit on the host
223
+ - Run the container with GPU access enabled:
224
+
225
+ ```bash
226
+ # Build image
227
+ docker build -t kybtech-yolo-e-idcard:gpu .
228
+
229
+ # Run with all GPUs and necessary capabilities
230
+ docker run --rm \
231
+ --gpus all \
232
+ --ipc=host \
233
+ -p 7860:7860 \
234
+ kybtech-yolo-e-idcard:gpu
235
+ ```
236
+
237
+ Notes:
238
+ - The Dockerfile uses `pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime` as base (CUDA included).
239
+ - The app auto-selects GPU if available and performs a warm-up pass.
240
+ - Verify GPU is visible inside the container with `python -c "import torch; print(torch.cuda.is_available())"`.
241
+
242
  ### Local Development
243
 
244
  ```bash
app.py CHANGED
@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)
33
  yolo_model = None
34
  orientation_classifier = None
35
  class_mapping = {}
 
 
36
 
37
  # Load class mapping from config
38
  def load_class_mapping():
@@ -739,21 +741,70 @@ def get_class_name(class_id: int) -> str:
739
  @asynccontextmanager
740
  async def lifespan(app: FastAPI):
741
  """Application lifespan manager for model loading."""
742
- global yolo_model, orientation_classifier
743
 
744
  logger.info("Loading YOLO-E model and initializing components...")
745
  try:
746
  # Load class mapping
747
  load_class_mapping()
748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
749
  # Load YOLO-E model (yolo11 variant)
750
  yolo_model = YOLOE("yolo11n.pt") # Use nano for faster inference
 
 
 
 
 
 
 
 
 
 
 
 
751
  logger.info("YOLO-E model loaded successfully")
752
 
753
  # Initialize orientation classifier with YOLO model
754
  orientation_classifier = OrientationClassifier(yolo_model)
755
  logger.info("Orientation classifier initialized")
756
 
 
 
 
 
 
 
 
 
 
 
 
757
  except Exception as e:
758
  logger.error(f"Failed to load models: {e}")
759
  raise
@@ -797,8 +848,8 @@ async def detect_documents(
797
  image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
798
  img_height, img_width = image_cv.shape[:2]
799
 
800
- # Run YOLO-E detection
801
- results = yolo_model(image_cv, conf=min_confidence)
802
 
803
  detections = []
804
  for result in results:
@@ -909,8 +960,8 @@ async def detect_documents_video(
909
  for frame_idx, (frame, timestamp) in enumerate(frames):
910
  frame_detections = []
911
 
912
- # Run YOLO-E detection on frame
913
- results = yolo_model(frame, conf=min_confidence)
914
 
915
  for result in results:
916
  if result.boxes is not None:
 
33
  yolo_model = None
34
  orientation_classifier = None
35
  class_mapping = {}
36
+ # Selected inference device string ('cuda:0' when CUDA is available, otherwise 'cpu')
37
+ yolo_device: str = "cpu"
38
 
39
  # Load class mapping from config
40
  def load_class_mapping():
 
741
  @asynccontextmanager
742
  async def lifespan(app: FastAPI):
743
  """Application lifespan manager for model loading."""
744
+ global yolo_model, orientation_classifier, yolo_device
745
 
746
  logger.info("Loading YOLO-E model and initializing components...")
747
  try:
748
  # Load class mapping
749
  load_class_mapping()
750
 
751
+ # Select device (prefer CUDA on HF GPU instances; otherwise CPU)
752
+ # Why: deployment targets Linux GPU; macOS MPS is not relevant here.
753
+ yolo_device = "cuda:0" if torch.cuda.is_available() else "cpu"
754
+ logger.info(f"Selected device: {yolo_device} (cuda_available={torch.cuda.is_available()})")
755
+ # Log detailed device/runtime information for observability
756
+ try:
757
+ if yolo_device.startswith("cuda"):
758
+ # Query active CUDA device details to confirm GPU runtime
759
+ device_index = torch.cuda.current_device()
760
+ device_name = torch.cuda.get_device_name(device_index)
761
+ cc_major, cc_minor = torch.cuda.get_device_capability(device_index)
762
+ logger.info(
763
+ "CUDA device info: name=%s index=%s capability=%s.%s torch=%s cuda_runtime=%s",
764
+ device_name,
765
+ device_index,
766
+ cc_major,
767
+ cc_minor,
768
+ torch.__version__,
769
+ getattr(torch.version, "cuda", "unknown"),
770
+ )
771
+ else:
772
+ logger.info("CPU runtime active: torch=%s", torch.__version__)
773
+ except Exception as device_log_err:
774
+ # Avoid startup failure if device metadata is unavailable
775
+ logger.warning(f"Device info logging failed: {device_log_err}")
776
+
777
  # Load YOLO-E model (yolo11 variant)
778
  yolo_model = YOLOE("yolo11n.pt") # Use nano for faster inference
779
+
780
+ # Move model to the selected device via the Ultralytics interface; fall back to the underlying .model module.
781
+ try:
782
+ # Preferred: Ultralytics model interface
783
+ _ = yolo_model.to(yolo_device)
784
+ except Exception:
785
+ try:
786
+ # Fallback: underlying PyTorch module
787
+ _ = yolo_model.model.to(yolo_device) # type: ignore[attr-defined]
788
+ except Exception:
789
+ # If neither works, we'll rely on per-call device selection below
790
+ logger.warning("Could not move model to device at load time; will set device per call")
791
  logger.info("YOLO-E model loaded successfully")
792
 
793
  # Initialize orientation classifier with YOLO model
794
  orientation_classifier = OrientationClassifier(yolo_model)
795
  logger.info("Orientation classifier initialized")
796
 
797
+ # Best-effort warm-up on the selected device (on GPU this triggers lazy CUDA init and JITs)
798
+ try:
799
+ dummy = np.zeros((640, 640, 3), dtype=np.uint8)
800
+ # Use a very low confidence and no verbose to minimize overhead
801
+ _ = yolo_model(dummy, conf=0.01, verbose=False, device=yolo_device)
802
+ if yolo_device.startswith("cuda"):
803
+ torch.cuda.synchronize()
804
+ logger.info("Warm-up inference completed")
805
+ except Exception as warmup_err:
806
+ logger.warning(f"Warm-up skipped due to: {warmup_err}")
807
+
808
  except Exception as e:
809
  logger.error(f"Failed to load models: {e}")
810
  raise
 
848
  image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
849
  img_height, img_width = image_cv.shape[:2]
850
 
851
+ # Run YOLO-E detection on the selected device
852
+ results = yolo_model(image_cv, conf=min_confidence, device=yolo_device, verbose=False)
853
 
854
  detections = []
855
  for result in results:
 
960
  for frame_idx, (frame, timestamp) in enumerate(frames):
961
  frame_detections = []
962
 
963
+ # Run YOLO-E detection on the selected device
964
+ results = yolo_model(frame, conf=min_confidence, device=yolo_device, verbose=False)
965
 
966
  for result in results:
967
  if result.boxes is not None:
requirements.txt CHANGED
@@ -8,5 +8,4 @@ ultralytics>=8.3.50
8
  opencv-python-headless>=4.9.0.80
9
  numpy>=1.26.0
10
  pillow>=10.3.0
11
- torch>=2.2.0
12
- torchvision>=0.17.0
 
8
  opencv-python-headless>=4.9.0.80
9
  numpy>=1.26.0
10
  pillow>=10.3.0
11
+ # Torch and TorchVision come from the CUDA base image set in the Dockerfile (pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime, CUDA 12.6)