Enable GPU runtime: switch to PyTorch CUDA base, prefer CUDA device, warm-up pass, and detailed GPU logs; update README; remove CPU torch wheels from requirements
Files changed:
- Dockerfile +1 -1
- README.md +23 -0
- app.py +56 -5
- requirements.txt +1 -2
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
-FROM
+FROM pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime
 
 # Install system dependencies as root first
 RUN apt-get update && apt-get install -y \
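The only change above is the base-image swap, so the CUDA runtime and a GPU-enabled PyTorch build now come from the image itself. As a quick sanity check (not part of this commit; it assumes Docker and the NVIDIA Container Toolkit are present on the host, and the expected versions are inferred from the image tag), the bundled build can be inspected directly:

```bash
# Not part of this commit: verify the base image ships a CUDA-enabled torch build.
# Requires a GPU host with the NVIDIA Container Toolkit installed.
docker run --rm --gpus all pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime \
  python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
# Expected output is roughly "2.7.0 12.6 True" (versions inferred from the image tag).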
README.md
CHANGED
@@ -216,6 +216,29 @@ The system uses `config/labels.json` for class mapping:
 3. Configure environment variables if needed
 4. Deploy and test the endpoints
 
+### GPU Docker Runtime
+
+- Ensure host has recent NVIDIA driver installed
+- Install NVIDIA Container Toolkit on the host
+- Run the container with GPU access enabled:
+
+```bash
+# Build image
+docker build -t kybtech-yolo-e-idcard:gpu .
+
+# Run with all GPUs and necessary capabilities
+docker run --rm \
+  --gpus all \
+  --ipc=host \
+  -p 7860:7860 \
+  kybtech-yolo-e-idcard:gpu
+```
+
+Notes:
+- The Dockerfile uses `pytorch/pytorch:2.7.0-cuda12.6-cudnn9-runtime` as base (CUDA included).
+- The app auto-selects GPU if available and performs a warm-up pass.
+- Verify GPU is visible inside the container with `python -c "import torch; print(torch.cuda.is_available())"`.
+
 ### Local Development
 
 ```bash
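The verification one-liner in the README notes can be expanded into a slightly fuller check. The sketch below is illustrative only (it is not part of the README diff) and assumes it is run from a shell inside the container started with `--gpus all`, where `nvidia-smi` is injected by the NVIDIA container runtime:

```bash
# Illustrative only: fuller GPU visibility check from inside the running container.
nvidia-smi   # driver and GPU should be listed when started with --gpus all

python - <<'PY'
import torch
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
    print("torch:", torch.__version__, "cuda runtime:", torch.version.cuda)
PY
```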
app.py
CHANGED
@@ -33,6 +33,8 @@ logger = logging.getLogger(__name__)
 yolo_model = None
 orientation_classifier = None
 class_mapping = {}
+# Selected inference device string (e.g., 'cuda:0', 'mps', or 'cpu')
+yolo_device: str = "cpu"
 
 # Load class mapping from config
 def load_class_mapping():
@@ -739,21 +741,70 @@ def get_class_name(class_id: int) -> str:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Application lifespan manager for model loading."""
-    global yolo_model, orientation_classifier
+    global yolo_model, orientation_classifier, yolo_device
 
     logger.info("Loading YOLO-E model and initializing components...")
     try:
         # Load class mapping
         load_class_mapping()
 
+        # Select device (prefer CUDA on HF GPU instances; otherwise CPU)
+        # Why: deployment targets Linux GPU; macOS MPS is not relevant here.
+        yolo_device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Selected device: {yolo_device} (cuda_available={torch.cuda.is_available()})")
+        # Log detailed device/runtime information for observability
+        try:
+            if yolo_device.startswith("cuda"):
+                # Query active CUDA device details to confirm GPU runtime
+                device_index = torch.cuda.current_device()
+                device_name = torch.cuda.get_device_name(device_index)
+                cc_major, cc_minor = torch.cuda.get_device_capability(device_index)
+                logger.info(
+                    "CUDA device info: name=%s index=%s capability=%s.%s torch=%s cuda_runtime=%s",
+                    device_name,
+                    device_index,
+                    cc_major,
+                    cc_minor,
+                    torch.__version__,
+                    getattr(torch.version, "cuda", "unknown"),
+                )
+            else:
+                logger.info("CPU runtime active: torch=%s", torch.__version__)
+        except Exception as device_log_err:
+            # Avoid startup failure if device metadata is unavailable
+            logger.warning(f"Device info logging failed: {device_log_err}")
+
         # Load YOLO-E model (yolo11 variant)
         yolo_model = YOLOE("yolo11n.pt")  # Use nano for faster inference
+
+        # Move model to device when API is available. Fallback to underlying .model.
+        try:
+            # Preferred: Ultralytics model interface
+            _ = yolo_model.to(yolo_device)
+        except Exception:
+            try:
+                # Fallback: underlying PyTorch module
+                _ = yolo_model.model.to(yolo_device)  # type: ignore[attr-defined]
+            except Exception:
+                # If neither works, we'll rely on per-call device selection below
+                logger.warning("Could not move model to device at load time; will set device per call")
         logger.info("YOLO-E model loaded successfully")
 
         # Initialize orientation classifier with YOLO model
         orientation_classifier = OrientationClassifier(yolo_model)
         logger.info("Orientation classifier initialized")
 
+        # Optional warm-up on GPU to trigger lazy CUDA init and JITs
+        try:
+            dummy = np.zeros((640, 640, 3), dtype=np.uint8)
+            # Use a very low confidence and no verbose to minimize overhead
+            _ = yolo_model(dummy, conf=0.01, verbose=False, device=yolo_device)
+            if yolo_device.startswith("cuda"):
+                torch.cuda.synchronize()
+            logger.info("Warm-up inference completed")
+        except Exception as warmup_err:
+            logger.warning(f"Warm-up skipped due to: {warmup_err}")
+
     except Exception as e:
         logger.error(f"Failed to load models: {e}")
         raise
@@ -797,8 +848,8 @@ async def detect_documents(
         image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         img_height, img_width = image_cv.shape[:2]
 
-        # Run YOLO-E detection
-        results = yolo_model(image_cv, conf=min_confidence)
+        # Run YOLO-E detection on the selected device
+        results = yolo_model(image_cv, conf=min_confidence, device=yolo_device, verbose=False)
 
         detections = []
         for result in results:
@@ -909,8 +960,8 @@ async def detect_documents_video(
         for frame_idx, (frame, timestamp) in enumerate(frames):
             frame_detections = []
 
-            # Run YOLO-E detection on
-            results = yolo_model(frame, conf=min_confidence)
+            # Run YOLO-E detection on the selected device
+            results = yolo_model(frame, conf=min_confidence, device=yolo_device, verbose=False)
 
             for result in results:
                 if result.boxes is not None:
requirements.txt
CHANGED
@@ -8,5 +8,4 @@ ultralytics>=8.3.50
 opencv-python-headless>=4.9.0.80
 numpy>=1.26.0
 pillow>=10.3.0
-
-torchvision>=0.17.0
+# Torch and TorchVision are installed in Dockerfile with CUDA wheels (cu121)
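The new requirements comment defers Torch and TorchVision to the Dockerfile, whose install lines are not shown in this diff. If they are installed with pip rather than taken from the bundled build in the base image, the conventional pattern is to point pip at the CUDA wheel index; the snippet below is a hypothetical sketch (the cu121 index URL and the unpinned package versions are assumptions, not taken from this repo):

```bash
# Hypothetical Dockerfile RUN step (not shown in this diff): install CUDA-enabled
# wheels from the official PyTorch cu121 index instead of the default CPU wheels.
pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cu121
```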