1.1.0
- app.py +129 -124
- depth_pro/utils.py +114 -0
- requirements.txt +4 -1
app.py
CHANGED
@@ -2,14 +2,20 @@ import os
 import tempfile
 import numpy as np
 import cv2
+from pathlib import Path
+import logging
+from transformers import DepthProImageProcessorFast, DepthProForDepthEstimation
 import torch
 from PIL import Image
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import JSONResponse, HTMLResponse
-from
-
+from typing import Any, Dict, List, Tuple, Union
+import pillow_heif
 import json
 
+from depth_pro.utils import load_rgb, extract_exif
+
+
 # Initialize FastAPI app
 app = FastAPI(
     title="Depth Pro Distance Estimation",
@@ -26,150 +32,120 @@ def initialize_depth_pipeline():
     """Initialize the Depth Pro pipeline"""
     try:
         print("Initializing Depth Pro pipeline...")
-
-
-
-
-            torch_dtype=torch.float32  # Use float32 for CPU compatibility
-        )
-        print("Depth Pro pipeline initialized successfully!")
-        return pipe
+        image_processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
+        model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
+
+        return model, image_processor
     except Exception as e:
         print(f"Error initializing pipeline: {e}")
         print("Falling back to dummy pipeline...")
         return None
 
-class DummyDepthPipeline:
-    """Dummy pipeline for when the real model fails to load"""
-
-    def __call__(self, image):
-        """Generate dummy depth prediction"""
-        if isinstance(image, str):
-            image = Image.open(image)
-        elif isinstance(image, np.ndarray):
-            image = Image.fromarray(image)
-
-        width, height = image.size
-
-        # Generate a realistic-looking depth map
-        depth = self._generate_dummy_depth(height, width)
-
-        return {"depth": depth}
-
-    def _generate_dummy_depth(self, height, width):
-        """Generate a dummy depth map that looks realistic"""
-        # Create depth that decreases from bottom to top (simulating perspective)
-        y_coords = np.linspace(10.0, 2.0, height)  # 10m to 2m depth
-        depth = np.tile(y_coords[:, np.newaxis], (1, width))
-
-        # Add some noise and variation
-        noise = np.random.normal(0, 0.5, (height, width))
-        depth += noise
-
-        # Ensure positive depths
-        depth = np.maximum(depth, 0.1)
-
-        return depth
 
 class DepthEstimator:
-    def __init__(self,
+    def __init__(self, model=None, image_processor=None):
         self.device = torch.device('cpu')  # Force CPU
         print("Initializing Depth Pro estimator...")
-        self.
+        self.model = model
+        self.image_processor = image_processor
         print("Depth Pro estimator initialized successfully!")
 
     def estimate_depth(self, image_path):
         try:
             # Load image
-            image = Image.open(image_path)
+            image = Image.open(image_path)
 
             # Resize image for processing
             resized_image, new_size = self.resize_image(image)
-
-
-
-
-            #
-
-
-
-
-
-
-
+
+            rgb_image = load_rgb(resized_image.name)
+            f_px = rgb_image[-1]
+            eval_image = rgb_image[0]
+            # Perform inference using model
+            inputs = self.image_processor(eval_image, return_tensors="pt").to(self.device)
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+            post_processed_output = self.image_processor.post_process_depth_estimation(
+                outputs, target_sizes=[(new_size[1], new_size[0])],
+            )
+            result = post_processed_output[0]
+            field_of_view = result["field_of_view"]
+            focal_length = result["focal_length"]
+            depth = result["predicted_depth"]
+
             # Convert to numpy if needed
             if isinstance(depth, torch.Tensor):
-                depth = depth.cpu().numpy()
+                depth = depth.detach().cpu().numpy()
            elif not isinstance(depth, np.ndarray):
                 depth = np.array(depth)
 
             # Estimate focal length (rough estimation)
-
+            print(f_px, focal_length)
+
 
-            return depth, new_size,
+            return depth, new_size, focal_length
 
         except Exception as e:
             print(f"Error in depth estimation: {e}")
             return None, None, None
 
-    def resize_image(self,
-
-
-
-
-
-
-
-        return resized_image, new_size
-
-def find_topmost_pixel(image):
-    """Find the topmost non-zero pixel in the image (simulating footpath detection)"""
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # Simple edge detection to find potential footpath boundaries
-    edges = cv2.Canny(gray, 50, 150)
+    def resize_image(self, image_path, max_size=1536):
+        with Image.open(image_path) as img:
+            ratio = max_size / max(img.size)
+            new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
+            img = img.resize(new_size, Image.Resampling.LANCZOS)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
+                img.save(temp_file, format="PNG")
+                return temp_file, new_size
 
-
-
-
+
+def find_topmost_pixel(mask):
+    '''Top Pixel from footpath mask'''
+    footpath_pixels = np.where(mask > 0)
+    if len(footpath_pixels[0]) == 0:
         return None
-
-
-
-    top_x_coords = edge_pixels[1][top_pixels_mask]
+    min_y = np.min(footpath_pixels[0])
+    top_pixels_mask = footpath_pixels[0] == min_y
+    top_x_coords = footpath_pixels[1][top_pixels_mask]
     center_idx = len(top_x_coords) // 2
     return (min_y, top_x_coords[center_idx])
 
-def
-    """Find the bottommost pixel
+def find_bottommost_footpath_pixel(mask, topmost_pixel):
+    """Find the bottommost pixel perpendicular to the topmost pixel within the mask"""
     if topmost_pixel is None:
         return None
 
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    edges = cv2.Canny(gray, 50, 150)
-
     top_y, top_x = topmost_pixel
 
-    # Find pixels in the same column
-    column_pixels = np.where((
+    # Find all mask pixels in the same x-column as the topmost pixel
+    column_pixels = np.where((mask > 0) & (np.arange(mask.shape[1])[None, :] == top_x))
 
     if len(column_pixels[0]) == 0:
-        #
-
-        if len(
+        # If no pixels in the same column, find the bottommost pixel in the entire mask
+        footpath_pixels = np.where(mask > 0)
+        if len(footpath_pixels[0]) == 0:
             return None
-    max_y = np.max(
-    bottom_pixels_mask =
-    bottom_x_coords =
+        max_y = np.max(footpath_pixels[0])
+        bottom_pixels_mask = footpath_pixels[0] == max_y
+        bottom_x_coords = footpath_pixels[1][bottom_pixels_mask]
         center_idx = len(bottom_x_coords) // 2
         return (max_y, bottom_x_coords[center_idx])
 
+    # Find the bottommost pixel in the same x-column
     max_y_in_column = np.max(column_pixels[0])
     return (max_y_in_column, top_x)
 
-def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
+
+def estimate_real_world_distance(depth_map, topmost_pixel, mask):
     """Estimate real-world distance between two pixels using depth information"""
-
+
+    if topmost_pixel is None or depth_map is None:
+        return None
+
+    # Find the bottommost pixel perpendicular to the topmost pixel
+    bottommost_pixel = find_bottommost_footpath_pixel(mask, topmost_pixel)
+
+    if bottommost_pixel is None:
         return None
 
     top_y, top_x = topmost_pixel
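
Note: the hunk above swaps the old transformers pipeline (and the DummyDepthPipeline fallback) for an explicit processor/model pair. As rendered, initialize_depth_pipeline calls .to(device) on a `device` name that does not appear to be defined in the visible diff, so the except branch may fire more often than intended. For reference, a minimal standalone sketch of the same Hugging Face DepthPro flow, with the device defined explicitly (model id as in the diff; the input file name is illustrative):

    import torch
    from PIL import Image
    from transformers import DepthProImageProcessorFast, DepthProForDepthEstimation

    device = torch.device("cpu")
    image_processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
    model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)

    image = Image.open("example.jpg").convert("RGB")  # illustrative input
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing resizes the prediction back to (height, width); the
    # output also carries the model's field-of-view and focal-length estimates.
    post = image_processor.post_process_depth_estimation(
        outputs, target_sizes=[(image.height, image.width)]
    )[0]
    depth = post["predicted_depth"]       # metric depth map (torch.Tensor)
    focal_length = post["focal_length"]   # estimated focal length in pixels
    fov = post["field_of_view"]           # estimated field of view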
@@ -188,7 +164,7 @@ def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
         print("Invalid depth values (NaN) found")
         return None
 
-    distance_meters = float(
+    distance_meters = float(topmost_depth - bottommost_depth)
 
     print(f"Distance calculation:")
     print(f"  Topmost pixel: ({top_y}, {top_x}) = {topmost_depth:.3f}m")
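
Note on semantics: distance_meters above is the difference between the Depth Pro depth values sampled at the topmost and bottommost mask pixels, i.e. a separation along the camera's viewing direction rather than a full Euclidean distance between the two 3D points. A toy walk-through of how the new helpers feed this calculation (synthetic mask and depth map, values purely illustrative; it assumes depths are read directly at the two pixels):

    import numpy as np

    # 5x5 footpath mask: a vertical strip in column 2.
    mask = np.zeros((5, 5), dtype=np.uint8)
    mask[1:4, 2] = 255

    # Synthetic metric depth map: 8 m at the top row down to 2 m at the bottom.
    depth_map = np.repeat(np.linspace(8.0, 2.0, 5)[:, None], 5, axis=1)

    top = find_topmost_pixel(mask)                      # -> (1, 2)
    bottom = find_bottommost_footpath_pixel(mask, top)  # -> (3, 2)

    # 6.5 m - 3.5 m = 3.0 m of depth separation between the two pixels.
    print(depth_map[top] - depth_map[bottom])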
@@ -197,10 +173,14 @@ def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
 
     return distance_meters
 
+
+
+
+
 # Initialize depth estimator globally
 print("Initializing Depth Pro pipeline...")
-
-depth_estimator = DepthEstimator(
+depth_model, image_processor = initialize_depth_pipeline()
+depth_estimator = DepthEstimator(depth_model, image_processor)
 
 @app.get("/health")
 async def health_check():
@@ -218,7 +198,7 @@ async def api_info():
     }
 
 @app.post("/estimate-depth")
-async def estimate_depth_endpoint(file: UploadFile = File(...)):
+async def estimate_depth_endpoint(file: UploadFile = File(...), mask: UploadFile = File(...)):
     """FastAPI endpoint for depth estimation and distance calculation"""
     try:
         # Save uploaded file temporarily
@@ -226,13 +206,20 @@ async def estimate_depth_endpoint(file: UploadFile = File(...)):
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
-
+
+        # Save uploaded mask temporarily
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as mtemp_file:
+            content = await mask.read()
+            mtemp_file.write(content)
+            temp_file_path_mask = mtemp_file.name
+
         # Load image for pixel detection
         image = cv2.imread(temp_file_path)
-        if image is None:
+        mask = cv2.imread(temp_file_path_mask)
+        if image is None or mask is None:
             return JSONResponse(
                 status_code=400,
-                content={"error": "Could not load image"}
+                content={"error": "Could not load image or mask"}
             )
 
         # Estimate depth
@@ -249,19 +236,18 @@ async def estimate_depth_endpoint(file: UploadFile = File(...)):
 
         # Find key pixels
         topmost_pixel = find_topmost_pixel(resized_image)
-        bottommost_pixel = find_bottommost_pixel(resized_image, topmost_pixel)
 
         # Calculate distance
-        distance_meters = estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel)
+        distance_meters = estimate_real_world_distance(depth_map, topmost_pixel, mask)
 
         # Clean up
         os.unlink(temp_file_path)
+        os.unlink(temp_file_path_mask)
 
         result = {
             "depth_map_shape": depth_map.shape,
             "focal_length_px": float(focal_length_px) if focal_length_px is not None else None,
-            "topmost_pixel": [int(topmost_pixel[0]), int(topmost_pixel[1])] if topmost_pixel else None,
-            "bottommost_pixel": [int(bottommost_pixel[0]), int(bottommost_pixel[1])] if bottommost_pixel else None,
+            "topmost_pixel": [int(topmost_pixel[0]), int(topmost_pixel[1])] if topmost_pixel else None,
             "distance_meters": distance_meters,
             "depth_stats": {
                 "min_depth": float(np.min(depth_map)),
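
The endpoint now requires two multipart fields, file and mask. One detail worth confirming in the hunk above: the unchanged context line still passes resized_image to find_topmost_pixel, whereas the rewritten helper documents itself as operating on the footpath mask. A minimal client sketch against the updated endpoint (host and port are assumptions; the truncated __main__ block presumably starts the server):

    import requests

    url = "http://localhost:8000/estimate-depth"  # assumed host/port
    with open("footpath.jpg", "rb") as f, open("footpath_mask.png", "rb") as m:
        response = requests.post(url, files={"file": f, "mask": m})
    response.raise_for_status()

    result = response.json()
    print(result["distance_meters"])   # depth separation in meters, or None
    print(result["focal_length_px"])   # focal length estimate in pixels
    print(result["depth_stats"])       # min/max/mean depth of the map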
@@ -325,11 +311,20 @@ async def root():
             background-color: #ecf0f1;
         }
         input[type="file"] {
-            margin:
+            margin: 10px 0;
             padding: 10px;
             border: 1px solid #bdc3c7;
             border-radius: 5px;
         }
+        .file-group {
+            margin: 20px 0;
+        }
+        .file-label {
+            display: block;
+            margin-bottom: 8px;
+            font-weight: bold;
+            color: #2c3e50;
+        }
         button {
             background-color: #3498db;
             color: white;
@@ -373,14 +368,20 @@ async def root():
    <body>
        <div class="container">
            <h1>🔍 Depth Pro Distance Estimation</h1>
-            <p class="subtitle">Upload an image to estimate depth and calculate distances using Apple's Depth Pro model</p>
+            <p class="subtitle">Upload an image and a footpath mask to estimate depth and calculate distances using Apple's Depth Pro model</p>
 
            <div class="upload-section">
-                <h3>Upload Image</h3>
+                <h3>Upload Image and Mask</h3>
                <form id="uploadForm" enctype="multipart/form-data">
-                    <
-
-
+                    <div style="margin: 20px 0;">
+                        <label for="imageFile" style="display: block; margin-bottom: 5px; font-weight: bold;">📸 Main Image:</label>
+                        <input type="file" id="imageFile" name="file" accept="image/*" required style="width: 100%;">
+                    </div>
+                    <div style="margin: 20px 0;">
+                        <label for="maskFile" style="display: block; margin-bottom: 5px; font-weight: bold;">🎭 Footpath Mask:</label>
+                        <input type="file" id="maskFile" name="mask" accept="image/*" required style="width: 100%;">
+                    </div>
+                    <button type="submit">Analyze Image with Mask</button>
                </form>
 
                <div id="results" class="results">
@@ -391,7 +392,7 @@ async def root():
 
            <div class="endpoint-info">
                <h3>🔗 API Endpoints</h3>
-                <p><strong>POST /estimate-depth</strong> - Upload image for depth estimation</p>
+                <p><strong>POST /estimate-depth</strong> - Upload image and footpath mask for depth estimation</p>
                <p><strong>GET /docs</strong> - API documentation</p>
                <p><strong>GET /health</strong> - Health check</p>
            </div>
@@ -400,7 +401,8 @@ async def root():
                <h3>✨ Features</h3>
                <ul>
                    <li>🎯 Monocular depth estimation using Depth Pro</li>
-                    <li
+                    <li>🎭 Footpath mask-based analysis</li>
+                    <li>📏 Real-world distance calculation between mask boundaries</li>
                    <li>🖥️ CPU-optimized processing</li>
                    <li>🚀 Fast inference suitable for real-time use</li>
                </ul>
@@ -412,19 +414,26 @@ async def root():
            e.preventDefault();
 
            const fileInput = document.getElementById('imageFile');
+            const maskInput = document.getElementById('maskFile');
            const resultsDiv = document.getElementById('results');
            const resultsContent = document.getElementById('resultsContent');
 
            if (!fileInput.files[0]) {
-                alert('Please select
+                alert('Please select a main image file');
+                return;
+            }
+
+            if (!maskInput.files[0]) {
+                alert('Please select a footpath mask file');
                return;
            }
 
            const formData = new FormData();
            formData.append('file', fileInput.files[0]);
+            formData.append('mask', maskInput.files[0]);
 
            try {
-                resultsContent.innerHTML = '<p>🔄 Processing image...</p>';
+                resultsContent.innerHTML = '<p>🔄 Processing image and mask...</p>';
                resultsDiv.style.display = 'block';
 
                const response = await fetch('/estimate-depth', {
@@ -439,11 +448,10 @@ async def root():
                html += `<p><strong>📐 Distance:</strong> ${result.distance_meters ? result.distance_meters.toFixed(3) + ' meters' : 'N/A'}</p>`;
                html += `<p><strong>🎯 Focal Length:</strong> ${result.focal_length_px ? result.focal_length_px.toFixed(2) + ' pixels' : 'N/A'}</p>`;
                html += `<p><strong>📊 Depth Map Shape:</strong> ${result.depth_map_shape ? result.depth_map_shape.join(' x ') : 'N/A'}</p>`;
-                html += `<p><strong>🔝 Top Pixel:</strong> ${result.topmost_pixel ? `(${result.topmost_pixel[0]}, ${result.topmost_pixel[1]})` : 'N/A'}</p>`;
-                html += `<p><strong>🔽 Bottom Pixel:</strong> ${result.bottommost_pixel ? `(${result.bottommost_pixel[0]}, ${result.bottommost_pixel[1]})` : 'N/A'}</p>`;
+                html += `<p><strong>🔝 Top Mask Pixel:</strong> ${result.topmost_pixel ? `(${result.topmost_pixel[0]}, ${result.topmost_pixel[1]})` : 'N/A'}</p>`;
 
                if (result.depth_stats) {
-                    html += '<h4
+                    html += '<h4>📈 Depth Statistics:</h4>';
                    html += `<p><strong>Min Depth:</strong> ${result.depth_stats.min_depth.toFixed(3)}m</p>`;
                    html += `<p><strong>Max Depth:</strong> ${result.depth_stats.max_depth.toFixed(3)}m</p>`;
                    html += `<p><strong>Mean Depth:</strong> ${result.depth_stats.mean_depth.toFixed(3)}m</p>`;
@@ -464,9 +472,6 @@ async def root():
    """
    return HTMLResponse(content=html_content)
 
-def gradio_interface(image):
-    """Removed Gradio interface - keeping for backward compatibility"""
-    return "Gradio interface has been removed. Please use the web interface or API.", None
 
# FastAPI app is ready to run
if __name__ == "__main__":
depth_pro/utils.py
ADDED
@@ -0,0 +1,114 @@
+# ALL UTIL CREDITS TO DEPTH PRO TEAM
+
+# Copyright (C) 2024 Apple Inc. All Rights Reserved.
+
+import logging
+from pathlib import Path
+from typing import Any, Dict, List, Tuple, Union
+
+import numpy as np
+import pillow_heif
+from PIL import ExifTags, Image, TiffTags
+from pillow_heif import register_heif_opener
+
+register_heif_opener()
+LOGGER = logging.getLogger(__name__)
+
+
+def extract_exif(img_pil: Image) -> Dict[str, Any]:
+    """Return exif information as a dictionary.
+
+    Args:
+    ----
+        img_pil: A Pillow image.
+
+    Returns:
+    -------
+        A dictionary with extracted EXIF information.
+
+    """
+    # Get full exif description from get_ifd(0x8769):
+    # cf https://pillow.readthedocs.io/en/stable/releasenotes/8.2.0.html#image-getexif-exif-and-gps-ifd
+    img_exif = img_pil.getexif().get_ifd(0x8769)
+    exif_dict = {ExifTags.TAGS[k]: v for k, v in img_exif.items() if k in ExifTags.TAGS}
+
+    tiff_tags = img_pil.getexif()
+    tiff_dict = {
+        TiffTags.TAGS_V2[k].name: v
+        for k, v in tiff_tags.items()
+        if k in TiffTags.TAGS_V2
+    }
+    return {**exif_dict, **tiff_dict}
+
+
+def fpx_from_f35(width: float, height: float, f_mm: float = 50) -> float:
+    """Convert a focal length given in mm (35mm film equivalent) to pixels."""
+    return f_mm * np.sqrt(width**2.0 + height**2.0) / np.sqrt(36**2 + 24**2)
+
+
+def load_rgb(
+    path: Union[Path, str], auto_rotate: bool = True, remove_alpha: bool = True
+) -> Tuple[np.ndarray, List[bytes], float]:
+    """Load an RGB image.
+
+    Args:
+    ----
+        path: The url to the image to load.
+        auto_rotate: Rotate the image based on the EXIF data, default is True.
+        remove_alpha: Remove the alpha channel, default is True.
+
+    Returns:
+    -------
+        img: The image loaded as a numpy array.
+        icc_profile: The color profile of the image.
+        f_px: The optional focal length in pixels, extracting from the exif data.
+
+    """
+    LOGGER.debug(f"Loading image {path} ...")
+
+    path = Path(path)
+    if path.suffix.lower() in [".heic"]:
+        heif_file = pillow_heif.open_heif(path, convert_hdr_to_8bit=True)
+        img_pil = heif_file.to_pillow()
+    else:
+        img_pil = Image.open(path)
+
+    img_exif = extract_exif(img_pil)
+    icc_profile = img_pil.info.get("icc_profile", None)
+
+    # Rotate the image.
+    if auto_rotate:
+        exif_orientation = img_exif.get("Orientation", 1)
+        if exif_orientation == 3:
+            img_pil = img_pil.transpose(Image.ROTATE_180)
+        elif exif_orientation == 6:
+            img_pil = img_pil.transpose(Image.ROTATE_270)
+        elif exif_orientation == 8:
+            img_pil = img_pil.transpose(Image.ROTATE_90)
+        elif exif_orientation != 1:
+            LOGGER.warning(f"Ignoring image orientation {exif_orientation}.")
+
+    img = np.array(img_pil)
+    # Convert to RGB if single channel.
+    if img.ndim < 3 or img.shape[2] == 1:
+        img = np.dstack((img, img, img))
+
+    if remove_alpha:
+        img = img[:, :, :3]
+
+    LOGGER.debug(f"\tHxW: {img.shape[0]}x{img.shape[1]}")
+
+    # Extract the focal length from exif data.
+    f_35mm = img_exif.get(
+        "FocalLengthIn35mmFilm",
+        img_exif.get(
+            "FocalLenIn35mmFilm", img_exif.get("FocalLengthIn35mmFormat", None)
+        ),
+    )
+    if f_35mm is not None and f_35mm > 0:
+        LOGGER.debug(f"\tfocal length @ 35mm film: {f_35mm}mm")
+        f_px = fpx_from_f35(img.shape[1], img.shape[0], f_35mm)
+    else:
+        f_px = None
+
+    return img, icc_profile, f_px
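
For reference, app.py consumes this module through load_rgb, which returns an (image, icc_profile, f_px) tuple; f_px is only populated when the EXIF data carries a 35mm-equivalent focal length. A short usage sketch (file names are illustrative):

    from depth_pro.utils import load_rgb, fpx_from_f35

    img, icc_profile, f_px = load_rgb("photo.heic")  # HEIC supported via pillow_heif
    print(img.shape)   # (H, W, 3) numpy array, alpha channel stripped
    print(f_px)        # focal length in pixels, or None without EXIF data

    # The underlying conversion: a 28mm-equivalent lens on a 4032x3024 photo
    # maps to roughly 3262 pixels of focal length.
    print(fpx_from_f35(4032, 3024, 28.0))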
requirements.txt
CHANGED
@@ -7,4 +7,7 @@ numpy
 huggingface-hub
 requests
 python-multipart
-accelerate
+accelerate
+torch
+torchvision
+pillow_heif