jay208 committed on
Commit
b8b7444
·
1 Parent(s): 4b4a9dc
Files changed (3) hide show
  1. app.py +129 -124
  2. depth_pro/utils.py +114 -0
  3. requirements.txt +4 -1
app.py CHANGED
@@ -2,14 +2,20 @@ import os
2
  import tempfile
3
  import numpy as np
4
  import cv2
 
 
 
5
  import torch
6
  from PIL import Image
7
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
8
  from fastapi.responses import JSONResponse, HTMLResponse
9
- from transformers import pipeline
10
- from typing import Optional
11
  import json
12
 
 
 
 
13
  # Initialize FastAPI app
14
  app = FastAPI(
15
  title="Depth Pro Distance Estimation",
@@ -26,150 +32,120 @@ def initialize_depth_pipeline():
26
  """Initialize the Depth Pro pipeline"""
27
  try:
28
  print("Initializing Depth Pro pipeline...")
29
- pipe = pipeline(
30
- "depth-estimation",
31
- model="apple/DepthPro",
32
- device=0 if torch.cuda.is_available() else -1, # -1 for CPU
33
- torch_dtype=torch.float32 # Use float32 for CPU compatibility
34
- )
35
- print("Depth Pro pipeline initialized successfully!")
36
- return pipe
37
  except Exception as e:
38
  print(f"Error initializing pipeline: {e}")
39
  print("Falling back to dummy pipeline...")
40
  return None
41
 
42
- class DummyDepthPipeline:
43
- """Dummy pipeline for when the real model fails to load"""
44
-
45
- def __call__(self, image):
46
- """Generate dummy depth prediction"""
47
- if isinstance(image, str):
48
- image = Image.open(image)
49
- elif isinstance(image, np.ndarray):
50
- image = Image.fromarray(image)
51
-
52
- width, height = image.size
53
-
54
- # Generate a realistic-looking depth map
55
- depth = self._generate_dummy_depth(height, width)
56
-
57
- return {"depth": depth}
58
-
59
- def _generate_dummy_depth(self, height, width):
60
- """Generate a dummy depth map that looks realistic"""
61
- # Create depth that decreases from bottom to top (simulating perspective)
62
- y_coords = np.linspace(10.0, 2.0, height) # 10m to 2m depth
63
- depth = np.tile(y_coords[:, np.newaxis], (1, width))
64
-
65
- # Add some noise and variation
66
- noise = np.random.normal(0, 0.5, (height, width))
67
- depth += noise
68
-
69
- # Ensure positive depths
70
- depth = np.maximum(depth, 0.1)
71
-
72
- return depth
73
 
74
  class DepthEstimator:
75
- def __init__(self, pipeline=None):
76
  self.device = torch.device('cpu') # Force CPU
77
  print("Initializing Depth Pro estimator...")
78
- self.pipeline = pipeline or DummyDepthPipeline()
 
79
  print("Depth Pro estimator initialized successfully!")
80
 
81
  def estimate_depth(self, image_path):
82
  try:
83
  # Load image
84
- image = Image.open(image_path).convert('RGB')
85
 
86
  # Resize image for processing
87
  resized_image, new_size = self.resize_image(image)
88
-
89
- # Perform inference using pipeline
90
- result = self.pipeline(resized_image)
91
-
92
- # Extract depth map
93
- if isinstance(result, dict) and 'depth' in result:
94
- depth = result['depth']
95
- elif hasattr(result, 'depth'):
96
- depth = result.depth
97
- else:
98
- depth = result
99
-
 
 
 
 
100
  # Convert to numpy if needed
101
  if isinstance(depth, torch.Tensor):
102
- depth = depth.cpu().numpy()
103
  elif not isinstance(depth, np.ndarray):
104
  depth = np.array(depth)
105
 
106
  # Estimate focal length (rough estimation)
107
- focal_length_px = 1.2 * max(new_size)
 
108
 
109
- return depth, new_size, focal_length_px
110
 
111
  except Exception as e:
112
  print(f"Error in depth estimation: {e}")
113
  return None, None, None
114
 
115
- def resize_image(self, image, max_size=1536):
116
- """Resize image to manageable size"""
117
- if isinstance(image, str):
118
- image = Image.open(image).convert('RGB')
119
-
120
- ratio = max_size / max(image.size)
121
- new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
122
- resized_image = image.resize(new_size, Image.Resampling.LANCZOS)
123
-
124
- return resized_image, new_size
125
-
126
- def find_topmost_pixel(image):
127
- """Find the topmost non-zero pixel in the image (simulating footpath detection)"""
128
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
129
- # Simple edge detection to find potential footpath boundaries
130
- edges = cv2.Canny(gray, 50, 150)
131
 
132
- # Find topmost edge pixel
133
- edge_pixels = np.where(edges > 0)
134
- if len(edge_pixels[0]) == 0:
 
 
135
  return None
136
-
137
- min_y = np.min(edge_pixels[0])
138
- top_pixels_mask = edge_pixels[0] == min_y
139
- top_x_coords = edge_pixels[1][top_pixels_mask]
140
  center_idx = len(top_x_coords) // 2
141
  return (min_y, top_x_coords[center_idx])
142
 
143
- def find_bottommost_pixel(image, topmost_pixel):
144
- """Find the bottommost pixel in the same column as topmost"""
145
  if topmost_pixel is None:
146
  return None
147
 
148
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
149
- edges = cv2.Canny(gray, 50, 150)
150
-
151
  top_y, top_x = topmost_pixel
152
 
153
- # Find pixels in the same column
154
- column_pixels = np.where((edges > 0) & (np.arange(edges.shape[1])[None, :] == top_x))
155
 
156
  if len(column_pixels[0]) == 0:
157
- # Fallback to bottommost edge pixel
158
- edge_pixels = np.where(edges > 0)
159
- if len(edge_pixels[0]) == 0:
160
  return None
161
- max_y = np.max(edge_pixels[0])
162
- bottom_pixels_mask = edge_pixels[0] == max_y
163
- bottom_x_coords = edge_pixels[1][bottom_pixels_mask]
164
  center_idx = len(bottom_x_coords) // 2
165
  return (max_y, bottom_x_coords[center_idx])
166
 
 
167
  max_y_in_column = np.max(column_pixels[0])
168
  return (max_y_in_column, top_x)
169
 
170
- def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
 
171
  """Estimate real-world distance between two pixels using depth information"""
172
- if topmost_pixel is None or bottommost_pixel is None or depth_map is None:
 
 
 
 
 
 
 
173
  return None
174
 
175
  top_y, top_x = topmost_pixel
@@ -188,7 +164,7 @@ def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
188
  print("Invalid depth values (NaN) found")
189
  return None
190
 
191
- distance_meters = float(abs(topmost_depth - bottommost_depth))
192
 
193
  print(f"Distance calculation:")
194
  print(f" Topmost pixel: ({top_y}, {top_x}) = {topmost_depth:.3f}m")
@@ -197,10 +173,14 @@ def estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel):
197
 
198
  return distance_meters
199
 
 
 
 
 
200
  # Initialize depth estimator globally
201
  print("Initializing Depth Pro pipeline...")
202
- depth_pipeline = initialize_depth_pipeline()
203
- depth_estimator = DepthEstimator(depth_pipeline)
204
 
205
  @app.get("/health")
206
  async def health_check():
@@ -218,7 +198,7 @@ async def api_info():
218
  }
219
 
220
  @app.post("/estimate-depth")
221
- async def estimate_depth_endpoint(file: UploadFile = File(...)):
222
  """FastAPI endpoint for depth estimation and distance calculation"""
223
  try:
224
  # Save uploaded file temporarily
@@ -226,13 +206,20 @@ async def estimate_depth_endpoint(file: UploadFile = File(...)):
226
  content = await file.read()
227
  temp_file.write(content)
228
  temp_file_path = temp_file.name
229
-
 
 
 
 
 
 
230
  # Load image for pixel detection
231
  image = cv2.imread(temp_file_path)
232
- if image is None:
 
233
  return JSONResponse(
234
  status_code=400,
235
- content={"error": "Could not load image"}
236
  )
237
 
238
  # Estimate depth
@@ -249,19 +236,18 @@ async def estimate_depth_endpoint(file: UploadFile = File(...)):
249
 
250
  # Find key pixels
251
  topmost_pixel = find_topmost_pixel(resized_image)
252
- bottommost_pixel = find_bottommost_pixel(resized_image, topmost_pixel)
253
 
254
  # Calculate distance
255
- distance_meters = estimate_real_world_distance(depth_map, topmost_pixel, bottommost_pixel)
256
 
257
  # Clean up
258
  os.unlink(temp_file_path)
 
259
 
260
  result = {
261
  "depth_map_shape": depth_map.shape,
262
  "focal_length_px": float(focal_length_px) if focal_length_px is not None else None,
263
- "topmost_pixel": [int(topmost_pixel[0]), int(topmost_pixel[1])] if topmost_pixel else None,
264
- "bottommost_pixel": [int(bottommost_pixel[0]), int(bottommost_pixel[1])] if bottommost_pixel else None,
265
  "distance_meters": distance_meters,
266
  "depth_stats": {
267
  "min_depth": float(np.min(depth_map)),
@@ -325,11 +311,20 @@ async def root():
325
  background-color: #ecf0f1;
326
  }
327
  input[type="file"] {
328
- margin: 20px 0;
329
  padding: 10px;
330
  border: 1px solid #bdc3c7;
331
  border-radius: 5px;
332
  }
 
 
 
 
 
 
 
 
 
333
  button {
334
  background-color: #3498db;
335
  color: white;
@@ -373,14 +368,20 @@ async def root():
373
  <body>
374
  <div class="container">
375
  <h1>🔍 Depth Pro Distance Estimation</h1>
376
- <p class="subtitle">Upload an image to estimate depth and calculate distances using Apple's Depth Pro model</p>
377
 
378
  <div class="upload-section">
379
- <h3>Upload Image</h3>
380
  <form id="uploadForm" enctype="multipart/form-data">
381
- <input type="file" id="imageFile" name="file" accept="image/*" required>
382
- <br>
383
- <button type="submit">Analyze Image</button>
 
 
 
 
 
 
384
  </form>
385
 
386
  <div id="results" class="results">
@@ -391,7 +392,7 @@ async def root():
391
 
392
  <div class="endpoint-info">
393
  <h3>🔗 API Endpoints</h3>
394
- <p><strong>POST /estimate-depth</strong> - Upload image for depth estimation</p>
395
  <p><strong>GET /docs</strong> - API documentation</p>
396
  <p><strong>GET /health</strong> - Health check</p>
397
  </div>
@@ -400,7 +401,8 @@ async def root():
400
  <h3>✨ Features</h3>
401
  <ul>
402
  <li>🎯 Monocular depth estimation using Depth Pro</li>
403
- <li>📏 Real-world distance calculation</li>
 
404
  <li>🖥️ CPU-optimized processing</li>
405
  <li>🚀 Fast inference suitable for real-time use</li>
406
  </ul>
@@ -412,19 +414,26 @@ async def root():
412
  e.preventDefault();
413
 
414
  const fileInput = document.getElementById('imageFile');
 
415
  const resultsDiv = document.getElementById('results');
416
  const resultsContent = document.getElementById('resultsContent');
417
 
418
  if (!fileInput.files[0]) {
419
- alert('Please select an image file');
 
 
 
 
 
420
  return;
421
  }
422
 
423
  const formData = new FormData();
424
  formData.append('file', fileInput.files[0]);
 
425
 
426
  try {
427
- resultsContent.innerHTML = '<p>🔄 Processing image...</p>';
428
  resultsDiv.style.display = 'block';
429
 
430
  const response = await fetch('/estimate-depth', {
@@ -439,11 +448,10 @@ async def root():
439
  html += `<p><strong>📐 Distance:</strong> ${result.distance_meters ? result.distance_meters.toFixed(3) + ' meters' : 'N/A'}</p>`;
440
  html += `<p><strong>🎯 Focal Length:</strong> ${result.focal_length_px ? result.focal_length_px.toFixed(2) + ' pixels' : 'N/A'}</p>`;
441
  html += `<p><strong>📊 Depth Map Shape:</strong> ${result.depth_map_shape ? result.depth_map_shape.join(' x ') : 'N/A'}</p>`;
442
- html += `<p><strong>🔝 Top Pixel:</strong> ${result.topmost_pixel ? `(${result.topmost_pixel[0]}, ${result.topmost_pixel[1]})` : 'N/A'}</p>`;
443
- html += `<p><strong>🔽 Bottom Pixel:</strong> ${result.bottommost_pixel ? `(${result.bottommost_pixel[0]}, ${result.bottommost_pixel[1]})` : 'N/A'}</p>`;
444
 
445
  if (result.depth_stats) {
446
- html += '<h4>� Depth Statistics:</h4>';
447
  html += `<p><strong>Min Depth:</strong> ${result.depth_stats.min_depth.toFixed(3)}m</p>`;
448
  html += `<p><strong>Max Depth:</strong> ${result.depth_stats.max_depth.toFixed(3)}m</p>`;
449
  html += `<p><strong>Mean Depth:</strong> ${result.depth_stats.mean_depth.toFixed(3)}m</p>`;
@@ -464,9 +472,6 @@ async def root():
464
  """
465
  return HTMLResponse(content=html_content)
466
 
467
- def gradio_interface(image):
468
- """Removed Gradio interface - keeping for backward compatibility"""
469
- return "Gradio interface has been removed. Please use the web interface or API.", None
470
 
471
  # FastAPI app is ready to run
472
  if __name__ == "__main__":
 
2
  import tempfile
3
  import numpy as np
4
  import cv2
5
+ from pathlib import Path
6
+ import logging
7
+ from transformers import DepthProImageProcessorFast, DepthProForDepthEstimation
8
  import torch
9
  from PIL import Image
10
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
11
  from fastapi.responses import JSONResponse, HTMLResponse
12
+ from typing import Any, Dict, List, Tuple, Union
13
+ import pillow_heif
14
  import json
15
 
16
+ from depth_pro.utils import load_rgb, extract_exif
17
+
18
+
19
  # Initialize FastAPI app
20
  app = FastAPI(
21
  title="Depth Pro Distance Estimation",
 
32
  """Initialize the Depth Pro pipeline"""
33
  try:
34
  print("Initializing Depth Pro pipeline...")
35
+ image_processor = DepthProImageProcessorFast.from_pretrained("apple/DepthPro-hf")
36
+ model = DepthProForDepthEstimation.from_pretrained("apple/DepthPro-hf").to(device)
37
+
38
+ return model, image_processor
 
 
 
 
39
  except Exception as e:
40
  print(f"Error initializing pipeline: {e}")
41
  print("Falling back to dummy pipeline...")
42
  return None
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
class DepthEstimator:
    """Run Depth Pro inference on an image file.

    The estimator is handed an already-loaded model and its image processor;
    the model inputs are moved to the CPU device before inference.
    """

    def __init__(self, model=None, image_processor=None):
        """Store the model and the processor used to build its inputs.

        Args:
            model: Depth estimation model, invoked as ``model(**inputs)``.
            image_processor: Processor that prepares the model inputs and
                post-processes the raw model outputs.
        """
        self.device = torch.device('cpu')  # Force CPU
        print("Initializing Depth Pro estimator...")
        self.model = model
        self.image_processor = image_processor
        print("Depth Pro estimator initialized successfully!")

    def estimate_depth(self, image_path):
        """Estimate a depth map for the image stored at ``image_path``.

        Args:
            image_path: Path of the image file to process.

        Returns:
            A ``(depth, new_size, focal_length)`` tuple where ``depth`` is a
            numpy array, ``new_size`` is the ``(width, height)`` the image was
            resized to and ``focal_length`` is the value reported by the
            processor's post-processing step. ``(None, None, None)`` is
            returned when any step fails.
        """
        resized_file = None
        try:
            if self.model is None or self.image_processor is None:
                raise RuntimeError("Depth Pro model is not loaded")

            # resize_image() opens the file itself, so it must be given the
            # path rather than an already opened PIL image.
            resized_file, new_size = self.resize_image(image_path)

            # load_rgb returns (image array, icc profile, focal length in px).
            # NOTE(review): the resized PNG is saved without EXIF data, so
            # f_px is presumably always None here -- confirm.
            eval_image, _icc_profile, f_px = load_rgb(resized_file.name)

            # Perform inference using model
            inputs = self.image_processor(eval_image, return_tensors="pt").to(self.device)
            with torch.no_grad():
                outputs = self.model(**inputs)

            # target_sizes expects (height, width); new_size is (width, height).
            post_processed_output = self.image_processor.post_process_depth_estimation(
                outputs, target_sizes=[(new_size[1], new_size[0])],
            )
            result = post_processed_output[0]
            focal_length = result["focal_length"]
            depth = result["predicted_depth"]

            # Convert to numpy if needed
            if isinstance(depth, torch.Tensor):
                depth = depth.detach().cpu().numpy()
            elif not isinstance(depth, np.ndarray):
                depth = np.array(depth)

            print(f_px, focal_length)

            return depth, new_size, focal_length

        except Exception as e:
            # Boundary handler: callers rely on the all-None tuple to detect
            # a failed estimation.
            print(f"Error in depth estimation: {e}")
            return None, None, None
        finally:
            # The resized copy is created with delete=False; remove it so it
            # does not accumulate on disk.
            if resized_file is not None:
                try:
                    os.unlink(resized_file.name)
                except OSError:
                    pass  # best-effort cleanup, the result is already decided

    def resize_image(self, image_path, max_size=1536):
        """Resize an image so its longest side is ``max_size`` and save it as PNG.

        Args:
            image_path: Path (or file object) accepted by ``Image.open``.
            max_size: Target length in pixels of the longest image side.

        Returns:
            ``(temp_file, new_size)`` where ``temp_file`` is the (already
            closed) named temporary file holding the resized PNG -- use
            ``temp_file.name`` to read it -- and ``new_size`` is the
            ``(width, height)`` of the resized image. The caller is
            responsible for deleting the file.
        """
        with Image.open(image_path) as img:
            ratio = max_size / max(img.size)
            # Clamp to 1 so extreme aspect ratios never yield a zero-sized side.
            new_size = (
                max(1, int(img.size[0] * ratio)),
                max(1, int(img.size[1] * ratio)),
            )
            resized = img.resize(new_size, Image.Resampling.LANCZOS)

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        try:
            with temp_file:
                resized.save(temp_file, format="PNG")
        except Exception:
            # Do not leave a half-written file behind when saving fails.
            try:
                os.unlink(temp_file.name)
            except OSError:
                pass
            raise
        return temp_file, new_size
 
 
 
 
 
 
 
 
100
 
101
+
102
def find_topmost_pixel(mask):
    """Return the centre pixel of the topmost non-zero row of a footpath mask.

    Args:
        mask: Array-like mask; values greater than zero mark footpath pixels.
            Arrays with more than two dimensions (e.g. a colour image) are
            collapsed so a pixel counts once if any of its channels is set.

    Returns:
        ``(y, x)`` as plain ints, or ``None`` when the mask is ``None``, has
        fewer than two dimensions, or contains no footpath pixels.
    """
    if mask is None:
        return None

    footpath = np.asarray(mask) > 0
    if footpath.ndim < 2:
        return None
    if footpath.ndim > 2:
        # Without this, every pixel would be listed once per non-zero channel
        # and skew the centre selection below.
        footpath = footpath.any(axis=tuple(range(2, footpath.ndim)))

    rows, cols = np.nonzero(footpath)
    if rows.size == 0:
        return None

    min_y = rows.min()
    top_x_coords = cols[rows == min_y]
    center_idx = top_x_coords.size // 2
    return (int(min_y), int(top_x_coords[center_idx]))
112
 
113
def find_bottommost_footpath_pixel(mask, topmost_pixel):
    """Find the lowest footpath pixel in the same column as ``topmost_pixel``.

    Args:
        mask: Array-like mask; values greater than zero mark footpath pixels.
            Arrays with more than two dimensions (e.g. the 3-channel image a
            colour image read produces) are collapsed to a single channel.
        topmost_pixel: ``(y, x)`` of the topmost footpath pixel, or ``None``.

    Returns:
        ``(y, x)`` as plain ints. When the column of ``topmost_pixel`` holds
        no footpath pixel, the centre pixel of the bottommost non-zero row of
        the whole mask is returned instead. ``None`` when ``topmost_pixel`` or
        ``mask`` is ``None`` or the mask contains no footpath pixels.
    """
    if topmost_pixel is None or mask is None:
        return None

    _top_y, top_x = topmost_pixel
    top_x = int(top_x)

    footpath = np.asarray(mask) > 0
    if footpath.ndim < 2:
        return None
    if footpath.ndim > 2:
        # Comparing a (H, W, C) mask against a (1, W) column selector does not
        # broadcast; reduce the channels first.
        footpath = footpath.any(axis=tuple(range(2, footpath.ndim)))

    # Find the bottommost pixel in the same x-column
    if 0 <= top_x < footpath.shape[1]:
        column_rows = np.flatnonzero(footpath[:, top_x])
        if column_rows.size:
            return (int(column_rows[-1]), top_x)

    # If no pixels in the same column, find the bottommost pixel in the entire mask
    rows, cols = np.nonzero(footpath)
    if rows.size == 0:
        return None
    max_y = rows.max()
    bottom_x_coords = cols[rows == max_y]
    center_idx = bottom_x_coords.size // 2
    return (int(max_y), int(bottom_x_coords[center_idx]))
137
 
138
+
139
+ def estimate_real_world_distance(depth_map, topmost_pixel, mask):
140
  """Estimate real-world distance between two pixels using depth information"""
141
+
142
+ if topmost_pixel is None or depth_map is None:
143
+ return None
144
+
145
+ # Find the bottommost pixel perpendicular to the topmost pixel
146
+ bottommost_pixel = find_bottommost_footpath_pixel(mask, topmost_pixel)
147
+
148
+ if bottommost_pixel is None:
149
  return None
150
 
151
  top_y, top_x = topmost_pixel
 
164
  print("Invalid depth values (NaN) found")
165
  return None
166
 
167
+ distance_meters = float(topmost_depth - bottommost_depth)
168
 
169
  print(f"Distance calculation:")
170
  print(f" Topmost pixel: ({top_y}, {top_x}) = {topmost_depth:.3f}m")
 
173
 
174
  return distance_meters
175
 
176
+
177
+
178
+
179
+
180
  # Initialize depth estimator globally
181
  print("Initializing Depth Pro pipeline...")
182
+ depth_model, image_processor = initialize_depth_pipeline()
183
+ depth_estimator = DepthEstimator(depth_model, image_processor)
184
 
185
  @app.get("/health")
186
  async def health_check():
 
198
  }
199
 
200
  @app.post("/estimate-depth")
201
+ async def estimate_depth_endpoint(file: UploadFile = File(...), mask: UploadFile = File(...)):
202
  """FastAPI endpoint for depth estimation and distance calculation"""
203
  try:
204
  # Save uploaded file temporarily
 
206
  content = await file.read()
207
  temp_file.write(content)
208
  temp_file_path = temp_file.name
209
+
210
+ # Save uploaded mask temporarily
211
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as mtemp_file:
212
+ content = await mask.read()
213
+ mtemp_file.write(content)
214
+ temp_file_path_mask = mtemp_file.name
215
+
216
  # Load image for pixel detection
217
  image = cv2.imread(temp_file_path)
218
+ mask = cv2.imread(temp_file_path_mask)
219
+ if image is None or mask is None:
220
  return JSONResponse(
221
  status_code=400,
222
+ content={"error": "Could not load image or mask"}
223
  )
224
 
225
  # Estimate depth
 
236
 
237
  # Find key pixels
238
  topmost_pixel = find_topmost_pixel(resized_image)
 
239
 
240
  # Calculate distance
241
+ distance_meters = estimate_real_world_distance(depth_map, topmost_pixel, mask)
242
 
243
  # Clean up
244
  os.unlink(temp_file_path)
245
+ os.unlink(temp_file_path_mask)
246
 
247
  result = {
248
  "depth_map_shape": depth_map.shape,
249
  "focal_length_px": float(focal_length_px) if focal_length_px is not None else None,
250
+ "topmost_pixel": [ int(topmost_pixel[0]), int(topmost_pixel[1])] if topmost_pixel else None,
 
251
  "distance_meters": distance_meters,
252
  "depth_stats": {
253
  "min_depth": float(np.min(depth_map)),
 
311
  background-color: #ecf0f1;
312
  }
313
  input[type="file"] {
314
+ margin: 10px 0;
315
  padding: 10px;
316
  border: 1px solid #bdc3c7;
317
  border-radius: 5px;
318
  }
319
+ .file-group {
320
+ margin: 20px 0;
321
+ }
322
+ .file-label {
323
+ display: block;
324
+ margin-bottom: 8px;
325
+ font-weight: bold;
326
+ color: #2c3e50;
327
+ }
328
  button {
329
  background-color: #3498db;
330
  color: white;
 
368
  <body>
369
  <div class="container">
370
  <h1>🔍 Depth Pro Distance Estimation</h1>
371
+ <p class="subtitle">Upload an image and a footpath mask to estimate depth and calculate distances using Apple's Depth Pro model</p>
372
 
373
  <div class="upload-section">
374
+ <h3>Upload Image and Mask</h3>
375
  <form id="uploadForm" enctype="multipart/form-data">
376
+ <div style="margin: 20px 0;">
377
+ <label for="imageFile" style="display: block; margin-bottom: 5px; font-weight: bold;">📸 Main Image:</label>
378
+ <input type="file" id="imageFile" name="file" accept="image/*" required style="width: 100%;">
379
+ </div>
380
+ <div style="margin: 20px 0;">
381
+ <label for="maskFile" style="display: block; margin-bottom: 5px; font-weight: bold;">🎭 Footpath Mask:</label>
382
+ <input type="file" id="maskFile" name="mask" accept="image/*" required style="width: 100%;">
383
+ </div>
384
+ <button type="submit">Analyze Image with Mask</button>
385
  </form>
386
 
387
  <div id="results" class="results">
 
392
 
393
  <div class="endpoint-info">
394
  <h3>🔗 API Endpoints</h3>
395
+ <p><strong>POST /estimate-depth</strong> - Upload image and footpath mask for depth estimation</p>
396
  <p><strong>GET /docs</strong> - API documentation</p>
397
  <p><strong>GET /health</strong> - Health check</p>
398
  </div>
 
401
  <h3>✨ Features</h3>
402
  <ul>
403
  <li>🎯 Monocular depth estimation using Depth Pro</li>
404
+ <li>🎭 Footpath mask-based analysis</li>
405
+ <li>📏 Real-world distance calculation between mask boundaries</li>
406
  <li>🖥️ CPU-optimized processing</li>
407
  <li>🚀 Fast inference suitable for real-time use</li>
408
  </ul>
 
414
  e.preventDefault();
415
 
416
  const fileInput = document.getElementById('imageFile');
417
+ const maskInput = document.getElementById('maskFile');
418
  const resultsDiv = document.getElementById('results');
419
  const resultsContent = document.getElementById('resultsContent');
420
 
421
  if (!fileInput.files[0]) {
422
+ alert('Please select a main image file');
423
+ return;
424
+ }
425
+
426
+ if (!maskInput.files[0]) {
427
+ alert('Please select a footpath mask file');
428
  return;
429
  }
430
 
431
  const formData = new FormData();
432
  formData.append('file', fileInput.files[0]);
433
+ formData.append('mask', maskInput.files[0]);
434
 
435
  try {
436
+ resultsContent.innerHTML = '<p>🔄 Processing image and mask...</p>';
437
  resultsDiv.style.display = 'block';
438
 
439
  const response = await fetch('/estimate-depth', {
 
448
  html += `<p><strong>📐 Distance:</strong> ${result.distance_meters ? result.distance_meters.toFixed(3) + ' meters' : 'N/A'}</p>`;
449
  html += `<p><strong>🎯 Focal Length:</strong> ${result.focal_length_px ? result.focal_length_px.toFixed(2) + ' pixels' : 'N/A'}</p>`;
450
  html += `<p><strong>📊 Depth Map Shape:</strong> ${result.depth_map_shape ? result.depth_map_shape.join(' x ') : 'N/A'}</p>`;
451
+ html += `<p><strong>🔝 Top Mask Pixel:</strong> ${result.topmost_pixel ? `(${result.topmost_pixel[0]}, ${result.topmost_pixel[1]})` : 'N/A'}</p>`;
 
452
 
453
  if (result.depth_stats) {
454
+ html += '<h4>📈 Depth Statistics:</h4>';
455
  html += `<p><strong>Min Depth:</strong> ${result.depth_stats.min_depth.toFixed(3)}m</p>`;
456
  html += `<p><strong>Max Depth:</strong> ${result.depth_stats.max_depth.toFixed(3)}m</p>`;
457
  html += `<p><strong>Mean Depth:</strong> ${result.depth_stats.mean_depth.toFixed(3)}m</p>`;
 
472
  """
473
  return HTMLResponse(content=html_content)
474
 
 
 
 
475
 
476
  # FastAPI app is ready to run
477
  if __name__ == "__main__":
depth_pro/utils.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ALL UTIL CREDITS TO DEPTH PRO TEAM
2
+
3
+ # Copyright (C) 2024 Apple Inc. All Rights Reserved.
4
+
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Tuple, Union
8
+
9
+ import numpy as np
10
+ import pillow_heif
11
+ from PIL import ExifTags, Image, TiffTags
12
+ from pillow_heif import register_heif_opener
13
+
14
+ register_heif_opener()
15
+ LOGGER = logging.getLogger(__name__)
16
+
17
+
18
def extract_exif(img_pil: Image.Image) -> Dict[str, Any]:
    """Return exif information as a dictionary.

    Args:
    ----
        img_pil: A Pillow image.

    Returns:
    -------
        A dictionary keyed by tag name that merges the tags of the Exif
        sub-IFD with the tags of the base IFD. When a name occurs in both,
        the base IFD value wins.

    """
    exif = img_pil.getexif()

    # Get full exif description from get_ifd(0x8769):
    # cf https://pillow.readthedocs.io/en/stable/releasenotes/8.2.0.html#image-getexif-exif-and-gps-ifd
    exif_ifd = exif.get_ifd(0x8769)
    exif_dict = {
        ExifTags.TAGS[k]: v for k, v in exif_ifd.items() if k in ExifTags.TAGS
    }

    # Tags stored directly in the base IFD, named via the TIFF tag table.
    tiff_dict = {
        TiffTags.TAGS_V2[k].name: v
        for k, v in exif.items()
        if k in TiffTags.TAGS_V2
    }
    return {**exif_dict, **tiff_dict}
42
+
43
+
44
def fpx_from_f35(width: float, height: float, f_mm: float = 50) -> float:
    """Convert a focal length given in mm (35mm film equivalent) to pixels.

    The focal length is scaled by the ratio between the image diagonal in
    pixels and the diagonal of a 36mm x 24mm frame.

    Args:
    ----
        width: Image width in pixels.
        height: Image height in pixels.
        f_mm: Focal length in mm (35mm film equivalent), default is 50.

    Returns:
    -------
        The focal length in pixels.

    """
    film_width_mm, film_height_mm = 36, 24
    return f_mm * np.hypot(width, height) / np.hypot(film_width_mm, film_height_mm)
47
+
48
+
49
def load_rgb(
    path: Union[Path, str], auto_rotate: bool = True, remove_alpha: bool = True
) -> Tuple[np.ndarray, Union[bytes, None], Union[float, None]]:
    """Load an RGB image.

    Args:
    ----
        path: The path of the image to load.
        auto_rotate: Rotate the image based on the EXIF data, default is True.
        remove_alpha: Remove the alpha channel, default is True.

    Returns:
    -------
        img: The image loaded as a numpy array.
        icc_profile: The color profile of the image, or None if it has none.
        f_px: The focal length in pixels extracted from the exif data, or
            None when the exif data does not provide a positive 35mm
            equivalent focal length.

    """
    LOGGER.debug(f"Loading image {path} ...")

    path = Path(path)
    if path.suffix.lower() in [".heic"]:
        heif_file = pillow_heif.open_heif(path, convert_hdr_to_8bit=True)
        img_pil = heif_file.to_pillow()
    else:
        img_pil = Image.open(path)

    # Read metadata before any conversion below replaces the image object.
    img_exif = extract_exif(img_pil)
    icc_profile = img_pil.info.get("icc_profile", None)

    # Palette, gray+alpha and CMYK images do not turn into RGB(A) arrays by a
    # plain np.array(): palette images yield colour indices, LA yields two
    # channels and CMYK would lose its K channel to the alpha stripping below.
    if img_pil.mode in ("P", "PA", "LA", "CMYK"):
        has_alpha = img_pil.mode in ("PA", "LA") or "transparency" in img_pil.info
        img_pil = img_pil.convert("RGBA" if has_alpha else "RGB")

    # Rotate the image.
    if auto_rotate:
        exif_orientation = img_exif.get("Orientation", 1)
        rotations = {
            3: Image.ROTATE_180,
            6: Image.ROTATE_270,
            8: Image.ROTATE_90,
        }
        rotation = rotations.get(exif_orientation)
        if rotation is not None:
            img_pil = img_pil.transpose(rotation)
        elif exif_orientation != 1:
            LOGGER.warning(f"Ignoring image orientation {exif_orientation}.")

    img = np.array(img_pil)
    # Convert to RGB if single channel.
    if img.ndim < 3 or img.shape[2] == 1:
        img = np.dstack((img, img, img))

    if remove_alpha:
        img = img[:, :, :3]

    LOGGER.debug(f"\tHxW: {img.shape[0]}x{img.shape[1]}")

    # Extract the focal length from exif data; the tag name varies, so try
    # each known spelling in turn.
    f_35mm = img_exif.get(
        "FocalLengthIn35mmFilm",
        img_exif.get(
            "FocalLenIn35mmFilm", img_exif.get("FocalLengthIn35mmFormat", None)
        ),
    )
    if f_35mm is not None and f_35mm > 0:
        LOGGER.debug(f"\tfocal length @ 35mm film: {f_35mm}mm")
        f_px = fpx_from_f35(img.shape[1], img.shape[0], f_35mm)
    else:
        f_px = None

    return img, icc_profile, f_px
requirements.txt CHANGED
@@ -7,4 +7,7 @@ numpy
7
  huggingface-hub
8
  requests
9
  python-multipart
10
- accelerate
 
 
 
 
7
  huggingface-hub
8
  requests
9
  python-multipart
10
+ accelerate
11
+ torch
12
+ torchvision
13
+ pillow_heif