diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,7 +1,7 @@ """ -Advanced 3D Reconstruction from Single/Multiple Images -Enhanced with Responsible AI features and multi-image support -Addresses: Privacy, Fairness, Explainability, Multiple Image Processing +Advanced 3D Reconstruction from Single or Multiple Images +Academic-grade pipeline with responsible AI considerations, multi-image support, +quality metrics, multiple export formats, and interactive visualization """ import gradio as gr @@ -18,116 +18,6 @@ import time from pathlib import Path import tempfile import zipfile -from datetime import datetime - -# ============================================================================ -# RESPONSIBLE AI DOCUMENTATION -# ============================================================================ -RESPONSIBLE_AI_TEXT = """ -## Responsible AI & Ethics - -### Model Limitations & Bias - -**Training Data Geographic Bias:** -- **GLPN**: Trained on NYU Depth V2 dataset (primarily New York City indoor scenes) - - **Performance**: Excellent for Western urban interiors, office spaces, apartments - - **Limitations**: May underperform on non-Western architecture, outdoor scenes, rural settings - -- **DPT**: Trained on mixed datasets (MIX 6 - multiple indoor/outdoor sources) - - **Performance**: Better generalization but still biased toward Western built environments - - **Limitations**: Less accurate for cultural artifacts, traditional architecture, natural landscapes - -**Scene Type Performance:** -| Scene Type | GLPN Accuracy | DPT Accuracy | Notes | -|------------|---------------|--------------|-------| -| Modern Indoor (Western) | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | Optimal | -| Traditional Architecture | ⭐⭐⭐ | ⭐⭐⭐⭐ | May miss details | -| Outdoor/Natural | ⭐⭐ | ⭐⭐⭐⭐ | GLPN struggles | -| Reflective Surfaces | ⭐ | ⭐⭐ | Known failure case | -| Transparent Objects | ⭐ | ⭐ | Cannot estimate depth | - -### Privacy Considerations - -**Webcam Usage:** -- ⚠️ **Warning**: Webcam captures are processed locally but may inadvertently capture: - - Identifiable people in background - - Sensitive documents or screens - - Private spaces or property - -**Best Practices:** -- Only capture objects/spaces you have permission to document -- Ensure no people are in frame (or obtain consent) -- Avoid capturing sensitive information -- All processing is done locally - no images sent to external servers - -**Data Retention:** -- Images are processed in memory only -- No automatic storage or logging -- Downloaded files are user-controlled -- No telemetry or usage tracking - -### Explainability Features - -This app provides multiple explainability layers: - -1. **Depth Map Visualization**: Color-coded confidence in distance estimation -2. **Uncertainty Maps**: Shows where model is uncertain (darker = less confident) -3. **Quality Metrics**: Statistical measures of reconstruction reliability -4. **Outlier Detection**: Identifies and reports noisy predictions -5. 
**Model Comparison**: Compare GLPN vs DPT to understand model differences - -### Fairness & Accessibility - -**Accessibility Features:** -- File upload (primary method) - works for all users -- Webcam (optional) - for users with camera access -- Multiple format exports - compatible with free software -- Detailed documentation - no assumed prior knowledge - -**Known Limitations:** -- Requires visual input (not accessible to blind users for capture) -- Processing time varies by hardware (may disadvantage low-resource users) -- Models optimized for Western scenes (geographic bias) - -### Environmental Impact - -**Computational Cost:** -- **GLPN Processing**: ~2GB RAM, 0.3-2.5s CPU time -- **DPT Processing**: ~5GB RAM, 0.8-6.5s CPU time -- **Carbon Estimate**: ~0.001-0.005 kWh per reconstruction - -**Recommendations:** -- Use GLPN for most tasks (4x more efficient) -- Batch process multiple images to reduce overhead -- Consider hardware upgrade carbon cost vs processing efficiency - -### Dual-Use & Misuse Prevention - -**Prohibited Uses:** -- ❌ Unauthorized surveillance or monitoring -- ❌ Scanning people without explicit consent -- ❌ Documenting property without permission -- ❌ Creating deepfakes or deceptive content -- ❌ Any use that violates privacy or dignity - -**Intended Uses:** -- ✅ Educational research and learning -- ✅ Personal photography projects -- ✅ Architectural documentation (with permission) -- ✅ Product design and prototyping -- ✅ Cultural heritage preservation (authorized) - -### Terms of Use - -By using this application, you agree to: -1. Only process images you have rights to use -2. Not capture identifiable people without consent -3. Use outputs ethically and legally -4. Not use for surveillance or deceptive purposes -5. Understand model limitations and biases - -**If you observe misuse or have ethical concerns, please report them.** -""" # ============================================================================ # LITERATURE REVIEW & THEORETICAL BACKGROUND @@ -137,7 +27,7 @@ THEORY_TEXT = """ ## About This Tool -This application demonstrates how artificial intelligence can convert single 2D photographs into interactive 3D models automatically. +This application demonstrates how artificial intelligence can convert 2D photographs into interactive 3D models automatically, with a focus on responsible AI practices. ### What Makes This Special @@ -171,277 +61,187 @@ This tool uses state-of-the-art artificial intelligence models: - Best for: Wide-area urban landscapes, complex built environments - Geographic advantage: Superior accuracy for planning-grade documentation +### Multi-Image Reconstruction + +**Single Image Mode:** +- Fast processing +- Works with limited data +- Best for quick assessments +- Limitations: Single viewpoint, scale ambiguity + +**Multiple Image Mode (NEW):** +- Improved coverage and accuracy +- Combines depth maps from different viewpoints +- Reduces occlusion issues +- Better overall 3D representation +- Note: Images should be of the same object/scene from different angles + ### How It Works (Simple) -1. **AI looks at photo** → Recognizes objects, patterns, perspective +1. **AI looks at photo(s)** → Recognizes objects, patterns, perspective 2. **Estimates distance** → Figures out what's close, what's far 3. **Creates 3D points** → Places colored dots in 3D space 4. **Builds surface** → Connects dots into smooth shape +5. 
**Multi-view fusion** (if multiple images) → Combines information for better accuracy + +### Responsible AI Considerations + +This tool is designed with responsible AI principles in mind: + +**1. Privacy Protection:** +- All processing happens locally - no data sent to external servers +- No image storage or retention after processing +- No facial recognition or identity tracking +- Users maintain full control over their data +- Recommendation: Avoid uploading images with identifiable individuals + +**2. Explainability & Transparency:** +- Depth map visualization shows how AI "sees" the scene +- Quality metrics provide confidence indicators +- Processing steps are clearly documented +- Model limitations are explicitly stated +- Users can verify reconstruction quality + +**3. Fairness & Bias Awareness:** +- Models trained primarily on indoor/urban scenes +- May perform differently on underrepresented scene types +- Quality metrics help identify potential biases +- Users should validate results for critical applications + +**4. Intended Use & Limitations:** +- Designed for educational and research purposes +- Not suitable for: safety-critical applications, surveillance, or precise measurements +- Best for: visualization, preliminary analysis, teaching +- Scale ambiguity: requires ground control for absolute measurements + +**5. Data Governance:** +- Open-source models with documented training data +- No proprietary algorithms or black boxes +- Full transparency in reconstruction pipeline +- Users can audit and validate the process + +### Spatial Data Pipeline + +Our reconstruction pipeline generates geospatially-relevant data: + +**1. Monocular Depth Estimation** + - Challenge: Extracting 3D spatial information from 2D photographs + - Application: Similar to photogrammetry but from single images + - Output: Relative depth maps for spatial analysis + - Use case: Quick field assessment without specialized equipment + +**2. Point Cloud Generation (Spatial Coordinates)** + - Creates 3D coordinate system (X, Y, Z) from pixels + - Each point: Geographic location + RGB color information + - Compatible with: GIS software, CAD tools, spatial databases + - Use case: Integration with existing urban datasets + +**3. 3D Mesh Generation (Surface Models)** + - Creates continuous surface from discrete points + - Similar to: Digital terrain models (DTMs) for buildings + - Output formats: Compatible with ArcGIS, QGIS, SketchUp + - Use case: 3D city models, urban visualization + +### Spatial Quality Metrics + +**For Urban Planning Applications:** + +- **Point Cloud Density**: 290K+ points = high spatial resolution +- **Geometric Accuracy**: Manifold checks ensure valid topology +- **Surface Continuity**: Watertight meshes = complete volume calculations +- **Data Fidelity**: Triangle count indicates level of detail + +**Limitations for Geographic Applications:** + +1. **Scale Ambiguity**: Requires ground control points for absolute measurements +2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces (reduced with multi-image mode) +3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon) +4. **Weather Dependent**: Best results with clear, well-lit conditions -### Multi-Image Processing & Automatic Alignment (NEW!) 
- -**Single Image Mode:** -- Fast, works from one photo -- Relative depth only (no absolute scale) -- Hidden surfaces cannot be reconstructed - -**Multiple Image Mode:** -- Upload 2-8 images of same object/scene from different angles -- **Automatic Alignment**: Uses ICP (Iterative Closest Point) algorithm to align point clouds -- **Automatic Merging**: Combines aligned point clouds into unified 3D model -- No manual alignment needed - fully automated! - -**Alignment Pipeline:** -1. **Feature Extraction**: Computes FPFH (Fast Point Feature Histograms) for each point cloud -2. **Global Registration**: RANSAC-based matching to find initial alignment -3. **Refinement**: ICP (Iterative Closest Point) for precise alignment -4. **Merging**: Combines aligned clouds, removes duplicates, creates unified mesh - -**Why Multiple Images Help:** -- Complete 360° coverage (all sides visible) -- Better accuracy through redundancy -- More complete models -- Professional-grade results automatically! """ # ============================================================================ -# MODEL LOADING +# RESPONSIBLE AI HELPER FUNCTIONS # ============================================================================ -print("Loading GLPN model...") -glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") -glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") -print("GLPN model loaded successfully!") - -# DPT will be loaded on demand -dpt_model = None -dpt_processor = None - -# ============================================================================ -# UNCERTAINTY ESTIMATION -# ============================================================================ - -def estimate_uncertainty(depth_map): +def check_image_privacy(image): """ - Estimate uncertainty in depth predictions - Higher values = less confident predictions + Check if image might contain sensitive information. + Returns warnings if potential privacy concerns detected. """ - # Compute local depth variance as proxy for uncertainty - from scipy.ndimage import generic_filter - - def local_std(values): - return np.std(values) + warnings = [] - # Compute local standard deviation - uncertainty = generic_filter(depth_map, local_std, size=5) + # Check image size - very high resolution might indicate detailed surveillance + width, height = image.size + if width * height > 4000 * 3000: + warnings.append("⚠️ High-resolution image detected. Ensure it doesn't contain identifiable individuals.") - # Normalize to 0-1 range - uncertainty = (uncertainty - uncertainty.min()) / (uncertainty.max() - uncertainty.min() + 1e-8) + # Check aspect ratio - some aspect ratios common in surveillance cameras + aspect_ratio = width / height + if aspect_ratio > 2.5 or aspect_ratio < 0.4: + warnings.append("ℹ️ Unusual aspect ratio detected. Common in security camera footage.") - return uncertainty - -# ============================================================================ -# FAILURE CASE DETECTION -# ============================================================================ + return warnings -def detect_challenging_conditions(image, depth_map): +def generate_explainability_report(metrics, depth_stats): """ - Detect challenging scenarios that may lead to poor reconstruction - Returns: List of warnings + Generate an explainability report for the reconstruction. + Helps users understand how the AI made decisions. """ - warnings = [] - - # Convert to numpy if needed - img_array = np.array(image) - - # 1. 
Check for very dark images - brightness = np.mean(img_array) - if brightness < 50: - warnings.append("⚠️ Very dark image - may reduce depth accuracy") + report = "### 🔍 AI Decision Explainability\n\n" - # 2. Check for low contrast - std_dev = np.std(img_array) - if std_dev < 30: - warnings.append("⚠️ Low contrast - uniform textures reduce accuracy") + # Depth estimation confidence + depth_range = depth_stats['max'] - depth_stats['min'] + depth_variation = depth_stats['std'] / depth_stats['mean'] - # 3. Check for potential reflective surfaces (high local variance in depth) - depth_variance = np.var(depth_map) - if depth_variance > np.percentile(np.var(depth_map.reshape(-1, 10), axis=1), 95): - warnings.append("⚠️ Possible reflective surfaces detected - depth may be inaccurate") + if depth_variation > 0.5: + report += "- **High depth variation detected**: Scene has significant depth differences (good for reconstruction)\n" + else: + report += "- **Low depth variation**: Scene is relatively flat (may limit 3D detail)\n" + + # Point cloud quality + outlier_ratio = metrics['outliers_removed'] / metrics['initial_points'] + if outlier_ratio < 0.05: + report += "- **Clean depth estimation**: AI is confident about depth predictions (< 5% outliers)\n" + elif outlier_ratio < 0.15: + report += "- **Moderate noise**: Some uncertainty in depth predictions (normal for complex scenes)\n" + else: + report += "- **High uncertainty**: AI struggled with this scene (> 15% outliers removed)\n" - # 4. Check for extreme depth discontinuities (potential transparent objects) - from scipy.ndimage import sobel - depth_edges = np.sqrt(sobel(depth_map, axis=0)**2 + sobel(depth_map, axis=1)**2) - if np.percentile(depth_edges, 99) > 3 * np.percentile(depth_edges, 95): - warnings.append("⚠️ Sharp depth discontinuities - may indicate transparent/reflective objects") + # Mesh quality + if metrics['is_watertight']: + report += "- **Complete surface reconstruction**: AI successfully closed all gaps\n" + else: + report += "- **Incomplete surface**: Some areas couldn't be reconstructed (occluded or ambiguous)\n" - # 5. 
Check image size - if image.width < 320 or image.height < 240: - warnings.append("⚠️ Low resolution image - use higher resolution for better results") + # Confidence level + if metrics['is_edge_manifold'] and outlier_ratio < 0.1: + report += "\n**Overall Confidence**: ✅ High - Results are reliable\n" + elif metrics['is_vertex_manifold']: + report += "\n**Overall Confidence**: ⚠️ Medium - Results are usable but verify quality\n" + else: + report += "\n**Overall Confidence**: ❌ Low - Results may need manual correction\n" - return warnings + return report # ============================================================================ -# AUTOMATIC ALIGNMENT FUNCTIONS +# MODEL LOADING # ============================================================================ -def align_point_clouds(point_clouds): - """ - Automatically align multiple point clouds using ICP (Iterative Closest Point) - Returns aligned point clouds and transformation matrices - """ - if len(point_clouds) <= 1: - return point_clouds, [] - - print("\n" + "="*60) - print("Starting Automatic Alignment (ICP)") - print("="*60) - - aligned_pcds = [point_clouds[0]] # First cloud is reference - transformations = [] - - for i in range(1, len(point_clouds)): - print(f"\nAligning point cloud {i+1} to reference...") - - source = point_clouds[i] - target = aligned_pcds[0] # Always align to first cloud - - # Initial alignment using global registration (faster, rough alignment) - print(f" Step 1: Computing FPFH features...") - source_down = source.voxel_down_sample(voxel_size=0.05) - target_down = target.voxel_down_sample(voxel_size=0.05) - - source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) - target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) - - source_fpfh = o3d.pipelines.registration.compute_fpfh_feature( - source_down, - o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100) - ) - target_fpfh = o3d.pipelines.registration.compute_fpfh_feature( - target_down, - o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100) - ) - - print(f" Step 2: Global registration (RANSAC)...") - result_ransac = o3d.pipelines.registration.registration_ransac_based_on_feature_matching( - source_down, target_down, source_fpfh, target_fpfh, - mutual_filter=True, - max_correspondence_distance=0.15, - estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(False), - ransac_n=3, - checkers=[ - o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9), - o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(0.15) - ], - criteria=o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999) - ) - - print(f" Global registration fitness: {result_ransac.fitness:.4f}") - - # Refine with ICP - print(f" Step 3: Refining with ICP...") - threshold = 0.02 - result_icp = o3d.pipelines.registration.registration_icp( - source, target, threshold, result_ransac.transformation, - o3d.pipelines.registration.TransformationEstimationPointToPlane() - ) - - print(f" ICP fitness: {result_icp.fitness:.4f}") - print(f" ICP RMSE: {result_icp.inlier_rmse:.6f}") - - # Apply transformation - source_aligned = source.transform(result_icp.transformation) - aligned_pcds.append(source_aligned) - transformations.append(result_icp.transformation) - - print(f" ✓ Point cloud {i+1} aligned successfully!") - - print("\n" + "="*60) - print(f"Alignment complete! 
All {len(point_clouds)} point clouds aligned.") - print("="*60 + "\n") - - return aligned_pcds, transformations - -def merge_point_clouds(aligned_pcds): - """ - Merge aligned point clouds into a single unified point cloud - """ - print("Merging aligned point clouds...") - merged = o3d.geometry.PointCloud() - - for pcd in aligned_pcds: - merged += pcd - - # Remove duplicate points and outliers - print("Cleaning merged point cloud...") - merged = merged.voxel_down_sample(voxel_size=0.01) - cl, ind = merged.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - merged = merged.select_by_index(ind) - - print(f"Merged point cloud: {len(merged.points)} points") - return merged +print("Loading GLPN model...") +glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") +glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") +print("GLPN model loaded successfully!") -def create_mesh_from_merged_pointcloud(pcd): - """ - Create a high-quality mesh from merged point cloud - """ - print("Creating mesh from merged point cloud...") - - # Estimate normals - pcd.estimate_normals() - pcd.orient_normals_consistent_tangent_plane(100) - - # Poisson reconstruction - mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( - pcd, depth=10, n_threads=-1 - ) - - # Remove low density vertices - vertices_to_remove = densities < np.quantile(densities, 0.01) - mesh.remove_vertices_by_mask(vertices_to_remove) - - # Transfer colors - print("Transferring colors to merged mesh...") - pcd_tree = o3d.geometry.KDTreeFlann(pcd) - mesh_colors = [] - for vertex in mesh.vertices: - [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) - mesh_colors.append(pcd.colors[idx[0]]) - mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) - - # Clean up - mesh.remove_degenerate_triangles() - mesh.remove_duplicated_triangles() - mesh.remove_duplicated_vertices() - mesh.remove_non_manifold_edges() - - print(f"Merged mesh: {len(mesh.vertices)} vertices, {len(mesh.triangles)} triangles") - return mesh +# DPT will be loaded on demand +dpt_model = None +dpt_processor = None # ============================================================================ # CORE 3D RECONSTRUCTION FUNCTIONS # ============================================================================ -def process_single_image(image, model_choice, image_idx=0, total_images=1): - """Process a single image and return depth map, point cloud, mesh, and metrics""" - - print(f"\n{'='*60}") - print(f"Processing image {image_idx+1}/{total_images}") - print(f"{'='*60}") - - # STEP 1: Preprocess image - print("Step 1: Preprocessing image...") - new_height = 480 if image.height > 480 else image.height - new_height -= (new_height % 32) - new_width = int(new_height * image.width / image.height) - diff = new_width % 32 - new_width = new_width - diff if diff < 16 else new_width + (32 - diff) - new_size = (new_width, new_height) - image = image.resize(new_size, Image.LANCZOS) - print(f"Image resized to: {new_size}") - - # STEP 2: Depth estimation - print("Step 2: Estimating depth...") +def estimate_depth_for_image(image, model_choice): + """Estimate depth for a single image""" if model_choice == "GLPN (Recommended)": processor = glpn_processor model = glpn_model @@ -456,658 +256,549 @@ def process_single_image(image, model_choice, image_idx=0, total_images=1): inputs = processor(images=image, return_tensors="pt") - start_time = time.time() with torch.no_grad(): outputs = model(**inputs) predicted_depth = 
outputs.predicted_depth - depth_time = time.time() - start_time - print(f"Depth estimation completed in {depth_time:.2f}s") - - # Process depth output - pad = 16 - output = predicted_depth.squeeze().cpu().numpy() * 1000.0 - output = output[pad:-pad, pad:-pad] - image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad)) - - # Ensure depth and image have same dimensions - depth_height, depth_width = output.shape - img_width, img_height = image_cropped.size - - print(f"After crop - Depth shape: {output.shape}, Image size: {image_cropped.size}") - - # Resize depth to match image if needed - if depth_height != img_height or depth_width != img_width: - print(f"Resizing depth from ({depth_height}, {depth_width}) to ({img_height}, {img_width})") - from scipy import ndimage - zoom_factors = (img_height / depth_height, img_width / depth_width) - output = ndimage.zoom(output, zoom_factors, order=1) - print(f"Depth resized to: {output.shape}") - - image = image_cropped - - # STEP 3: Estimate uncertainty - print("Step 3: Estimating uncertainty...") - uncertainty_map = estimate_uncertainty(output) - - # STEP 4: Detect challenging conditions - print("Step 4: Detecting challenging conditions...") - warnings = detect_challenging_conditions(image, output) - - # STEP 5: Create point cloud - print("Step 5: Generating point cloud...") - width, height = image.size - - depth_image = (output * 255 / np.max(output)).astype(np.uint8) - image_array = np.array(image) - - print(f"Creating RGBD - Image: {image_array.shape}, Depth: {depth_image.shape}") - - depth_o3d = o3d.geometry.Image(depth_image) - image_o3d = o3d.geometry.Image(image_array) - rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( - image_o3d, depth_o3d, convert_rgb_to_intensity=False - ) - - camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() - camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2) - - pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic) - initial_points = len(pcd.points) - print(f"Initial point cloud: {initial_points} points") - - # STEP 6: Clean point cloud - print("Step 6: Cleaning point cloud...") - cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - pcd = pcd.select_by_index(ind) - outliers_removed = initial_points - len(pcd.points) - print(f"Removed {outliers_removed} outliers") - - # STEP 7: Estimate normals - print("Step 7: Estimating normals...") - pcd.estimate_normals() - pcd.orient_normals_to_align_with_direction() - - # STEP 8: Create mesh - print("Step 8: Creating mesh...") - mesh_start = time.time() - mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( - pcd, depth=10, n_threads=1 - )[0] - - # Transfer colors from point cloud to mesh vertices - print("Transferring colors to mesh...") - pcd_tree = o3d.geometry.KDTreeFlann(pcd) - mesh_colors = [] - for vertex in mesh.vertices: - [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) - mesh_colors.append(pcd.colors[idx[0]]) - mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) - - # Rotate mesh - rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) - mesh.rotate(rotation, center=(0, 0, 0)) - mesh_time = time.time() - mesh_start - print(f"Mesh created in {mesh_time:.2f}s") + return predicted_depth + +def merge_point_clouds(point_clouds, colors_list): + """ + Merge multiple point clouds with basic alignment. + Note: This is a simple merging strategy. For better results, + consider using registration algorithms (ICP, etc.) 
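+    A hypothetical refinement (not what this function currently does) would
+    register each cloud onto the first one with Open3D's point-to-point ICP
+    before concatenating, assuming the clouds roughly overlap and share a scale.
+    The helper name `icp_merge`, the 0.02 correspondence threshold, and the
+    identity initialisation below are illustrative assumptions, not tuned
+    values. For example:
+
+        import numpy as np
+        import open3d as o3d
+
+        def icp_merge(point_clouds, colors_list, threshold=0.02):
+            # Use the first cloud as the fixed reference and accumulate into it.
+            merged = o3d.geometry.PointCloud()
+            merged.points = o3d.utility.Vector3dVector(point_clouds[0])
+            merged.colors = o3d.utility.Vector3dVector(colors_list[0])
+            for pts, cols in zip(point_clouds[1:], colors_list[1:]):
+                source = o3d.geometry.PointCloud()
+                source.points = o3d.utility.Vector3dVector(pts)
+                source.colors = o3d.utility.Vector3dVector(cols)
+                # Estimate the rigid transform aligning `source` to the
+                # accumulated reference, then merge the transformed cloud.
+                reg = o3d.pipelines.registration.registration_icp(
+                    source, merged, threshold, np.eye(4),
+                    o3d.pipelines.registration.TransformationEstimationPointToPoint())
+                merged += source.transform(reg.transformation)
+            return np.asarray(merged.points), np.asarray(merged.colors)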
+ """ + all_points = [] + all_colors = [] - # STEP 9: Compute quality metrics - print("Step 9: Computing metrics...") - mesh.compute_vertex_normals() + for i, (points, colors) in enumerate(zip(point_clouds, colors_list)): + # Simple offset strategy to prevent complete overlap + offset = np.array([i * 0.5, 0, 0]) # Offset along X-axis + all_points.append(points + offset) + all_colors.append(colors) - metrics = { - 'image_index': image_idx + 1, - 'model_used': model_choice, - 'depth_estimation_time': f"{depth_time:.2f}s", - 'mesh_reconstruction_time': f"{mesh_time:.2f}s", - 'total_time': f"{depth_time + mesh_time:.2f}s", - 'initial_points': initial_points, - 'outliers_removed': outliers_removed, - 'final_points': len(pcd.points), - 'vertices': len(mesh.vertices), - 'triangles': len(mesh.triangles), - 'is_edge_manifold': mesh.is_edge_manifold(), - 'is_vertex_manifold': mesh.is_vertex_manifold(), - 'is_watertight': mesh.is_watertight(), - 'warnings': warnings, - 'avg_uncertainty': float(np.mean(uncertainty_map)) - } + merged_points = np.vstack(all_points) + merged_colors = np.vstack(all_colors) - # Compute surface area - try: - vertices = np.asarray(mesh.vertices) - triangles = np.asarray(mesh.triangles) - v0 = vertices[triangles[:, 0]] - v1 = vertices[triangles[:, 1]] - v2 = vertices[triangles[:, 2]] - cross = np.cross(v1 - v0, v2 - v0) - areas = 0.5 * np.linalg.norm(cross, axis=1) - total_area = np.sum(areas) - metrics['surface_area'] = float(total_area) - except: - metrics['surface_area'] = "Unable to compute" + return merged_points, merged_colors + +def process_image(images, model_choice="GLPN (Recommended)", visualization_type="mesh", enable_privacy_check=True): + """Main processing pipeline - supports single or multiple images""" - # Compute volume if watertight - try: - if mesh.is_watertight(): - volume = mesh.get_volume() - metrics['volume'] = float(volume) + def _generate_quality_assessment(metrics): + """Generate quality assessment based on metrics""" + assessment = [] + + # Check outlier removal + outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100 + if outlier_pct < 5: + assessment.append("Very clean depth estimation (low noise)") + elif outlier_pct < 15: + assessment.append("Good depth quality (normal noise level)") else: - metrics['volume'] = None - except: - metrics['volume'] = None - - return { - 'image': image, - 'depth_map': output, - 'uncertainty_map': uncertainty_map, - 'point_cloud': pcd, - 'mesh': mesh, - 'metrics': metrics, - 'warnings': warnings - } - -def process_image(images, model_choice="GLPN (Recommended)", visualization_type="mesh", enable_alignment=True): - """Main processing pipeline - handles single or multiple images with automatic alignment""" + assessment.append("High noise in depth estimation") + + # Check manifold properties + if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']: + assessment.append("Excellent topology - mesh is well-formed") + elif metrics['is_vertex_manifold']: + assessment.append("Good local topology but has some edge issues") + else: + assessment.append("Topology issues present - may need cleanup") + + # Check watertight + if metrics['is_watertight']: + assessment.append("Watertight mesh - ready for 3D printing!") + else: + assessment.append("Not watertight - use MeshLab's 'Close Holes' for 3D printing") + + # Check complexity + if metrics['triangles'] > 1000000: + assessment.append("Very detailed mesh - may be slow in some software") + elif metrics['triangles'] > 500000: + assessment.append("High 
detail mesh - good quality") + else: + assessment.append("Moderate detail - good balance of quality and performance") + + return "\n".join(f"- {item}" for item in assessment) if images is None or len(images) == 0: - return None, None, None, "Please upload at least one image.", None + return None, None, None, "Please upload at least one image.", None, None + + # Handle single image case + if not isinstance(images, list): + images = [images] try: - # Handle single image vs multiple images - if not isinstance(images, list): - images = [images] - num_images = len(images) - print(f"\n{'#'*60}") - print(f"Starting reconstruction with {num_images} image(s)") - print(f"Model: {model_choice}") - print(f"Automatic Alignment: {'Enabled' if enable_alignment and num_images > 1 else 'Disabled'}") - print(f"{'#'*60}\n") + print(f"Starting reconstruction with {num_images} image(s) using {model_choice}...") - # Process each image - results = [] - for idx, img in enumerate(images): - result = process_single_image(img, model_choice, idx, num_images) - results.append(result) + # Privacy checks if enabled + privacy_warnings = [] + if enable_privacy_check: + for idx, img in enumerate(images): + warnings = check_image_privacy(img) + if warnings: + privacy_warnings.extend([f"Image {idx+1}: {w}" for w in warnings]) - # AUTOMATIC ALIGNMENT for multiple images - aligned_pcds = None - merged_pcd = None - merged_mesh = None - alignment_info = "" + privacy_report = "" + if privacy_warnings: + privacy_report = "### 🔒 Privacy Considerations\n\n" + "\n".join(privacy_warnings) + "\n\n" - if num_images > 1 and enable_alignment: - try: - # Extract point clouds - point_clouds = [r['point_cloud'] for r in results] - - # Align them - aligned_pcds, transformations = align_point_clouds(point_clouds) - - # Merge into single point cloud - merged_pcd = merge_point_clouds(aligned_pcds) - - # Create unified mesh - merged_mesh = create_mesh_from_merged_pointcloud(merged_pcd) - - alignment_info = f""" -### ✨ Automatic Alignment Results - -Successfully aligned and merged {num_images} point clouds! - -**Alignment Quality:** -""" - for i, trans in enumerate(transformations): - translation = np.linalg.norm(trans[:3, 3]) - alignment_info += f"- Image {i+2} → Image 1: Translation distance = {translation:.3f} units\n" - - alignment_info += f""" -**Merged Model Statistics:** -- Total Points: {len(merged_pcd.points):,} -- Mesh Vertices: {len(merged_mesh.vertices):,} -- Mesh Triangles: {len(merged_mesh.triangles):,} -- Watertight: {'✓ Yes' if merged_mesh.is_watertight() else '✗ No (may need repair)'} - -*The merged model provides a complete 360° reconstruction!* -""" - except Exception as e: - print(f"Alignment failed: {e}") - import traceback - traceback.print_exc() - alignment_info = f""" -### ⚠️ Automatic Alignment Failed - -Error: {str(e)} - -**Fallback:** Individual models exported separately. You can try manual alignment in CloudCompare/MeshLab. - -**Common causes:** -- Insufficient overlap between images -- Very different viewpoints -- Lack of distinctive features -- Reflective/transparent surfaces -""" - - # Create combined visualizations - print("\n" + "="*60) - print("Creating visualizations...") - print("="*60) + # Process each image + all_point_clouds = [] + all_colors = [] + depth_visualizations = [] + depth_stats_list = [] + total_depth_time = 0 - # 1. 
DEPTH MAP COMPARISON (for first image or grid for multiple) - if num_images == 1: - # Single image visualization - result = results[0] - fig, ax = plt.subplots(1, 3, figsize=(18, 6)) - - ax[0].imshow(result['image']) - ax[0].set_title('Original Image', fontsize=14, fontweight='bold') + for idx, image in enumerate(images): + print(f"\n=== Processing Image {idx+1}/{num_images} ===") + + # STEP 1: Preprocess image + print(f"Image {idx+1}: Preprocessing...") + new_height = 480 if image.height > 480 else image.height + new_height -= (new_height % 32) + new_width = int(new_height * image.width / image.height) + diff = new_width % 32 + new_width = new_width - diff if diff < 16 else new_width + (32 - diff) + new_size = (new_width, new_height) + image = image.resize(new_size, Image.LANCZOS) + print(f"Image {idx+1} resized to: {new_size}") + + # STEP 2: Depth estimation + print(f"Image {idx+1}: Estimating depth...") + start_time = time.time() + predicted_depth = estimate_depth_for_image(image, model_choice) + depth_time = time.time() - start_time + total_depth_time += depth_time + print(f"Image {idx+1}: Depth estimation completed in {depth_time:.2f}s") + + # Process depth output + pad = 16 + output = predicted_depth.squeeze().cpu().numpy() * 1000.0 + output = output[pad:-pad, pad:-pad] + image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad)) + + # Ensure depth and image have same dimensions + depth_height, depth_width = output.shape + img_width, img_height = image_cropped.size + + if depth_height != img_height or depth_width != img_width: + from scipy import ndimage + zoom_factors = (img_height / depth_height, img_width / depth_width) + output = ndimage.zoom(output, zoom_factors, order=1) + + image = image_cropped + + # Store depth statistics for explainability + depth_stats = { + 'min': float(np.min(output)), + 'max': float(np.max(output)), + 'mean': float(np.mean(output)), + 'std': float(np.std(output)) + } + depth_stats_list.append(depth_stats) + + # Create depth visualization + fig, ax = plt.subplots(1, 2, figsize=(14, 7)) + ax[0].imshow(image) + ax[0].set_title(f'Image {idx+1}: Original', fontsize=14, fontweight='bold') ax[0].axis('off') - im1 = ax[1].imshow(result['depth_map'], cmap='plasma') - ax[1].set_title('Depth Map', fontsize=14, fontweight='bold') + im = ax[1].imshow(output, cmap='plasma') + ax[1].set_title(f'Image {idx+1}: Depth Map', fontsize=14, fontweight='bold') ax[1].axis('off') - plt.colorbar(im1, ax=ax[1], fraction=0.046, pad=0.04) + plt.colorbar(im, ax=ax[1], fraction=0.046, pad=0.04) + plt.tight_layout() - im2 = ax[2].imshow(result['uncertainty_map'], cmap='Reds') - ax[2].set_title('Uncertainty Map (Red = Less Confident)', fontsize=14, fontweight='bold') - ax[2].axis('off') - plt.colorbar(im2, ax=ax[2], fraction=0.046, pad=0.04) + buf = io.BytesIO() + plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') + buf.seek(0) + depth_viz = Image.open(buf) + depth_visualizations.append(depth_viz) + plt.close() + + # STEP 4: Create point cloud for this image + print(f"Image {idx+1}: Generating point cloud...") + width, height = image.size + + if output.shape != (height, width): + from scipy import ndimage + zoom_factors = (height / output.shape[0], width / output.shape[1]) + output = ndimage.zoom(output, zoom_factors, order=1) + + depth_image = (output * 255 / np.max(output)).astype(np.uint8) + image_array = np.array(image) + + depth_o3d = o3d.geometry.Image(depth_image) + image_o3d = o3d.geometry.Image(image_array) + rgbd_image = 
o3d.geometry.RGBDImage.create_from_color_and_depth( + image_o3d, depth_o3d, convert_rgb_to_intensity=False + ) - plt.tight_layout() + camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() + camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2) + + pcd_temp = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic) + + # Store points and colors for merging + all_point_clouds.append(np.asarray(pcd_temp.points)) + all_colors.append(np.asarray(pcd_temp.colors)) + + print(f"Image {idx+1}: Generated {len(pcd_temp.points)} points") + + # Combine depth visualizations + if len(depth_visualizations) == 1: + combined_depth_viz = depth_visualizations[0] else: - # Multiple images - create grid - rows = (num_images + 1) // 2 - fig, axes = plt.subplots(rows, 6, figsize=(24, 4*rows)) + # Create a grid of depth visualizations + cols = min(2, len(depth_visualizations)) + rows = (len(depth_visualizations) + cols - 1) // cols + + fig, axes = plt.subplots(rows, cols, figsize=(14 * cols, 7 * rows)) if rows == 1: - axes = axes.reshape(1, -1) + axes = [axes] if cols == 1 else axes + else: + axes = axes.flatten() - for idx, result in enumerate(results): - row = idx // 2 - col = (idx % 2) * 3 - - axes[row, col].imshow(result['image']) - axes[row, col].set_title(f'Image {idx+1}', fontsize=12, fontweight='bold') - axes[row, col].axis('off') - - im1 = axes[row, col+1].imshow(result['depth_map'], cmap='plasma') - axes[row, col+1].set_title(f'Depth {idx+1}', fontsize=12, fontweight='bold') - axes[row, col+1].axis('off') - - im2 = axes[row, col+2].imshow(result['uncertainty_map'], cmap='Reds') - axes[row, col+2].set_title(f'Uncertainty {idx+1}', fontsize=12, fontweight='bold') - axes[row, col+2].axis('off') + for idx, depth_viz in enumerate(depth_visualizations): + axes[idx].imshow(depth_viz) + axes[idx].axis('off') + axes[idx].set_title(f'Image {idx+1}', fontsize=16, fontweight='bold') # Hide unused subplots - for idx in range(num_images, rows * 2): - row = idx // 2 - for col in range(3): - axes[row, (idx % 2) * 3 + col].axis('off') + for idx in range(len(depth_visualizations), len(axes)): + axes[idx].axis('off') plt.tight_layout() + buf = io.BytesIO() + plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') + buf.seek(0) + combined_depth_viz = Image.open(buf) + plt.close() - buf = io.BytesIO() - plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') - buf.seek(0) - depth_viz = Image.open(buf) - plt.close() + # STEP 5: Merge point clouds if multiple images + print(f"\nMerging {num_images} point cloud(s)...") + if num_images > 1: + merged_points, merged_colors = merge_point_clouds(all_point_clouds, all_colors) + else: + merged_points = all_point_clouds[0] + merged_colors = all_colors[0] - # 2. 
3D VISUALIZATION - print("Creating 3D visualization...") + # Create combined point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(merged_points) + pcd.colors = o3d.utility.Vector3dVector(merged_colors) - if num_images == 1: - # Single visualization - result = results[0] - points = np.asarray(result['point_cloud'].points) - colors = np.asarray(result['point_cloud'].colors) - mesh = result['mesh'] - - if visualization_type == "point_cloud": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Point Cloud' - ) - - plotly_fig = go.Figure(data=[scatter]) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title="Point Cloud" - ) - - elif visualization_type == "mesh": - vertices = np.asarray(mesh.vertices) - triangles = np.asarray(mesh.triangles) - - if mesh.has_vertex_colors(): - vertex_colors = np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Mesh' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Mesh' - ) - - plotly_fig = go.Figure(data=[mesh_trace]) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title="3D Mesh" - ) - - else: # both - from plotly.subplots import make_subplots - + initial_points = len(pcd.points) + print(f"Combined point cloud: {initial_points} points") + + # STEP 6: Clean point cloud + print("Cleaning combined point cloud...") + cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) + pcd = pcd.select_by_index(ind) + outliers_removed = initial_points - len(pcd.points) + print(f"Removed {outliers_removed} outliers") + + # STEP 7: Estimate normals + print("Estimating normals...") + pcd.estimate_normals() + pcd.orient_normals_to_align_with_direction() + + # STEP 8: Create mesh + print("Creating mesh...") + mesh_start = time.time() + mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( + pcd, depth=10, n_threads=1 + )[0] + + # Transfer colors from point cloud to mesh vertices + print("Transferring colors to mesh...") + pcd_tree = o3d.geometry.KDTreeFlann(pcd) + mesh_colors = [] + for vertex in mesh.vertices: + [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) + mesh_colors.append(pcd.colors[idx[0]]) + mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) + + # Rotate mesh + rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) + mesh.rotate(rotation, center=(0, 0, 0)) + mesh_time = time.time() - mesh_start + print(f"Mesh created in {mesh_time:.2f}s") + + # STEP 9: Compute quality metrics + print("Computing metrics...") + mesh.compute_vertex_normals() + + metrics = { + 'model_used': model_choice, + 
'num_images': num_images, + 'depth_estimation_time': f"{total_depth_time:.2f}s", + 'mesh_reconstruction_time': f"{mesh_time:.2f}s", + 'total_time': f"{total_depth_time + mesh_time:.2f}s", + 'initial_points': initial_points, + 'outliers_removed': outliers_removed, + 'final_points': len(pcd.points), + 'vertices': len(mesh.vertices), + 'triangles': len(mesh.triangles), + 'is_edge_manifold': mesh.is_edge_manifold(), + 'is_vertex_manifold': mesh.is_vertex_manifold(), + 'is_watertight': mesh.is_watertight(), + } + + # Compute surface area + surface_area_computed = False + try: + surface_area = mesh.get_surface_area() + if surface_area > 0: + metrics['surface_area'] = float(surface_area) + surface_area_computed = True + except: + pass + + if not surface_area_computed: + try: vertices = np.asarray(mesh.vertices) triangles = np.asarray(mesh.triangles) - - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Point Cloud' - ) - - if mesh.has_vertex_colors(): - vertex_colors = np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Mesh' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Mesh' - ) - - plotly_fig = make_subplots( - rows=1, cols=2, - specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]], - subplot_titles=('Point Cloud', '3D Mesh') - ) - - plotly_fig.add_trace(scatter, row=1, col=1) - plotly_fig.add_trace(mesh_trace, row=1, col=2) - - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - scene2=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - height=600, - showlegend=False, - margin=dict(l=0, r=0, t=50, b=0) - ) + v0 = vertices[triangles[:, 0]] + v1 = vertices[triangles[:, 1]] + v2 = vertices[triangles[:, 2]] + cross = np.cross(v1 - v0, v2 - v0) + areas = 0.5 * np.linalg.norm(cross, axis=1) + total_area = np.sum(areas) + metrics['surface_area'] = float(total_area) + surface_area_computed = True + except: + metrics['surface_area'] = "Unable to compute" - else: - # Multiple images - show all reconstructions - traces = [] + # Compute volume + try: + if mesh.is_watertight(): + volume = mesh.get_volume() + metrics['volume'] = float(volume) + else: + metrics['volume'] = None + except: + metrics['volume'] = None + + print("Metrics computed!") + + # STEP 10: Create 3D visualization + print("Creating 3D visualization...") + points = np.asarray(pcd.points) + colors = np.asarray(pcd.colors) + + if visualization_type == "point_cloud": + scatter = go.Scatter3d( + x=points[:, 0], y=points[:, 1], z=points[:, 2], + mode='markers', + marker=dict( + size=2, + color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in colors], + ), + name='Point Cloud' + ) - if merged_pcd is not None and merged_mesh is not None: - # Show the merged result - 
points = np.asarray(merged_pcd.points) - colors = np.asarray(merged_pcd.colors) - - if visualization_type == "point_cloud" or visualization_type == "both": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=1.5, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Merged Point Cloud' - ) - traces.append(scatter) - - if visualization_type == "mesh" or visualization_type == "both": - vertices = np.asarray(merged_mesh.vertices) - triangles = np.asarray(merged_mesh.triangles) - - if merged_mesh.has_vertex_colors(): - vertex_colors = np.asarray(merged_mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Merged Mesh', - lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), - lightposition=dict(x=100, y=100, z=100) - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Merged Mesh' - ) - traces.append(mesh_trace) + layout = go.Layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + margin=dict(l=0, r=0, t=30, b=0), + height=700, + title="Point Cloud" + ) + + plotly_fig = go.Figure(data=[scatter], layout=layout) + + elif visualization_type == "mesh": + vertices = np.asarray(mesh.vertices) + triangles = np.asarray(mesh.triangles) + + if mesh.has_vertex_colors(): + vertex_colors = np.asarray(mesh.vertex_colors) + colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in vertex_colors] - plotly_fig = go.Figure(data=traces) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title=f"Merged Reconstruction from {num_images} Images" + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + vertexcolor=colors_rgb, + opacity=0.95, + name='Mesh', + lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), + lightposition=dict(x=100, y=100, z=100) ) else: - # Fallback: show individual reconstructions side by side - for idx, result in enumerate(results): - points = np.asarray(result['point_cloud'].points) - colors = np.asarray(result['point_cloud'].colors) - - # Offset each point cloud to separate them - offset = idx * 2 - points[:, 0] += offset - - if visualization_type == "point_cloud" or visualization_type == "both": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name=f'Point Cloud {idx+1}' - ) - traces.append(scatter) - - if visualization_type == "mesh" or visualization_type == "both": - mesh = result['mesh'] - vertices = np.asarray(mesh.vertices) - vertices[:, 0] += offset # Apply same offset - triangles = np.asarray(mesh.triangles) - - if mesh.has_vertex_colors(): - vertex_colors = 
np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name=f'Mesh {idx+1}' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name=f'Mesh {idx+1}' - ) - traces.append(mesh_trace) - - plotly_fig = go.Figure(data=traces) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title=f"Individual Reconstructions (Side by Side)" + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + color='lightblue', + opacity=0.9, + name='Mesh' ) - - # 3. EXPORT FILES - print("Exporting files...") - temp_dir = tempfile.mkdtemp() - - all_metrics = [] - for idx, result in enumerate(results): - prefix = f"image_{idx+1}_" if num_images > 1 else "" - # Save point cloud - pcd_path = Path(temp_dir) / f"{prefix}point_cloud.ply" - o3d.io.write_point_cloud(str(pcd_path), result['point_cloud']) + layout = go.Layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + margin=dict(l=0, r=0, t=30, b=0), + height=700, + title="3D Mesh" + ) - # Save mesh - mesh_path = Path(temp_dir) / f"{prefix}mesh.ply" - o3d.io.write_triangle_mesh(str(mesh_path), result['mesh']) + plotly_fig = go.Figure(data=[mesh_trace], layout=layout) - mesh_obj_path = Path(temp_dir) / f"{prefix}mesh.obj" - o3d.io.write_triangle_mesh(str(mesh_obj_path), result['mesh']) + else: # both + from plotly.subplots import make_subplots - mesh_stl_path = Path(temp_dir) / f"{prefix}mesh.stl" - o3d.io.write_triangle_mesh(str(mesh_stl_path), result['mesh']) + vertices = np.asarray(mesh.vertices) + triangles = np.asarray(mesh.triangles) - all_metrics.append(result['metrics']) - - # Save merged results if available - if merged_pcd is not None and merged_mesh is not None: - merged_pcd_path = Path(temp_dir) / "MERGED_point_cloud.ply" - o3d.io.write_point_cloud(str(merged_pcd_path), merged_pcd) + scatter = go.Scatter3d( + x=points[:, 0], y=points[:, 1], z=points[:, 2], + mode='markers', + marker=dict( + size=2, + color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in colors], + ), + name='Point Cloud' + ) - merged_mesh_path = Path(temp_dir) / "MERGED_mesh.ply" - o3d.io.write_triangle_mesh(str(merged_mesh_path), merged_mesh) + if mesh.has_vertex_colors(): + vertex_colors = np.asarray(mesh.vertex_colors) + colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in vertex_colors] + + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + vertexcolor=colors_rgb, + opacity=0.95, + name='Mesh', + lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), + lightposition=dict(x=100, y=100, z=100) + ) + else: + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], 
k=triangles[:, 2], + color='lightblue', + opacity=0.9, + name='Mesh' + ) - merged_obj_path = Path(temp_dir) / "MERGED_mesh.obj" - o3d.io.write_triangle_mesh(str(merged_obj_path), merged_mesh) + plotly_fig = make_subplots( + rows=1, cols=2, + specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]], + subplot_titles=('Point Cloud', '3D Mesh'), + horizontal_spacing=0.05 + ) - merged_stl_path = Path(temp_dir) / "MERGED_mesh.stl" - o3d.io.write_triangle_mesh(str(merged_stl_path), merged_mesh) + plotly_fig.add_trace(scatter, row=1, col=1) + plotly_fig.add_trace(mesh_trace, row=1, col=2) + + plotly_fig.update_layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + scene2=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + height=600, + showlegend=False, + margin=dict(l=0, r=0, t=50, b=0) + ) - # Save combined metrics - combined_metrics = { - 'total_images': num_images, - 'processing_date': datetime.now().isoformat(), - 'model_used': model_choice, - 'alignment_enabled': enable_alignment and num_images > 1, - 'alignment_successful': merged_pcd is not None, - 'individual_results': all_metrics - } + print("3D visualization created!") - if merged_mesh is not None: - combined_metrics['merged_stats'] = { - 'points': len(merged_pcd.points), - 'vertices': len(merged_mesh.vertices), - 'triangles': len(merged_mesh.triangles), - 'is_watertight': merged_mesh.is_watertight() - } + # STEP 11: Export files + print("Exporting files...") + temp_dir = tempfile.mkdtemp() + + # Save point cloud + pcd_path = Path(temp_dir) / "point_cloud.ply" + o3d.io.write_point_cloud(str(pcd_path), pcd) + + # Save mesh + mesh_path = Path(temp_dir) / "mesh.ply" + o3d.io.write_triangle_mesh(str(mesh_path), mesh) + + # Save mesh as OBJ + mesh_obj_path = Path(temp_dir) / "mesh.obj" + o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh) + + # Save mesh as STL + mesh_stl_path = Path(temp_dir) / "mesh.stl" + o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh) + # Save metrics metrics_path = Path(temp_dir) / "metrics.json" with open(metrics_path, 'w') as f: - json.dump(combined_metrics, f, indent=2, default=str) + json.dump(metrics, f, indent=2, default=str) # Create zip - zip_filename = f"reconstruction_{num_images}_images.zip" if num_images > 1 else "reconstruction_complete.zip" - zip_path = Path(temp_dir) / zip_filename + zip_path = Path(temp_dir) / "reconstruction_complete.zip" with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for file in Path(temp_dir).glob("*"): - if file.suffix != '.zip': - zipf.write(file, file.name) + zipf.write(pcd_path, pcd_path.name) + zipf.write(mesh_path, mesh_path.name) + zipf.write(mesh_obj_path, mesh_obj_path.name) + zipf.write(mesh_stl_path, mesh_stl_path.name) + zipf.write(metrics_path, metrics_path.name) print("Files exported!") - # 4. 
CREATE REPORT - if num_images == 1: - result = results[0] - metrics = result['metrics'] - warnings = result['warnings'] - - warnings_section = "" - if warnings: - warnings_section = "### ⚠️ Detected Challenging Conditions\n" + "\n".join(warnings) + "\n\n" - - report = f""" + # Create metrics report + assessment = _generate_quality_assessment(metrics) + + # Generate explainability report + avg_depth_stats = { + 'min': np.mean([d['min'] for d in depth_stats_list]), + 'max': np.mean([d['max'] for d in depth_stats_list]), + 'mean': np.mean([d['mean'] for d in depth_stats_list]), + 'std': np.mean([d['std'] for d in depth_stats_list]) + } + explainability = generate_explainability_report(metrics, avg_depth_stats) + + multi_image_note = "" + if num_images > 1: + multi_image_note = f""" +### 📸 Multi-Image Reconstruction +- **Number of Images**: {num_images} +- **Combined Points**: {initial_points:,} (before cleaning) +- **Advantage**: Better coverage and reduced occlusion compared to single image +- **Note**: Images were combined using simple spatial offset. For production use, consider advanced registration algorithms (ICP, feature matching). +""" + + report = f""" ## Reconstruction Complete! -{warnings_section} +{privacy_report} + +{multi_image_note} ### Performance Metrics - **Model Used**: {metrics['model_used']} +- **Number of Images**: {metrics['num_images']} - **Depth Estimation Time**: {metrics['depth_estimation_time']} - **Mesh Reconstruction Time**: {metrics['mesh_reconstruction_time']} - **Total Processing Time**: {metrics['total_time']} @@ -1126,10 +817,10 @@ Error: {str(e)} - **Surface Area**: {metrics['surface_area'] if isinstance(metrics['surface_area'], str) else f"{metrics['surface_area']:.2f}"} - **Volume**: {f"{metrics['volume']:.2f}" if metrics.get('volume') else 'N/A (not watertight)'} -### Explainability Metrics -- **Average Uncertainty**: {metrics['avg_uncertainty']:.3f} (lower is better) - - Uncertainty shows where the model is less confident - - Check the red heatmap for spatial distribution of uncertainty +### Quality Assessment +{assessment} + +{explainability} ### Files Exported - Point Cloud: PLY format @@ -1137,80 +828,16 @@ Error: {str(e)} - Quality Metrics: JSON **Download the complete package below!** - """ - else: - # Multiple images report - total_time = sum(float(r['metrics']['total_time'].replace('s', '')) for r in results) - total_points = sum(r['metrics']['final_points'] for r in results) - total_vertices = sum(r['metrics']['vertices'] for r in results) - - all_warnings = [] - for idx, result in enumerate(results): - if result['warnings']: - all_warnings.append(f"\n**Image {idx+1}:**\n" + "\n".join(result['warnings'])) - - warnings_section = "" - if all_warnings: - warnings_section = "### ⚠️ Detected Challenging Conditions\n" + "\n".join(all_warnings) + "\n\n" - - report = f""" -## Multi-Image Reconstruction Complete! - -Processed {num_images} images successfully. 
- -{alignment_info} - -{warnings_section} - -### Overall Statistics -- **Total Processing Time**: {total_time:.2f}s -- **Total Final Points** (individual): {total_points:,} -- **Total Vertices** (individual): {total_vertices:,} -- **Model Used**: {model_choice} - -### Individual Image Results - -""" - for idx, result in enumerate(results): - m = result['metrics'] - report += f""" -#### Image {idx+1} -- Points: {m['final_points']:,} -- Vertices: {m['vertices']:,} -- Triangles: {m['triangles']:,} -- Watertight: {'✓' if m['is_watertight'] else '✗'} -- Time: {m['total_time']} -- Avg Uncertainty: {m['avg_uncertainty']:.3f} - -""" - - report += f""" -### Files Exported -- {num_images} Individual Point Clouds (PLY format) -- {num_images} Individual Meshes (PLY, OBJ, STL formats)""" - - if merged_pcd is not None: - report += """ -- **MERGED_point_cloud.ply** - Unified aligned point cloud ⭐ -- **MERGED_mesh.ply/obj/stl** - Unified aligned mesh ⭐""" - - report += """ -- Combined Metrics (JSON) - -**Download the complete package below!** - """ - - # Create JSON output - json_output = json.dumps(combined_metrics, indent=2, default=str) + """ print("SUCCESS! Returning results...") - return depth_viz, plotly_fig, str(zip_path), report, json_output + return combined_depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str), privacy_report except Exception as e: import traceback error_msg = f"Error during reconstruction:\n{str(e)}\n\nTraceback:\n{traceback.format_exc()}" print(error_msg) - return None, None, None, error_msg, None + return None, None, None, error_msg, None, None # ============================================================================ # GRADIO INTERFACE @@ -1219,22 +846,16 @@ Processed {num_images} images successfully. with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo: gr.Markdown(""" - # 🏗️ 3D Urban Reconstruction from Images + # 🗿️ 3D Urban Reconstruction from Single or Multiple Images - Transform 2D photographs into 3D spatial models with Responsible AI features + Transform 2D photographs into 3D spatial models with **Responsible AI** practices - **NEW:** Multi-image support! Upload 1-8 images for more complete reconstructions. - """) + Upload one or multiple photographs to generate interactive 3D models with exportable spatial data. - # Responsible AI Warning Banner - gr.Markdown(""" -
- ⚠️ Responsible Use Notice
- • Only process images you have rights to use
- • Do not capture identifiable people without consent
- • Be aware of model biases (trained primarily on Western indoor scenes)
- • Check the "Responsible AI" tab for detailed ethical guidelines
+ **New Features:** + - ✨ **Multi-image support** for better coverage and accuracy + - 🔒 **Privacy protection** with local processing + - 🔍 **AI explainability** to understand reconstruction decisions """) with gr.Tabs(): @@ -1243,47 +864,44 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de with gr.Tab("🔧 Reconstruction"): with gr.Row(): with gr.Column(scale=1): - gr.Markdown("### 📸 Input Images") - input_image = gr.File( + gr.Markdown(""" + ### Upload Images + Upload **1-5 images** of the same object/scene from different angles for best results. + - Single image: Fast processing + - Multiple images: Better coverage, improved quality + """) + + input_images = gr.File( file_count="multiple", file_types=["image"], - label="Upload 1-8 Images (Single image for quick test, multiple for complete coverage)" + label="Upload Image(s) - Supports: JPG, PNG, BMP", + type="filepath" ) - gr.Markdown(""" - **Tips for multiple images:** - - Capture object from different angles (360° coverage) - - Ensure 30-50% overlap between views - - Use consistent lighting across all shots - - Keep camera distance similar - - Automatic alignment will merge them into one model! - """) - - gr.Markdown("### ⚙️ Model Settings") + gr.Markdown("### Model Settings") model_choice = gr.Radio( choices=["GLPN (Recommended)", "DPT (High Quality)"], value="GLPN (Recommended)", - label="Depth Estimation Model", - info="GLPN: Faster, good for indoor. DPT: Slower, better quality" + label="Depth Estimation Model" ) visualization_type = gr.Radio( choices=["mesh", "point_cloud", "both"], value="mesh", - label="3D Visualization Type", - info="Mesh recommended for most users" + label="3D Visualization Type" ) - enable_alignment = gr.Checkbox( + gr.Markdown("### Responsible AI Settings") + privacy_check = gr.Checkbox( value=True, - label="Enable Automatic Alignment (for multiple images)", - info="Uses ICP to automatically align and merge point clouds" + label="Enable privacy checks (recommended)", + info="Warns if images might contain sensitive information" ) reconstruct_btn = gr.Button("🚀 Start Reconstruction", variant="primary", size="lg") with gr.Column(scale=2): - depth_output = gr.Image(label="Depth Maps & Uncertainty Analysis") + depth_output = gr.Image(label="Depth Map Visualization") viewer_3d = gr.Plot(label="Interactive 3D Viewer (Rotate, Zoom, Pan)") with gr.Row(): @@ -1295,47 +913,218 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de with gr.Row(): download_output = gr.File(label="📦 Download Complete Package (ZIP)") - # Process function needs to handle file objects from gr.File - def process_uploaded_files(files, model, viz_type, align): - if files is None: - return None, None, None, "Please upload at least one image.", None + # Process function wrapper to handle file uploads + def process_uploaded_files(files, model, viz_type, privacy): + if files is None or len(files) == 0: + return None, None, None, "Please upload at least one image.", None, None - # Convert file objects to PIL Images + # Load images from file paths images = [] - for file in files: - img = Image.open(file.name) + for file_path in files: + img = Image.open(file_path) images.append(img) - return process_image(images, model, viz_type, align) + return process_image(images, model, viz_type, privacy) reconstruct_btn.click( fn=process_uploaded_files, - inputs=[input_image, model_choice, visualization_type, enable_alignment], - outputs=[depth_output, viewer_3d, download_output, metrics_output, 
json_output] + inputs=[input_images, model_choice, visualization_type, privacy_check], + outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output, gr.Textbox(visible=False)] ) # ========== RESPONSIBLE AI TAB ========== - with gr.Tab("🛡️ Responsible AI & Ethics"): - gr.Markdown(RESPONSIBLE_AI_TEXT) - + with gr.Tab("🛡️ Responsible AI"): gr.Markdown(""" - ## Report Issues + ## Responsible AI Framework + + This application implements responsible AI principles to ensure ethical and safe use of AI technology. + + ### 1. Privacy Protection 🔒 + + **What we do:** + - **Local Processing Only**: All computation happens in your browser/server - no data sent to external APIs + - **No Data Retention**: Images are processed in memory and deleted immediately after reconstruction + - **No Tracking**: We don't collect, store, or analyze user data + - **Privacy Warnings**: System alerts you if uploaded images might contain sensitive information + + **User Responsibilities:** + - Avoid uploading images with identifiable individuals without consent + - Don't use for surveillance or unauthorized monitoring + - Be mindful of private/sensitive locations + - Follow local privacy laws and regulations + + **Technical Safeguards:** + - No facial recognition algorithms + - No identity tracking features + - No cloud storage or external data transmission + - User maintains full data ownership + + --- + + ### 2. Explainability & Transparency 🔍 + + **Understanding AI Decisions:** + + The system provides multiple layers of explainability: + + **Depth Map Visualization:** + - Shows exactly how AI interprets scene depth + - Color coding reveals AI's confidence (yellow/red = far, purple/blue = near) + - Allows manual verification of depth estimates + + **Quality Metrics:** + - **Outlier Percentage**: Shows AI uncertainty (< 5% = high confidence) + - **Manifold Properties**: Indicates reconstruction reliability + - **Watertight Status**: Reveals completeness of 3D model + + **Explainability Report:** + - Plain-language explanation of AI decisions + - Confidence levels for reconstruction quality + - Warnings about potential issues + + **Model Transparency:** + - Open-source models (GLPN, DPT) with published papers + - Documented training data (NYU Depth V2, etc.) + - Known limitations explicitly stated + + --- + + ### 3. Fairness & Bias Awareness ⚖️ + + **Known Biases:** + + Our AI models have inherent biases based on their training data: + + **Geographic Bias:** + - Trained primarily on urban/indoor scenes from developed countries + - May underperform on architectural styles from underrepresented regions + - Less accurate for non-Western building structures + + **Scene Type Bias:** + - Optimized for indoor environments + - Better performance on structured scenes (rooms, buildings) + - May struggle with natural landscapes, outdoor scenes + + **Lighting Bias:** + - Trained on well-lit images + - Reduced accuracy in low-light conditions + - May fail on images with extreme shadows + + **Mitigation Strategies:** + - Quality metrics help identify poor reconstructions + - Multiple model options (GLPN vs DPT) for different scenarios + - User can validate results visually + - Clear documentation of limitations + + --- + + ### 4. 
Intended Use & Limitations ⚠️ + + **Appropriate Uses:** + - ✅ Educational demonstrations and learning + - ✅ Research and academic projects + - ✅ Preliminary architectural visualization + - ✅ Art and creative projects + - ✅ Rapid prototyping and concept exploration + + **Inappropriate Uses:** + - ❌ Safety-critical applications (structural engineering, medical) + - ❌ Surveillance or unauthorized monitoring + - ❌ Precise measurements without ground truth validation + - ❌ Legal evidence or forensic analysis + - ❌ Automated decision-making affecting individuals + + **Key Limitations:** + + 1. **Scale Ambiguity**: Outputs are relative, not absolute measurements + 2. **Single Viewpoint**: Cannot see occluded/hidden areas (reduced with multi-image) + 3. **No Georeferencing**: Local coordinates, not GPS/global positioning + 4. **Monocular Limitations**: Less accurate than stereo or LiDAR systems + 5. **Training Data Constraints**: Best for similar scenes to training data + + --- + + ### 5. Data Governance & Transparency 📊 + + **Model Provenance:** + + All AI models used in this application are fully transparent: + + | Model | Source | Training Data | License | Paper | + |-------|--------|---------------|---------|-------| + | GLPN | Hugging Face | NYU Depth V2 | Apache 2.0 | Kim et al., CVPR 2022 | + | DPT | Intel/Hugging Face | Mixed datasets | Apache 2.0 | Ranftl et al., ICCV 2021 | + + **Training Data:** + - NYU Depth V2: Indoor scenes from New York apartments + - MIX 6: Mixed indoor/outdoor scenes + - Primarily North American and European locations + - Limited representation of other regions - If you observe: - - Misuse of this technology - - Significant bias in results - - Privacy violations - - Ethical concerns + **No Proprietary Black Boxes:** + - All models are open-source + - Architecture and weights publicly available + - No hidden proprietary algorithms + - Users can audit model behavior - Please contact: [Your institution's ethics board/contact] + --- - ## Acknowledgment of Limitations + ### 6. Environmental Considerations 🌍 - This tool is provided for educational and research purposes. Users must: - - Understand model limitations and biases - - Use responsibly and ethically - - Verify results with ground truth when critical - - Not rely solely on AI for important decisions + **Computational Efficiency:** + - Optimized for CPU inference (no GPU required) + - GLPN model: Fast processing (~0.3-2.5s per image) + - Minimal energy consumption compared to cloud-based solutions + - Local processing reduces data transfer energy costs + + --- + + ### 7. Ethical Guidelines for Users 📖 + + **Before Using This Tool:** + + 1. **Consent**: Ensure you have rights to process uploaded images + 2. **Privacy**: Verify images don't contain identifiable individuals without consent + 3. **Purpose**: Confirm your use case aligns with intended applications + 4. **Validation**: Don't rely solely on AI outputs for critical decisions + 5. **Attribution**: Credit the open-source models and datasets used + + **Reporting Issues:** + + If you discover: + - Unexpected biases or failure modes + - Privacy concerns or vulnerabilities + - Misuse potential or ethical issues + + Please report to the development team for continuous improvement. + + --- + + ### 8. 
Continuous Improvement 🔄 + + **How We're Working to Improve:** + + - Expanding training data diversity + - Developing bias detection metrics + - Improving explainability features + - Adding more privacy safeguards + - Documenting edge cases and limitations + + **User Feedback:** + Your feedback helps us improve responsible AI practices. Please share: + - Unexpected results or biases observed + - Suggestions for better explainability + - Privacy concerns or recommendations + - Use cases we haven't considered + + --- + + ## References + + - [Responsible AI Practices](https://ai.google/responsibilities/responsible-ai-practices/) + - [Microsoft Responsible AI Principles](https://www.microsoft.com/en-us/ai/responsible-ai) + - [Partnership on AI](https://partnershiponai.org/) + - [Montreal Declaration for Responsible AI](https://www.montrealdeclaration-responsibleai.com/) """) # ========== THEORY TAB ========== @@ -1345,43 +1134,49 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de gr.Markdown(""" ## Reconstruction Pipeline Details - This application uses an **enhanced 13-step automated pipeline** (with alignment): + This application uses an **11-step automated pipeline**: - **For Each Image:** 1. **Image Preprocessing**: Resize to model requirements (divisible by 32) - 2. **Depth Estimation**: Neural network inference (GLPN or DPT) - 3. **Uncertainty Estimation**: Compute local depth variance as confidence measure - 4. **Failure Detection**: Identify challenging conditions (reflections, low contrast, etc.) - 5. **Point Cloud Generation**: Back-project using pinhole camera model + 2. **Depth Estimation**: Neural network inference (GLPN or DPT) for each image + 3. **Depth Visualization**: Create comparison images + 4. **Point Cloud Generation**: Back-project using pinhole camera model + 5. **Multi-View Fusion**: Merge point clouds from multiple images (if applicable) 6. **Outlier Removal**: Statistical filtering (20 neighbors, 2.0 std ratio) 7. **Normal Estimation**: Local plane fitting for surface orientation 8. **Mesh Reconstruction**: Poisson surface reconstruction (depth=10) 9. **Quality Metrics**: Compute manifold properties and geometric measures + 10. **3D Visualization**: Create interactive Plotly figure + 11. **File Export**: Generate PLY, OBJ, STL formats - **For Multiple Images (Automatic Alignment):** - 10. **Feature Computation**: Extract FPFH descriptors from each point cloud - 11. **Global Registration**: RANSAC-based correspondence matching - 12. **ICP Refinement**: Iterative Closest Point for precise alignment - 13. **Merging & Export**: Combine aligned clouds, create unified mesh, export all formats + ### Multi-Image Processing - ### Automatic Alignment Algorithm + When multiple images are provided: + - Each image is processed independently for depth estimation + - Point clouds are generated from each image + - Simple spatial offset applied to prevent overlap + - Combined point cloud undergoes unified cleaning and meshing - **ICP (Iterative Closest Point):** - - Industry-standard algorithm for point cloud registration - - Iteratively minimizes distance between corresponding points - - Achieves sub-millimeter accuracy in ideal conditions + **Note**: Current implementation uses basic merging. Production systems would use: + - Feature matching (SIFT, ORB) for correspondence + - Structure-from-Motion (SfM) for camera pose estimation + - Iterative Closest Point (ICP) for fine alignment + - Bundle adjustment for global optimization - **Process:** - 1. 
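+
+    For readers who want to go beyond the basic merging used here, the sketch below shows roughly what the ICP refinement step of such a pipeline could look like with Open3D. It is illustrative only and is not the code this app runs; the input filenames, the identity initial guess, and the 0.02 correspondence threshold are assumptions to adapt to your data.
+
+    ```python
+    import numpy as np
+    import open3d as o3d
+
+    # Two partially overlapping point clouds, e.g. PLY exports from two viewpoints
+    source = o3d.io.read_point_cloud("view_1.ply")
+    target = o3d.io.read_point_cloud("view_2.ply")
+
+    # Point-to-plane ICP needs normals on both clouds
+    for pcd in (source, target):
+        pcd.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
+
+    threshold = 0.02  # max correspondence distance; tune per scene
+    result = o3d.pipelines.registration.registration_icp(
+        source, target, threshold, np.eye(4),
+        o3d.pipelines.registration.TransformationEstimationPointToPlane())
+
+    print(f"fitness={result.fitness:.3f}, inlier RMSE={result.inlier_rmse:.4f}")
+    merged = source.transform(result.transformation) + target
+    ```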
Downsample point clouds for speed (voxel size = 0.05) - 2. Compute FPFH features (Fast Point Feature Histograms) - 3. Find initial transformation with RANSAC (100,000 iterations) - 4. Refine with point-to-plane ICP (threshold = 0.02) - 5. Apply transformation and merge + ### Default Parameters Used - **Quality Metrics:** - - **Fitness**: Ratio of inlier correspondences (higher = better alignment) - - **RMSE**: Root mean squared error of aligned points (lower = better) - - Typical good values: Fitness > 0.7, RMSE < 0.05 + - **Poisson Depth**: 10 (balanced detail vs speed) + - **Outlier Neighbors**: 20 points + - **Outlier Std Ratio**: 2.0 + - **Focal Length**: 500 (pixels) + - **Normal Radius**: 0.1 (search radius) + + These parameters are optimized for general use cases and provide good results for most indoor scenes. + + ## Key References + + 1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth." *CVPR 2022* + 2. **Ranftl, R., et al. (2021)**. "Vision Transformers for Dense Prediction." *ICCV 2021* + 3. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." *Eurographics Symposium on Geometry Processing* ## Model Comparison @@ -1391,14 +1186,21 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de | **Quality** | Good | Excellent | | **Memory** | Low (~2GB) | High (~5GB) | | **Best For** | Indoor scenes, Real-time | Complex scenes, Highest quality | - | **Training** | NYU Depth V2 (NYC indoors) | Multiple datasets | - | **Geographic Bias** | High (Western indoor) | Moderate (more diverse) | + | **Training** | NYU Depth V2 | Multiple datasets | - ## Key References + ### When to Use Each Model: - 1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth." *CVPR 2022* - 2. **Ranftl, R., et al. (2021)**. "Vision Transformers for Dense Prediction." *ICCV 2021* - 3. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." 
*Eurographics Symposium on Geometry Processing* + **Choose GLPN if:** + - Processing indoor scenes (rooms, furniture) + - Speed is important + - Running on limited hardware + - Need real-time performance + + **Choose DPT if:** + - Need highest quality results + - Processing complex/outdoor scenes + - Speed is not critical + - Have sufficient memory/GPU """) # ========== USAGE GUIDE TAB ========== @@ -1409,165 +1211,157 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de ### Step 1: Upload Image(s) **Single Image Mode:** - - Upload one JPG, PNG, or BMP file - - Best for: Quick tests, simple objects, proof of concept - - Limitation: Cannot see hidden surfaces - - **Multiple Image Mode (NEW!):** - - Upload 2-8 images of the same object/scene - - Take photos from different angles (30-50% overlap recommended) - - Best for: Complete 360° coverage, professional projects - - Limitation: Requires manual alignment in external software - - **Recommended Image Settings:** - - Resolution: 512-1024px (optimal balance) - - Lighting: Even, diffused (avoid harsh shadows) - - Focus: Sharp, no motion blur - - Scene: Textured objects with clear depth cues - - ### Step 2: Choose Model - - **GLPN (Recommended):** - - ✅ Fast processing (~0.3-2.5s) - - ✅ Low memory requirements - - ✅ Great for indoor scenes - - ⚠️ Trained on NYC apartments (geographic bias) - - Best for: Quick iterations, indoor furniture, rooms - - **DPT (High Quality):** - - ✅ Superior quality - - ✅ Better generalization - - ✅ Handles complex scenes - - ⚠️ Slower processing (~0.8-6.5s) - - ⚠️ Higher memory usage (~5GB) - - Best for: Final outputs, outdoor scenes, detailed work - - ### Step 3: Select Visualization - - **Mesh**: Solid 3D surface (most intuitive) - - **Point Cloud**: Individual colored 3D points (shows raw data) + - Click on the upload area and select one image + - Best for: Quick reconstruction, simple objects + - Processing time: Fast + + **Multiple Image Mode (NEW):** + - Select 2-5 images of the same object from different angles + - Best for: Better coverage, complex objects, reduced occlusions + - Processing time: Longer (scales with number of images) + - **Tip**: Take photos from 45-90 degree intervals around the object + + **Image Requirements:** + - **Format**: JPG, PNG, or BMP + - **Resolution**: 512-1024px recommended + - **Lighting**: Well-lit, minimal shadows + - **Content**: Objects with texture, clear depth cues + + **Multi-Image Tips:** + - Keep camera distance roughly consistent + - Overlap between views improves reconstruction + - Avoid motion blur between shots + - Same lighting conditions across all images + + --- + + ### Step 2: Configure Settings + + **Model Selection:** + - **GLPN (Recommended)**: Fast, good for indoor scenes + - **DPT (High Quality)**: Slower but higher quality + + **Visualization Type:** + - **Mesh**: Solid 3D surface (recommended) + - **Point Cloud**: Individual 3D points - **Both**: Side-by-side comparison - ### Step 4: Review Results - - **NEW: Uncertainty Maps** - - Red areas = Model is less confident - - Blue areas = Model is more confident - - Use to identify problematic regions + **Privacy Settings:** + - Keep "Enable privacy checks" ON (recommended) + - System will warn about potential privacy concerns - **NEW: Automatic Warnings** - The system now detects: - - Very dark images - - Low contrast/uniform textures - - Potential reflective surfaces - - Sharp discontinuities (transparent objects) - - Low resolution inputs - - ### Step 5: Download & Use 
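+
+    **Optional: pre-resize large photos.** This relates to the image requirements in Step 1: if your photos are much larger than the recommended 512-1024 px range, you can downscale them before uploading. A minimal Pillow sketch, assuming your originals sit in a placeholder folder called `my_photos`:
+
+    ```python
+    from pathlib import Path
+    from PIL import Image
+
+    for path in Path("my_photos").glob("*.jpg"):   # placeholder folder and pattern
+        img = Image.open(path).convert("RGB")
+        img.thumbnail((1024, 1024))                # keeps aspect ratio, max side 1024 px
+        img.save(path.with_name("resized_" + path.name))
+    ```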
Files + --- - **For Single Image:** - - Download ZIP file with point cloud, mesh (PLY/OBJ/STL), and metrics + ### Step 3: Start Reconstruction + - Click "🚀 Start Reconstruction" + - Wait for processing (10-90 seconds depending on number of images) + - Results appear automatically - **For Multiple Images with Alignment:** - - Download ZIP file containing: - - Individual reconstructions (image_1_*, image_2_*, etc.) - - **MERGED files** (automatically aligned and combined!) ⭐ - - All formats: PLY, OBJ, STL - - Metrics JSON with alignment quality + --- - **The MERGED files are ready to use immediately - no manual alignment needed!** + ### Step 4: Explore Results - ### Understanding Alignment Results + **Depth Map(s):** + - Shows original image(s) next to depth estimates + - Color coding: Yellow/Red = Far, Purple/Blue = Near + - Multiple images show grid of all depth maps - **In the Report:** - - **Translation Distance**: How far each image was moved to align (in arbitrary units) - - **Merged Statistics**: Total points/vertices in unified model - - **Watertight Status**: Whether merged mesh is 3D-printable + **Interactive 3D Viewer:** + - **Rotate**: Click and drag + - **Zoom**: Scroll wheel + - **Pan**: Right-click and drag + - **Reset**: Double-click - **If Alignment Fails:** - - Not enough overlap between images - - Very different viewpoints - - Lack of distinctive features - - Reflective/transparent surfaces - - **Solution**: Retake photos with more overlap, or use manual alignment in CloudCompare + **Reconstruction Report:** + - Performance metrics + - Quality assessment + - AI explainability (confidence levels) + - Privacy warnings (if any) - ## Understanding Explainability Features + --- - ### Uncertainty Visualization - - **What it shows**: Where the model is guessing vs confident - - **How to use**: Avoid relying on high-uncertainty regions for measurements - - **Threshold**: >0.7 uncertainty = very uncertain, <0.3 = confident + ### Step 5: Download Results - ### Automatic Warning System - The app now detects and warns about: + ZIP package contains: + - `point_cloud.ply` - 3D points with colors + - `mesh.ply` - Full mesh with metadata + - `mesh.obj` - Standard format (most compatible) + - `mesh.stl` - For 3D printing + - `metrics.json` - All quality metrics - 1. **Dark Images**: May reduce depth accuracy - - Solution: Brighten image or use flash + --- - 2. **Low Contrast**: Uniform textures confuse depth estimation - - Solution: Add textured reference objects + ## Viewing Downloaded Files - 3. **Reflective Surfaces**: Mirrors, glass, polished metal - - Solution: Use matte spray or avoid these materials + **Free Software:** + - **MeshLab**: Best for beginners - https://www.meshlab.net/ + - **Blender**: Advanced 3D modeling - https://www.blender.org/ + - **CloudCompare**: Point cloud analysis - https://www.cloudcompare.org/ - 4. **Transparent Objects**: Glass, water, clear plastic - - Solution: These cannot be reconstructed reliably + **Online Viewers:** + - https://3dviewer.net/ + - https://www.creators3d.com/online-viewer - 5. 
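+
+    **Python (optional):** if you already work in Python, the exported files can also be inspected with a few lines of Open3D. This is a minimal sketch, assuming you have extracted the downloaded ZIP into your current folder:
+
+    ```python
+    import open3d as o3d
+
+    # Filenames as exported in the ZIP package
+    pcd = o3d.io.read_point_cloud("point_cloud.ply")
+    mesh = o3d.io.read_triangle_mesh("mesh.obj")
+    mesh.compute_vertex_normals()   # needed for proper shading
+
+    print(pcd)                      # quick sanity check: number of points
+    o3d.visualization.draw_geometries([mesh])   # opens an interactive viewer window
+    ```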
**Low Resolution**: <320x240 pixels - - Solution: Use higher resolution camera + --- ## Tips for Best Results - ### DO: - - ✅ Use well-lit images (natural diffused light best) - - ✅ Include visible depth cues (corners, edges) - - ✅ Use textured surfaces - - ✅ Take multiple angles for complete coverage - - ✅ Check uncertainty maps for problem areas - - ✅ Read warnings and adjust accordingly - - ### AVOID: - - ❌ Motion blur or defocused images - - ❌ Reflective surfaces (mirrors, polished metal) - - ❌ Transparent objects (glass, clear plastic) - - ❌ Completely uniform textures (blank walls) - - ❌ Harsh shadows or backlighting - - ❌ Extreme close-ups or distant scenes + ### Single Image Mode: + - Use well-lit images + - Include depth cues (corners, edges) + - Avoid reflective surfaces + - Indoor scenes work best + + ### Multiple Image Mode: + - Take 3-5 photos from different angles + - Maintain 45-90 degree spacing + - Keep consistent distance from object + - Ensure 30-50% overlap between views + - Use same lighting for all shots + + ### What to Avoid: + - Motion blur + - Extreme close-ups + - Transparent objects + - Mirrors or glass + - Uniform textures + - Very dark images + + --- ## Troubleshooting - **High uncertainty in depth map:** - - Check warnings for specific issues - - Try different lighting - - Add textured objects for reference - - Use DPT model instead of GLPN - - **Poor alignment with multiple images:** - - Ensure sufficient overlap (30-50%) - - Use consistent lighting across all images - - Maintain similar camera distance - - Include distinctive features for matching - - Avoid moving objects in scene - - Try disabling alignment checkbox and use manual methods if needed - - **Alignment takes too long:** - - Normal for 4+ images (can take 2-5 minutes) - - FPFH feature computation is intensive - - Disable alignment if you prefer manual methods - - Use fewer images for faster processing - - **Model seems biased:** - - Check "Responsible AI" tab for known limitations - - GLPN works best on Western indoor scenes - - Try DPT for non-Western or outdoor scenes - - Document and report significant bias + **"Please upload at least one image"** + - Ensure files are selected before clicking reconstruct + - Check file format (JPG, PNG, BMP only) + + **Mesh has holes/artifacts** + - Normal for single-view reconstruction + - Try multiple images for better coverage + - Use MeshLab's "Close Holes" tool if needed + + **Processing is slow** + - Use GLPN model instead of DPT + - Reduce number of images + - Use smaller image resolution + + **"Not watertight" warning** + - Common for complex scenes + - Still usable for visualization + - For 3D printing: use mesh repair tools + + **Privacy warnings** + - Review uploaded images + - Remove identifiable information if needed + - Disable privacy checks if false positive """) # ========== CITATION TAB ========== with gr.Tab("📄 Citation & Credits"): gr.Markdown(""" - ## Citation + ## Academic Citation - If you use this tool in research, please cite: + If you use this tool in your research or projects, please cite the underlying models: ### For GLPN Model: ```bibtex @@ -1597,61 +1391,36 @@ with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as de - **PyTorch**: Deep learning framework - **Plotly**: Interactive 3D visualization - **Gradio**: Web interface - - **SciPy**: Uncertainty estimation - - **Matplotlib**: Visualization + - **NumPy & SciPy**: Scientific computing ## Acknowledgments - - **NYU Depth V2 Dataset**: Training data for 
GLPN - - **MIX 6 Dataset**: Training data for DPT - - **Anthropic**: Responsible AI framework inspiration - - **Open source community**: Essential tools and libraries - - ## Version History - - **v2.0 (Current)** - Enhanced Responsible AI Version with Automatic Alignment - - ✨ Multi-image support (1-8 images) - - ✨ **Automatic alignment using ICP** (no manual work needed!) - - ✨ **Automatic merging** into unified 3D model - - ✨ Uncertainty estimation and visualization - - ✨ Automatic failure case detection - - ✨ Comprehensive warning system - - ✨ Responsible AI documentation - - ✨ Geographic bias disclosure - - ✨ Privacy guidelines - - ✨ Enhanced explainability - - **v1.0** - Initial Release - - Single image processing - - GLPN and DPT models - - Basic quality metrics - - Multiple export formats + - NYU Depth V2 dataset creators + - Open3D development team + - Hugging Face community + - Academic researchers advancing monocular depth estimation + + ## License & Terms + + - Models: Apache 2.0 License + - This application: Educational and research use + - Commercial use: Verify model licenses + - No warranty provided for accuracy or fitness for purpose + + ## Contact & Feedback + + For questions, bug reports, or suggestions regarding responsible AI implementation, + please contact the development team. """) # ========== FOOTER ========== gr.Markdown(""" --- + **🔒 Privacy Notice**: All processing happens locally. No data is transmitted to external servers. - ## 🌟 Enhanced Features in This Version - - **Multi-Image Support**: Process 1-8 images for comprehensive coverage - - **Automatic Alignment**: ICP-based alignment automatically merges point clouds (no manual work!) - - **Explainability**: Uncertainty maps show model confidence spatially - - **Fairness**: Geographic bias documented, model limitations disclosed - - **Privacy**: Clear guidelines, local processing, no data retention - - **Safety**: Automatic detection of challenging conditions with warnings - - --- - - **⚖️ Ethical Use Policy**: This tool is provided for educational and research purposes. - Users are responsible for ensuring ethical and legal use of this technology. + **⚠️ Disclaimer**: This tool is for educational and research purposes. Not suitable for safety-critical applications or precise measurements. - **📧 Feedback**: Report issues, bias, or ethical concerns to your institution's ethics board. + **📊 Responsible AI**: Built with privacy protection, explainability, and fairness considerations. """) # ============================================================================