diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,7 +1,7 @@ """ -Advanced 3D Reconstruction from Single/Multiple Images -Enhanced with Responsible AI features and multi-image support -Addresses: Privacy, Fairness, Explainability, Multiple Image Processing +Advanced 3D Reconstruction from Single or Multiple Images +Academic-grade pipeline with responsible AI considerations, multi-image support, +quality metrics, multiple export formats, and interactive visualization """ import gradio as gr @@ -18,116 +18,6 @@ import time from pathlib import Path import tempfile import zipfile -from datetime import datetime - -# ============================================================================ -# RESPONSIBLE AI DOCUMENTATION -# ============================================================================ -RESPONSIBLE_AI_TEXT = """ -## Responsible AI & Ethics - -### Model Limitations & Bias - -**Training Data Geographic Bias:** -- **GLPN**: Trained on NYU Depth V2 dataset (primarily New York City indoor scenes) - - **Performance**: Excellent for Western urban interiors, office spaces, apartments - - **Limitations**: May underperform on non-Western architecture, outdoor scenes, rural settings - -- **DPT**: Trained on mixed datasets (MIX 6 - multiple indoor/outdoor sources) - - **Performance**: Better generalization but still biased toward Western built environments - - **Limitations**: Less accurate for cultural artifacts, traditional architecture, natural landscapes - -**Scene Type Performance:** -| Scene Type | GLPN Accuracy | DPT Accuracy | Notes | -|------------|---------------|--------------|-------| -| Modern Indoor (Western) | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | Optimal | -| Traditional Architecture | ⭐⭐⭐ | ⭐⭐⭐⭐ | May miss details | -| Outdoor/Natural | ⭐⭐ | ⭐⭐⭐⭐ | GLPN struggles | -| Reflective Surfaces | ⭐ | ⭐⭐ | Known failure case | -| Transparent Objects | ⭐ | ⭐ | Cannot estimate depth | - -### Privacy Considerations - -**Webcam Usage:** -- ⚠️ **Warning**: Webcam captures are processed locally but may inadvertently capture: - - Identifiable people in background - - Sensitive documents or screens - - Private spaces or property - -**Best Practices:** -- Only capture objects/spaces you have permission to document -- Ensure no people are in frame (or obtain consent) -- Avoid capturing sensitive information -- All processing is done locally - no images sent to external servers - -**Data Retention:** -- Images are processed in memory only -- No automatic storage or logging -- Downloaded files are user-controlled -- No telemetry or usage tracking - -### Explainability Features - -This app provides multiple explainability layers: - -1. **Depth Map Visualization**: Color-coded confidence in distance estimation -2. **Uncertainty Maps**: Shows where model is uncertain (darker = less confident) -3. **Quality Metrics**: Statistical measures of reconstruction reliability -4. **Outlier Detection**: Identifies and reports noisy predictions -5. 
**Model Comparison**: Compare GLPN vs DPT to understand model differences - -### Fairness & Accessibility - -**Accessibility Features:** -- File upload (primary method) - works for all users -- Webcam (optional) - for users with camera access -- Multiple format exports - compatible with free software -- Detailed documentation - no assumed prior knowledge - -**Known Limitations:** -- Requires visual input (not accessible to blind users for capture) -- Processing time varies by hardware (may disadvantage low-resource users) -- Models optimized for Western scenes (geographic bias) - -### Environmental Impact - -**Computational Cost:** -- **GLPN Processing**: ~2GB RAM, 0.3-2.5s CPU time -- **DPT Processing**: ~5GB RAM, 0.8-6.5s CPU time -- **Carbon Estimate**: ~0.001-0.005 kWh per reconstruction - -**Recommendations:** -- Use GLPN for most tasks (4x more efficient) -- Batch process multiple images to reduce overhead -- Consider hardware upgrade carbon cost vs processing efficiency - -### Dual-Use & Misuse Prevention - -**Prohibited Uses:** -- ❌ Unauthorized surveillance or monitoring -- ❌ Scanning people without explicit consent -- ❌ Documenting property without permission -- ❌ Creating deepfakes or deceptive content -- ❌ Any use that violates privacy or dignity - -**Intended Uses:** -- ✅ Educational research and learning -- ✅ Personal photography projects -- ✅ Architectural documentation (with permission) -- ✅ Product design and prototyping -- ✅ Cultural heritage preservation (authorized) - -### Terms of Use - -By using this application, you agree to: -1. Only process images you have rights to use -2. Not capture identifiable people without consent -3. Use outputs ethically and legally -4. Not use for surveillance or deceptive purposes -5. Understand model limitations and biases - -**If you observe misuse or have ethical concerns, please report them.** -""" # ============================================================================ # LITERATURE REVIEW & THEORETICAL BACKGROUND @@ -137,7 +27,7 @@ THEORY_TEXT = """ ## About This Tool -This application demonstrates how artificial intelligence can convert single 2D photographs into interactive 3D models automatically. +This application demonstrates how artificial intelligence can convert 2D photographs into interactive 3D models automatically, with a focus on responsible AI practices. ### What Makes This Special @@ -171,277 +61,187 @@ This tool uses state-of-the-art artificial intelligence models: - Best for: Wide-area urban landscapes, complex built environments - Geographic advantage: Superior accuracy for planning-grade documentation +### Multi-Image Reconstruction + +**Single Image Mode:** +- Fast processing +- Works with limited data +- Best for quick assessments +- Limitations: Single viewpoint, scale ambiguity + +**Multiple Image Mode (NEW):** +- Improved coverage and accuracy +- Combines depth maps from different viewpoints +- Reduces occlusion issues +- Better overall 3D representation +- Note: Images should be of the same object/scene from different angles + ### How It Works (Simple) -1. **AI looks at photo** → Recognizes objects, patterns, perspective +1. **AI looks at photo(s)** → Recognizes objects, patterns, perspective 2. **Estimates distance** → Figures out what's close, what's far 3. **Creates 3D points** → Places colored dots in 3D space 4. **Builds surface** → Connects dots into smooth shape +5. 
**Multi-view fusion** (if multiple images) → Combines information for better accuracy + +### Responsible AI Considerations + +This tool is designed with responsible AI principles in mind: + +**1. Privacy Protection:** +- All processing happens locally - no data sent to external servers +- No image storage or retention after processing +- No facial recognition or identity tracking +- Users maintain full control over their data +- Recommendation: Avoid uploading images with identifiable individuals + +**2. Explainability & Transparency:** +- Depth map visualization shows how AI "sees" the scene +- Quality metrics provide confidence indicators +- Processing steps are clearly documented +- Model limitations are explicitly stated +- Users can verify reconstruction quality + +**3. Fairness & Bias Awareness:** +- Models trained primarily on indoor/urban scenes +- May perform differently on underrepresented scene types +- Quality metrics help identify potential biases +- Users should validate results for critical applications + +**4. Intended Use & Limitations:** +- Designed for educational and research purposes +- Not suitable for: safety-critical applications, surveillance, or precise measurements +- Best for: visualization, preliminary analysis, teaching +- Scale ambiguity: requires ground control for absolute measurements + +**5. Data Governance:** +- Open-source models with documented training data +- No proprietary algorithms or black boxes +- Full transparency in reconstruction pipeline +- Users can audit and validate the process + +### Spatial Data Pipeline + +Our reconstruction pipeline generates geospatially-relevant data: + +**1. Monocular Depth Estimation** + - Challenge: Extracting 3D spatial information from 2D photographs + - Application: Similar to photogrammetry but from single images + - Output: Relative depth maps for spatial analysis + - Use case: Quick field assessment without specialized equipment + +**2. Point Cloud Generation (Spatial Coordinates)** + - Creates 3D coordinate system (X, Y, Z) from pixels + - Each point: Geographic location + RGB color information + - Compatible with: GIS software, CAD tools, spatial databases + - Use case: Integration with existing urban datasets + +**3. 3D Mesh Generation (Surface Models)** + - Creates continuous surface from discrete points + - Similar to: Digital terrain models (DTMs) for buildings + - Output formats: Compatible with ArcGIS, QGIS, SketchUp + - Use case: 3D city models, urban visualization + +### Spatial Quality Metrics + +**For Urban Planning Applications:** + +- **Point Cloud Density**: 290K+ points = high spatial resolution +- **Geometric Accuracy**: Manifold checks ensure valid topology +- **Surface Continuity**: Watertight meshes = complete volume calculations +- **Data Fidelity**: Triangle count indicates level of detail + +**Limitations for Geographic Applications:** + +1. **Scale Ambiguity**: Requires ground control points for absolute measurements +2. **Single Viewpoint**: Cannot capture occluded facades or hidden spaces (reduced with multi-image mode) +3. **No Georeferencing**: Outputs in local coordinates, not global (lat/lon) +4. **Weather Dependent**: Best results with clear, well-lit conditions -### Multi-Image Processing & Automatic Alignment (NEW!) 
- -**Single Image Mode:** -- Fast, works from one photo -- Relative depth only (no absolute scale) -- Hidden surfaces cannot be reconstructed - -**Multiple Image Mode:** -- Upload 2-8 images of same object/scene from different angles -- **Automatic Alignment**: Uses ICP (Iterative Closest Point) algorithm to align point clouds -- **Automatic Merging**: Combines aligned point clouds into unified 3D model -- No manual alignment needed - fully automated! - -**Alignment Pipeline:** -1. **Feature Extraction**: Computes FPFH (Fast Point Feature Histograms) for each point cloud -2. **Global Registration**: RANSAC-based matching to find initial alignment -3. **Refinement**: ICP (Iterative Closest Point) for precise alignment -4. **Merging**: Combines aligned clouds, removes duplicates, creates unified mesh - -**Why Multiple Images Help:** -- Complete 360° coverage (all sides visible) -- Better accuracy through redundancy -- More complete models -- Professional-grade results automatically! """ # ============================================================================ -# MODEL LOADING +# RESPONSIBLE AI HELPER FUNCTIONS # ============================================================================ -print("Loading GLPN model...") -glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") -glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") -print("GLPN model loaded successfully!") - -# DPT will be loaded on demand -dpt_model = None -dpt_processor = None - -# ============================================================================ -# UNCERTAINTY ESTIMATION -# ============================================================================ - -def estimate_uncertainty(depth_map): +def check_image_privacy(image): """ - Estimate uncertainty in depth predictions - Higher values = less confident predictions + Check if image might contain sensitive information. + Returns warnings if potential privacy concerns detected. """ - # Compute local depth variance as proxy for uncertainty - from scipy.ndimage import generic_filter - - def local_std(values): - return np.std(values) + warnings = [] - # Compute local standard deviation - uncertainty = generic_filter(depth_map, local_std, size=5) + # Check image size - very high resolution might indicate detailed surveillance + width, height = image.size + if width * height > 4000 * 3000: + warnings.append("⚠️ High-resolution image detected. Ensure it doesn't contain identifiable individuals.") - # Normalize to 0-1 range - uncertainty = (uncertainty - uncertainty.min()) / (uncertainty.max() - uncertainty.min() + 1e-8) + # Check aspect ratio - some aspect ratios common in surveillance cameras + aspect_ratio = width / height + if aspect_ratio > 2.5 or aspect_ratio < 0.4: + warnings.append("ℹ️ Unusual aspect ratio detected. Common in security camera footage.") - return uncertainty - -# ============================================================================ -# FAILURE CASE DETECTION -# ============================================================================ + return warnings -def detect_challenging_conditions(image, depth_map): +def generate_explainability_report(metrics, depth_stats): """ - Detect challenging scenarios that may lead to poor reconstruction - Returns: List of warnings + Generate an explainability report for the reconstruction. + Helps users understand how the AI made decisions. """ - warnings = [] - - # Convert to numpy if needed - img_array = np.array(image) - - # 1. 
Check for very dark images - brightness = np.mean(img_array) - if brightness < 50: - warnings.append("⚠️ Very dark image - may reduce depth accuracy") + report = "### 🔍 AI Decision Explainability\n\n" - # 2. Check for low contrast - std_dev = np.std(img_array) - if std_dev < 30: - warnings.append("⚠️ Low contrast - uniform textures reduce accuracy") + # Depth estimation confidence + depth_range = depth_stats['max'] - depth_stats['min'] + depth_variation = depth_stats['std'] / depth_stats['mean'] - # 3. Check for potential reflective surfaces (high local variance in depth) - depth_variance = np.var(depth_map) - if depth_variance > np.percentile(np.var(depth_map.reshape(-1, 10), axis=1), 95): - warnings.append("⚠️ Possible reflective surfaces detected - depth may be inaccurate") + if depth_variation > 0.5: + report += "- **High depth variation detected**: Scene has significant depth differences (good for reconstruction)\n" + else: + report += "- **Low depth variation**: Scene is relatively flat (may limit 3D detail)\n" + + # Point cloud quality + outlier_ratio = metrics['outliers_removed'] / metrics['initial_points'] + if outlier_ratio < 0.05: + report += "- **Clean depth estimation**: AI is confident about depth predictions (< 5% outliers)\n" + elif outlier_ratio < 0.15: + report += "- **Moderate noise**: Some uncertainty in depth predictions (normal for complex scenes)\n" + else: + report += "- **High uncertainty**: AI struggled with this scene (> 15% outliers removed)\n" - # 4. Check for extreme depth discontinuities (potential transparent objects) - from scipy.ndimage import sobel - depth_edges = np.sqrt(sobel(depth_map, axis=0)**2 + sobel(depth_map, axis=1)**2) - if np.percentile(depth_edges, 99) > 3 * np.percentile(depth_edges, 95): - warnings.append("⚠️ Sharp depth discontinuities - may indicate transparent/reflective objects") + # Mesh quality + if metrics['is_watertight']: + report += "- **Complete surface reconstruction**: AI successfully closed all gaps\n" + else: + report += "- **Incomplete surface**: Some areas couldn't be reconstructed (occluded or ambiguous)\n" - # 5. 
Check image size - if image.width < 320 or image.height < 240: - warnings.append("⚠️ Low resolution image - use higher resolution for better results") + # Confidence level + if metrics['is_edge_manifold'] and outlier_ratio < 0.1: + report += "\n**Overall Confidence**: ✅ High - Results are reliable\n" + elif metrics['is_vertex_manifold']: + report += "\n**Overall Confidence**: ⚠️ Medium - Results are usable but verify quality\n" + else: + report += "\n**Overall Confidence**: ❌ Low - Results may need manual correction\n" - return warnings + return report # ============================================================================ -# AUTOMATIC ALIGNMENT FUNCTIONS +# MODEL LOADING # ============================================================================ -def align_point_clouds(point_clouds): - """ - Automatically align multiple point clouds using ICP (Iterative Closest Point) - Returns aligned point clouds and transformation matrices - """ - if len(point_clouds) <= 1: - return point_clouds, [] - - print("\n" + "="*60) - print("Starting Automatic Alignment (ICP)") - print("="*60) - - aligned_pcds = [point_clouds[0]] # First cloud is reference - transformations = [] - - for i in range(1, len(point_clouds)): - print(f"\nAligning point cloud {i+1} to reference...") - - source = point_clouds[i] - target = aligned_pcds[0] # Always align to first cloud - - # Initial alignment using global registration (faster, rough alignment) - print(f" Step 1: Computing FPFH features...") - source_down = source.voxel_down_sample(voxel_size=0.05) - target_down = target.voxel_down_sample(voxel_size=0.05) - - source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) - target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) - - source_fpfh = o3d.pipelines.registration.compute_fpfh_feature( - source_down, - o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100) - ) - target_fpfh = o3d.pipelines.registration.compute_fpfh_feature( - target_down, - o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100) - ) - - print(f" Step 2: Global registration (RANSAC)...") - result_ransac = o3d.pipelines.registration.registration_ransac_based_on_feature_matching( - source_down, target_down, source_fpfh, target_fpfh, - mutual_filter=True, - max_correspondence_distance=0.15, - estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(False), - ransac_n=3, - checkers=[ - o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9), - o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(0.15) - ], - criteria=o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999) - ) - - print(f" Global registration fitness: {result_ransac.fitness:.4f}") - - # Refine with ICP - print(f" Step 3: Refining with ICP...") - threshold = 0.02 - result_icp = o3d.pipelines.registration.registration_icp( - source, target, threshold, result_ransac.transformation, - o3d.pipelines.registration.TransformationEstimationPointToPlane() - ) - - print(f" ICP fitness: {result_icp.fitness:.4f}") - print(f" ICP RMSE: {result_icp.inlier_rmse:.6f}") - - # Apply transformation - source_aligned = source.transform(result_icp.transformation) - aligned_pcds.append(source_aligned) - transformations.append(result_icp.transformation) - - print(f" ✓ Point cloud {i+1} aligned successfully!") - - print("\n" + "="*60) - print(f"Alignment complete! 
All {len(point_clouds)} point clouds aligned.") - print("="*60 + "\n") - - return aligned_pcds, transformations - -def merge_point_clouds(aligned_pcds): - """ - Merge aligned point clouds into a single unified point cloud - """ - print("Merging aligned point clouds...") - merged = o3d.geometry.PointCloud() - - for pcd in aligned_pcds: - merged += pcd - - # Remove duplicate points and outliers - print("Cleaning merged point cloud...") - merged = merged.voxel_down_sample(voxel_size=0.01) - cl, ind = merged.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - merged = merged.select_by_index(ind) - - print(f"Merged point cloud: {len(merged.points)} points") - return merged +print("Loading GLPN model...") +glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") +glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu") +print("GLPN model loaded successfully!") -def create_mesh_from_merged_pointcloud(pcd): - """ - Create a high-quality mesh from merged point cloud - """ - print("Creating mesh from merged point cloud...") - - # Estimate normals - pcd.estimate_normals() - pcd.orient_normals_consistent_tangent_plane(100) - - # Poisson reconstruction - mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( - pcd, depth=10, n_threads=-1 - ) - - # Remove low density vertices - vertices_to_remove = densities < np.quantile(densities, 0.01) - mesh.remove_vertices_by_mask(vertices_to_remove) - - # Transfer colors - print("Transferring colors to merged mesh...") - pcd_tree = o3d.geometry.KDTreeFlann(pcd) - mesh_colors = [] - for vertex in mesh.vertices: - [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) - mesh_colors.append(pcd.colors[idx[0]]) - mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) - - # Clean up - mesh.remove_degenerate_triangles() - mesh.remove_duplicated_triangles() - mesh.remove_duplicated_vertices() - mesh.remove_non_manifold_edges() - - print(f"Merged mesh: {len(mesh.vertices)} vertices, {len(mesh.triangles)} triangles") - return mesh +# DPT will be loaded on demand +dpt_model = None +dpt_processor = None # ============================================================================ # CORE 3D RECONSTRUCTION FUNCTIONS # ============================================================================ -def process_single_image(image, model_choice, image_idx=0, total_images=1): - """Process a single image and return depth map, point cloud, mesh, and metrics""" - - print(f"\n{'='*60}") - print(f"Processing image {image_idx+1}/{total_images}") - print(f"{'='*60}") - - # STEP 1: Preprocess image - print("Step 1: Preprocessing image...") - new_height = 480 if image.height > 480 else image.height - new_height -= (new_height % 32) - new_width = int(new_height * image.width / image.height) - diff = new_width % 32 - new_width = new_width - diff if diff < 16 else new_width + (32 - diff) - new_size = (new_width, new_height) - image = image.resize(new_size, Image.LANCZOS) - print(f"Image resized to: {new_size}") - - # STEP 2: Depth estimation - print("Step 2: Estimating depth...") +def estimate_depth_for_image(image, model_choice): + """Estimate depth for a single image""" if model_choice == "GLPN (Recommended)": processor = glpn_processor model = glpn_model @@ -456,658 +256,549 @@ def process_single_image(image, model_choice, image_idx=0, total_images=1): inputs = processor(images=image, return_tensors="pt") - start_time = time.time() with torch.no_grad(): outputs = model(**inputs) predicted_depth = 
outputs.predicted_depth - depth_time = time.time() - start_time - print(f"Depth estimation completed in {depth_time:.2f}s") - - # Process depth output - pad = 16 - output = predicted_depth.squeeze().cpu().numpy() * 1000.0 - output = output[pad:-pad, pad:-pad] - image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad)) - - # Ensure depth and image have same dimensions - depth_height, depth_width = output.shape - img_width, img_height = image_cropped.size - - print(f"After crop - Depth shape: {output.shape}, Image size: {image_cropped.size}") - - # Resize depth to match image if needed - if depth_height != img_height or depth_width != img_width: - print(f"Resizing depth from ({depth_height}, {depth_width}) to ({img_height}, {img_width})") - from scipy import ndimage - zoom_factors = (img_height / depth_height, img_width / depth_width) - output = ndimage.zoom(output, zoom_factors, order=1) - print(f"Depth resized to: {output.shape}") - - image = image_cropped - - # STEP 3: Estimate uncertainty - print("Step 3: Estimating uncertainty...") - uncertainty_map = estimate_uncertainty(output) - - # STEP 4: Detect challenging conditions - print("Step 4: Detecting challenging conditions...") - warnings = detect_challenging_conditions(image, output) - - # STEP 5: Create point cloud - print("Step 5: Generating point cloud...") - width, height = image.size - - depth_image = (output * 255 / np.max(output)).astype(np.uint8) - image_array = np.array(image) - - print(f"Creating RGBD - Image: {image_array.shape}, Depth: {depth_image.shape}") - - depth_o3d = o3d.geometry.Image(depth_image) - image_o3d = o3d.geometry.Image(image_array) - rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( - image_o3d, depth_o3d, convert_rgb_to_intensity=False - ) - - camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() - camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2) - - pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic) - initial_points = len(pcd.points) - print(f"Initial point cloud: {initial_points} points") - - # STEP 6: Clean point cloud - print("Step 6: Cleaning point cloud...") - cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - pcd = pcd.select_by_index(ind) - outliers_removed = initial_points - len(pcd.points) - print(f"Removed {outliers_removed} outliers") - - # STEP 7: Estimate normals - print("Step 7: Estimating normals...") - pcd.estimate_normals() - pcd.orient_normals_to_align_with_direction() - - # STEP 8: Create mesh - print("Step 8: Creating mesh...") - mesh_start = time.time() - mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( - pcd, depth=10, n_threads=1 - )[0] - - # Transfer colors from point cloud to mesh vertices - print("Transferring colors to mesh...") - pcd_tree = o3d.geometry.KDTreeFlann(pcd) - mesh_colors = [] - for vertex in mesh.vertices: - [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) - mesh_colors.append(pcd.colors[idx[0]]) - mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) - - # Rotate mesh - rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) - mesh.rotate(rotation, center=(0, 0, 0)) - mesh_time = time.time() - mesh_start - print(f"Mesh created in {mesh_time:.2f}s") + return predicted_depth + +def merge_point_clouds(point_clouds, colors_list): + """ + Merge multiple point clouds with basic alignment. + Note: This is a simple merging strategy. For better results, + consider using registration algorithms (ICP, etc.) 
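+    A minimal sketch of one such refinement, assuming source_pcd and target_pcd
+    are placeholder, roughly pre-aligned clouds and 0.02 is an illustrative
+    correspondence threshold:
+
+        result = o3d.pipelines.registration.registration_icp(
+            source_pcd, target_pcd, 0.02,
+            estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint())
+        source_pcd.transform(result.transformation)  # apply the estimated 4x4 transform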
+ """ + all_points = [] + all_colors = [] - # STEP 9: Compute quality metrics - print("Step 9: Computing metrics...") - mesh.compute_vertex_normals() + for i, (points, colors) in enumerate(zip(point_clouds, colors_list)): + # Simple offset strategy to prevent complete overlap + offset = np.array([i * 0.5, 0, 0]) # Offset along X-axis + all_points.append(points + offset) + all_colors.append(colors) - metrics = { - 'image_index': image_idx + 1, - 'model_used': model_choice, - 'depth_estimation_time': f"{depth_time:.2f}s", - 'mesh_reconstruction_time': f"{mesh_time:.2f}s", - 'total_time': f"{depth_time + mesh_time:.2f}s", - 'initial_points': initial_points, - 'outliers_removed': outliers_removed, - 'final_points': len(pcd.points), - 'vertices': len(mesh.vertices), - 'triangles': len(mesh.triangles), - 'is_edge_manifold': mesh.is_edge_manifold(), - 'is_vertex_manifold': mesh.is_vertex_manifold(), - 'is_watertight': mesh.is_watertight(), - 'warnings': warnings, - 'avg_uncertainty': float(np.mean(uncertainty_map)) - } + merged_points = np.vstack(all_points) + merged_colors = np.vstack(all_colors) - # Compute surface area - try: - vertices = np.asarray(mesh.vertices) - triangles = np.asarray(mesh.triangles) - v0 = vertices[triangles[:, 0]] - v1 = vertices[triangles[:, 1]] - v2 = vertices[triangles[:, 2]] - cross = np.cross(v1 - v0, v2 - v0) - areas = 0.5 * np.linalg.norm(cross, axis=1) - total_area = np.sum(areas) - metrics['surface_area'] = float(total_area) - except: - metrics['surface_area'] = "Unable to compute" + return merged_points, merged_colors + +def process_image(images, model_choice="GLPN (Recommended)", visualization_type="mesh", enable_privacy_check=True): + """Main processing pipeline - supports single or multiple images""" - # Compute volume if watertight - try: - if mesh.is_watertight(): - volume = mesh.get_volume() - metrics['volume'] = float(volume) + def _generate_quality_assessment(metrics): + """Generate quality assessment based on metrics""" + assessment = [] + + # Check outlier removal + outlier_pct = (metrics['outliers_removed'] / metrics['initial_points']) * 100 + if outlier_pct < 5: + assessment.append("Very clean depth estimation (low noise)") + elif outlier_pct < 15: + assessment.append("Good depth quality (normal noise level)") else: - metrics['volume'] = None - except: - metrics['volume'] = None - - return { - 'image': image, - 'depth_map': output, - 'uncertainty_map': uncertainty_map, - 'point_cloud': pcd, - 'mesh': mesh, - 'metrics': metrics, - 'warnings': warnings - } - -def process_image(images, model_choice="GLPN (Recommended)", visualization_type="mesh", enable_alignment=True): - """Main processing pipeline - handles single or multiple images with automatic alignment""" + assessment.append("High noise in depth estimation") + + # Check manifold properties + if metrics['is_edge_manifold'] and metrics['is_vertex_manifold']: + assessment.append("Excellent topology - mesh is well-formed") + elif metrics['is_vertex_manifold']: + assessment.append("Good local topology but has some edge issues") + else: + assessment.append("Topology issues present - may need cleanup") + + # Check watertight + if metrics['is_watertight']: + assessment.append("Watertight mesh - ready for 3D printing!") + else: + assessment.append("Not watertight - use MeshLab's 'Close Holes' for 3D printing") + + # Check complexity + if metrics['triangles'] > 1000000: + assessment.append("Very detailed mesh - may be slow in some software") + elif metrics['triangles'] > 500000: + assessment.append("High 
detail mesh - good quality") + else: + assessment.append("Moderate detail - good balance of quality and performance") + + return "\n".join(f"- {item}" for item in assessment) if images is None or len(images) == 0: - return None, None, None, "Please upload at least one image.", None + return None, None, None, "Please upload at least one image.", None, None + + # Handle single image case + if not isinstance(images, list): + images = [images] try: - # Handle single image vs multiple images - if not isinstance(images, list): - images = [images] - num_images = len(images) - print(f"\n{'#'*60}") - print(f"Starting reconstruction with {num_images} image(s)") - print(f"Model: {model_choice}") - print(f"Automatic Alignment: {'Enabled' if enable_alignment and num_images > 1 else 'Disabled'}") - print(f"{'#'*60}\n") + print(f"Starting reconstruction with {num_images} image(s) using {model_choice}...") - # Process each image - results = [] - for idx, img in enumerate(images): - result = process_single_image(img, model_choice, idx, num_images) - results.append(result) + # Privacy checks if enabled + privacy_warnings = [] + if enable_privacy_check: + for idx, img in enumerate(images): + warnings = check_image_privacy(img) + if warnings: + privacy_warnings.extend([f"Image {idx+1}: {w}" for w in warnings]) - # AUTOMATIC ALIGNMENT for multiple images - aligned_pcds = None - merged_pcd = None - merged_mesh = None - alignment_info = "" + privacy_report = "" + if privacy_warnings: + privacy_report = "### 🔒 Privacy Considerations\n\n" + "\n".join(privacy_warnings) + "\n\n" - if num_images > 1 and enable_alignment: - try: - # Extract point clouds - point_clouds = [r['point_cloud'] for r in results] - - # Align them - aligned_pcds, transformations = align_point_clouds(point_clouds) - - # Merge into single point cloud - merged_pcd = merge_point_clouds(aligned_pcds) - - # Create unified mesh - merged_mesh = create_mesh_from_merged_pointcloud(merged_pcd) - - alignment_info = f""" -### ✨ Automatic Alignment Results - -Successfully aligned and merged {num_images} point clouds! - -**Alignment Quality:** -""" - for i, trans in enumerate(transformations): - translation = np.linalg.norm(trans[:3, 3]) - alignment_info += f"- Image {i+2} → Image 1: Translation distance = {translation:.3f} units\n" - - alignment_info += f""" -**Merged Model Statistics:** -- Total Points: {len(merged_pcd.points):,} -- Mesh Vertices: {len(merged_mesh.vertices):,} -- Mesh Triangles: {len(merged_mesh.triangles):,} -- Watertight: {'✓ Yes' if merged_mesh.is_watertight() else '✗ No (may need repair)'} - -*The merged model provides a complete 360° reconstruction!* -""" - except Exception as e: - print(f"Alignment failed: {e}") - import traceback - traceback.print_exc() - alignment_info = f""" -### ⚠️ Automatic Alignment Failed - -Error: {str(e)} - -**Fallback:** Individual models exported separately. You can try manual alignment in CloudCompare/MeshLab. - -**Common causes:** -- Insufficient overlap between images -- Very different viewpoints -- Lack of distinctive features -- Reflective/transparent surfaces -""" - - # Create combined visualizations - print("\n" + "="*60) - print("Creating visualizations...") - print("="*60) + # Process each image + all_point_clouds = [] + all_colors = [] + depth_visualizations = [] + depth_stats_list = [] + total_depth_time = 0 - # 1. 
DEPTH MAP COMPARISON (for first image or grid for multiple) - if num_images == 1: - # Single image visualization - result = results[0] - fig, ax = plt.subplots(1, 3, figsize=(18, 6)) - - ax[0].imshow(result['image']) - ax[0].set_title('Original Image', fontsize=14, fontweight='bold') + for idx, image in enumerate(images): + print(f"\n=== Processing Image {idx+1}/{num_images} ===") + + # STEP 1: Preprocess image + print(f"Image {idx+1}: Preprocessing...") + new_height = 480 if image.height > 480 else image.height + new_height -= (new_height % 32) + new_width = int(new_height * image.width / image.height) + diff = new_width % 32 + new_width = new_width - diff if diff < 16 else new_width + (32 - diff) + new_size = (new_width, new_height) + image = image.resize(new_size, Image.LANCZOS) + print(f"Image {idx+1} resized to: {new_size}") + + # STEP 2: Depth estimation + print(f"Image {idx+1}: Estimating depth...") + start_time = time.time() + predicted_depth = estimate_depth_for_image(image, model_choice) + depth_time = time.time() - start_time + total_depth_time += depth_time + print(f"Image {idx+1}: Depth estimation completed in {depth_time:.2f}s") + + # Process depth output + pad = 16 + output = predicted_depth.squeeze().cpu().numpy() * 1000.0 + output = output[pad:-pad, pad:-pad] + image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad)) + + # Ensure depth and image have same dimensions + depth_height, depth_width = output.shape + img_width, img_height = image_cropped.size + + if depth_height != img_height or depth_width != img_width: + from scipy import ndimage + zoom_factors = (img_height / depth_height, img_width / depth_width) + output = ndimage.zoom(output, zoom_factors, order=1) + + image = image_cropped + + # Store depth statistics for explainability + depth_stats = { + 'min': float(np.min(output)), + 'max': float(np.max(output)), + 'mean': float(np.mean(output)), + 'std': float(np.std(output)) + } + depth_stats_list.append(depth_stats) + + # Create depth visualization + fig, ax = plt.subplots(1, 2, figsize=(14, 7)) + ax[0].imshow(image) + ax[0].set_title(f'Image {idx+1}: Original', fontsize=14, fontweight='bold') ax[0].axis('off') - im1 = ax[1].imshow(result['depth_map'], cmap='plasma') - ax[1].set_title('Depth Map', fontsize=14, fontweight='bold') + im = ax[1].imshow(output, cmap='plasma') + ax[1].set_title(f'Image {idx+1}: Depth Map', fontsize=14, fontweight='bold') ax[1].axis('off') - plt.colorbar(im1, ax=ax[1], fraction=0.046, pad=0.04) + plt.colorbar(im, ax=ax[1], fraction=0.046, pad=0.04) + plt.tight_layout() - im2 = ax[2].imshow(result['uncertainty_map'], cmap='Reds') - ax[2].set_title('Uncertainty Map (Red = Less Confident)', fontsize=14, fontweight='bold') - ax[2].axis('off') - plt.colorbar(im2, ax=ax[2], fraction=0.046, pad=0.04) + buf = io.BytesIO() + plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') + buf.seek(0) + depth_viz = Image.open(buf) + depth_visualizations.append(depth_viz) + plt.close() + + # STEP 4: Create point cloud for this image + print(f"Image {idx+1}: Generating point cloud...") + width, height = image.size + + if output.shape != (height, width): + from scipy import ndimage + zoom_factors = (height / output.shape[0], width / output.shape[1]) + output = ndimage.zoom(output, zoom_factors, order=1) + + depth_image = (output * 255 / np.max(output)).astype(np.uint8) + image_array = np.array(image) + + depth_o3d = o3d.geometry.Image(depth_image) + image_o3d = o3d.geometry.Image(image_array) + rgbd_image = 
o3d.geometry.RGBDImage.create_from_color_and_depth( + image_o3d, depth_o3d, convert_rgb_to_intensity=False + ) - plt.tight_layout() + camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() + camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2) + + pcd_temp = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic) + + # Store points and colors for merging + all_point_clouds.append(np.asarray(pcd_temp.points)) + all_colors.append(np.asarray(pcd_temp.colors)) + + print(f"Image {idx+1}: Generated {len(pcd_temp.points)} points") + + # Combine depth visualizations + if len(depth_visualizations) == 1: + combined_depth_viz = depth_visualizations[0] else: - # Multiple images - create grid - rows = (num_images + 1) // 2 - fig, axes = plt.subplots(rows, 6, figsize=(24, 4*rows)) + # Create a grid of depth visualizations + cols = min(2, len(depth_visualizations)) + rows = (len(depth_visualizations) + cols - 1) // cols + + fig, axes = plt.subplots(rows, cols, figsize=(14 * cols, 7 * rows)) if rows == 1: - axes = axes.reshape(1, -1) + axes = [axes] if cols == 1 else axes + else: + axes = axes.flatten() - for idx, result in enumerate(results): - row = idx // 2 - col = (idx % 2) * 3 - - axes[row, col].imshow(result['image']) - axes[row, col].set_title(f'Image {idx+1}', fontsize=12, fontweight='bold') - axes[row, col].axis('off') - - im1 = axes[row, col+1].imshow(result['depth_map'], cmap='plasma') - axes[row, col+1].set_title(f'Depth {idx+1}', fontsize=12, fontweight='bold') - axes[row, col+1].axis('off') - - im2 = axes[row, col+2].imshow(result['uncertainty_map'], cmap='Reds') - axes[row, col+2].set_title(f'Uncertainty {idx+1}', fontsize=12, fontweight='bold') - axes[row, col+2].axis('off') + for idx, depth_viz in enumerate(depth_visualizations): + axes[idx].imshow(depth_viz) + axes[idx].axis('off') + axes[idx].set_title(f'Image {idx+1}', fontsize=16, fontweight='bold') # Hide unused subplots - for idx in range(num_images, rows * 2): - row = idx // 2 - for col in range(3): - axes[row, (idx % 2) * 3 + col].axis('off') + for idx in range(len(depth_visualizations), len(axes)): + axes[idx].axis('off') plt.tight_layout() + buf = io.BytesIO() + plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') + buf.seek(0) + combined_depth_viz = Image.open(buf) + plt.close() - buf = io.BytesIO() - plt.savefig(buf, format='png', dpi=150, bbox_inches='tight') - buf.seek(0) - depth_viz = Image.open(buf) - plt.close() + # STEP 5: Merge point clouds if multiple images + print(f"\nMerging {num_images} point cloud(s)...") + if num_images > 1: + merged_points, merged_colors = merge_point_clouds(all_point_clouds, all_colors) + else: + merged_points = all_point_clouds[0] + merged_colors = all_colors[0] - # 2. 
3D VISUALIZATION - print("Creating 3D visualization...") + # Create combined point cloud + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(merged_points) + pcd.colors = o3d.utility.Vector3dVector(merged_colors) - if num_images == 1: - # Single visualization - result = results[0] - points = np.asarray(result['point_cloud'].points) - colors = np.asarray(result['point_cloud'].colors) - mesh = result['mesh'] - - if visualization_type == "point_cloud": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Point Cloud' - ) - - plotly_fig = go.Figure(data=[scatter]) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title="Point Cloud" - ) - - elif visualization_type == "mesh": - vertices = np.asarray(mesh.vertices) - triangles = np.asarray(mesh.triangles) - - if mesh.has_vertex_colors(): - vertex_colors = np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Mesh' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Mesh' - ) - - plotly_fig = go.Figure(data=[mesh_trace]) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title="3D Mesh" - ) - - else: # both - from plotly.subplots import make_subplots - + initial_points = len(pcd.points) + print(f"Combined point cloud: {initial_points} points") + + # STEP 6: Clean point cloud + print("Cleaning combined point cloud...") + cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) + pcd = pcd.select_by_index(ind) + outliers_removed = initial_points - len(pcd.points) + print(f"Removed {outliers_removed} outliers") + + # STEP 7: Estimate normals + print("Estimating normals...") + pcd.estimate_normals() + pcd.orient_normals_to_align_with_direction() + + # STEP 8: Create mesh + print("Creating mesh...") + mesh_start = time.time() + mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( + pcd, depth=10, n_threads=1 + )[0] + + # Transfer colors from point cloud to mesh vertices + print("Transferring colors to mesh...") + pcd_tree = o3d.geometry.KDTreeFlann(pcd) + mesh_colors = [] + for vertex in mesh.vertices: + [_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1) + mesh_colors.append(pcd.colors[idx[0]]) + mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors)) + + # Rotate mesh + rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) + mesh.rotate(rotation, center=(0, 0, 0)) + mesh_time = time.time() - mesh_start + print(f"Mesh created in {mesh_time:.2f}s") + + # STEP 9: Compute quality metrics + print("Computing metrics...") + mesh.compute_vertex_normals() + + metrics = { + 'model_used': model_choice, + 
'num_images': num_images, + 'depth_estimation_time': f"{total_depth_time:.2f}s", + 'mesh_reconstruction_time': f"{mesh_time:.2f}s", + 'total_time': f"{total_depth_time + mesh_time:.2f}s", + 'initial_points': initial_points, + 'outliers_removed': outliers_removed, + 'final_points': len(pcd.points), + 'vertices': len(mesh.vertices), + 'triangles': len(mesh.triangles), + 'is_edge_manifold': mesh.is_edge_manifold(), + 'is_vertex_manifold': mesh.is_vertex_manifold(), + 'is_watertight': mesh.is_watertight(), + } + + # Compute surface area + surface_area_computed = False + try: + surface_area = mesh.get_surface_area() + if surface_area > 0: + metrics['surface_area'] = float(surface_area) + surface_area_computed = True + except: + pass + + if not surface_area_computed: + try: vertices = np.asarray(mesh.vertices) triangles = np.asarray(mesh.triangles) - - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Point Cloud' - ) - - if mesh.has_vertex_colors(): - vertex_colors = np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Mesh' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Mesh' - ) - - plotly_fig = make_subplots( - rows=1, cols=2, - specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]], - subplot_titles=('Point Cloud', '3D Mesh') - ) - - plotly_fig.add_trace(scatter, row=1, col=1) - plotly_fig.add_trace(mesh_trace, row=1, col=2) - - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - scene2=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - height=600, - showlegend=False, - margin=dict(l=0, r=0, t=50, b=0) - ) + v0 = vertices[triangles[:, 0]] + v1 = vertices[triangles[:, 1]] + v2 = vertices[triangles[:, 2]] + cross = np.cross(v1 - v0, v2 - v0) + areas = 0.5 * np.linalg.norm(cross, axis=1) + total_area = np.sum(areas) + metrics['surface_area'] = float(total_area) + surface_area_computed = True + except: + metrics['surface_area'] = "Unable to compute" - else: - # Multiple images - show all reconstructions - traces = [] + # Compute volume + try: + if mesh.is_watertight(): + volume = mesh.get_volume() + metrics['volume'] = float(volume) + else: + metrics['volume'] = None + except: + metrics['volume'] = None + + print("Metrics computed!") + + # STEP 10: Create 3D visualization + print("Creating 3D visualization...") + points = np.asarray(pcd.points) + colors = np.asarray(pcd.colors) + + if visualization_type == "point_cloud": + scatter = go.Scatter3d( + x=points[:, 0], y=points[:, 1], z=points[:, 2], + mode='markers', + marker=dict( + size=2, + color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in colors], + ), + name='Point Cloud' + ) - if merged_pcd is not None and merged_mesh is not None: - # Show the merged result - 
points = np.asarray(merged_pcd.points) - colors = np.asarray(merged_pcd.colors) - - if visualization_type == "point_cloud" or visualization_type == "both": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=1.5, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name='Merged Point Cloud' - ) - traces.append(scatter) - - if visualization_type == "mesh" or visualization_type == "both": - vertices = np.asarray(merged_mesh.vertices) - triangles = np.asarray(merged_mesh.triangles) - - if merged_mesh.has_vertex_colors(): - vertex_colors = np.asarray(merged_mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name='Merged Mesh', - lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), - lightposition=dict(x=100, y=100, z=100) - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name='Merged Mesh' - ) - traces.append(mesh_trace) + layout = go.Layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + margin=dict(l=0, r=0, t=30, b=0), + height=700, + title="Point Cloud" + ) + + plotly_fig = go.Figure(data=[scatter], layout=layout) + + elif visualization_type == "mesh": + vertices = np.asarray(mesh.vertices) + triangles = np.asarray(mesh.triangles) + + if mesh.has_vertex_colors(): + vertex_colors = np.asarray(mesh.vertex_colors) + colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in vertex_colors] - plotly_fig = go.Figure(data=traces) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title=f"Merged Reconstruction from {num_images} Images" + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + vertexcolor=colors_rgb, + opacity=0.95, + name='Mesh', + lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), + lightposition=dict(x=100, y=100, z=100) ) else: - # Fallback: show individual reconstructions side by side - for idx, result in enumerate(results): - points = np.asarray(result['point_cloud'].points) - colors = np.asarray(result['point_cloud'].colors) - - # Offset each point cloud to separate them - offset = idx * 2 - points[:, 0] += offset - - if visualization_type == "point_cloud" or visualization_type == "both": - scatter = go.Scatter3d( - x=points[:, 0], y=points[:, 1], z=points[:, 2], - mode='markers', - marker=dict( - size=2, - color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in colors], - ), - name=f'Point Cloud {idx+1}' - ) - traces.append(scatter) - - if visualization_type == "mesh" or visualization_type == "both": - mesh = result['mesh'] - vertices = np.asarray(mesh.vertices) - vertices[:, 0] += offset # Apply same offset - triangles = np.asarray(mesh.triangles) - - if mesh.has_vertex_colors(): - vertex_colors = 
np.asarray(mesh.vertex_colors) - colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) - for r, g, b in vertex_colors] - - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - vertexcolor=colors_rgb, - opacity=0.95, - name=f'Mesh {idx+1}' - ) - else: - mesh_trace = go.Mesh3d( - x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], - i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], - color='lightblue', - opacity=0.9, - name=f'Mesh {idx+1}' - ) - traces.append(mesh_trace) - - plotly_fig = go.Figure(data=traces) - plotly_fig.update_layout( - scene=dict( - xaxis=dict(visible=False), - yaxis=dict(visible=False), - zaxis=dict(visible=False), - aspectmode='data', - camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) - ), - margin=dict(l=0, r=0, t=30, b=0), - height=700, - title=f"Individual Reconstructions (Side by Side)" + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + color='lightblue', + opacity=0.9, + name='Mesh' ) - - # 3. EXPORT FILES - print("Exporting files...") - temp_dir = tempfile.mkdtemp() - - all_metrics = [] - for idx, result in enumerate(results): - prefix = f"image_{idx+1}_" if num_images > 1 else "" - # Save point cloud - pcd_path = Path(temp_dir) / f"{prefix}point_cloud.ply" - o3d.io.write_point_cloud(str(pcd_path), result['point_cloud']) + layout = go.Layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + margin=dict(l=0, r=0, t=30, b=0), + height=700, + title="3D Mesh" + ) - # Save mesh - mesh_path = Path(temp_dir) / f"{prefix}mesh.ply" - o3d.io.write_triangle_mesh(str(mesh_path), result['mesh']) + plotly_fig = go.Figure(data=[mesh_trace], layout=layout) - mesh_obj_path = Path(temp_dir) / f"{prefix}mesh.obj" - o3d.io.write_triangle_mesh(str(mesh_obj_path), result['mesh']) + else: # both + from plotly.subplots import make_subplots - mesh_stl_path = Path(temp_dir) / f"{prefix}mesh.stl" - o3d.io.write_triangle_mesh(str(mesh_stl_path), result['mesh']) + vertices = np.asarray(mesh.vertices) + triangles = np.asarray(mesh.triangles) - all_metrics.append(result['metrics']) - - # Save merged results if available - if merged_pcd is not None and merged_mesh is not None: - merged_pcd_path = Path(temp_dir) / "MERGED_point_cloud.ply" - o3d.io.write_point_cloud(str(merged_pcd_path), merged_pcd) + scatter = go.Scatter3d( + x=points[:, 0], y=points[:, 1], z=points[:, 2], + mode='markers', + marker=dict( + size=2, + color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in colors], + ), + name='Point Cloud' + ) - merged_mesh_path = Path(temp_dir) / "MERGED_mesh.ply" - o3d.io.write_triangle_mesh(str(merged_mesh_path), merged_mesh) + if mesh.has_vertex_colors(): + vertex_colors = np.asarray(mesh.vertex_colors) + colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255)) + for r, g, b in vertex_colors] + + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2], + vertexcolor=colors_rgb, + opacity=0.95, + name='Mesh', + lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2), + lightposition=dict(x=100, y=100, z=100) + ) + else: + mesh_trace = go.Mesh3d( + x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2], + i=triangles[:, 0], j=triangles[:, 1], 
k=triangles[:, 2], + color='lightblue', + opacity=0.9, + name='Mesh' + ) - merged_obj_path = Path(temp_dir) / "MERGED_mesh.obj" - o3d.io.write_triangle_mesh(str(merged_obj_path), merged_mesh) + plotly_fig = make_subplots( + rows=1, cols=2, + specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]], + subplot_titles=('Point Cloud', '3D Mesh'), + horizontal_spacing=0.05 + ) - merged_stl_path = Path(temp_dir) / "MERGED_mesh.stl" - o3d.io.write_triangle_mesh(str(merged_stl_path), merged_mesh) + plotly_fig.add_trace(scatter, row=1, col=1) + plotly_fig.add_trace(mesh_trace, row=1, col=2) + + plotly_fig.update_layout( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + scene2=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False), + aspectmode='data', + camera=dict(eye=dict(x=1.5, y=1.5, z=1.5)) + ), + height=600, + showlegend=False, + margin=dict(l=0, r=0, t=50, b=0) + ) - # Save combined metrics - combined_metrics = { - 'total_images': num_images, - 'processing_date': datetime.now().isoformat(), - 'model_used': model_choice, - 'alignment_enabled': enable_alignment and num_images > 1, - 'alignment_successful': merged_pcd is not None, - 'individual_results': all_metrics - } + print("3D visualization created!") - if merged_mesh is not None: - combined_metrics['merged_stats'] = { - 'points': len(merged_pcd.points), - 'vertices': len(merged_mesh.vertices), - 'triangles': len(merged_mesh.triangles), - 'is_watertight': merged_mesh.is_watertight() - } + # STEP 11: Export files + print("Exporting files...") + temp_dir = tempfile.mkdtemp() + + # Save point cloud + pcd_path = Path(temp_dir) / "point_cloud.ply" + o3d.io.write_point_cloud(str(pcd_path), pcd) + + # Save mesh + mesh_path = Path(temp_dir) / "mesh.ply" + o3d.io.write_triangle_mesh(str(mesh_path), mesh) + + # Save mesh as OBJ + mesh_obj_path = Path(temp_dir) / "mesh.obj" + o3d.io.write_triangle_mesh(str(mesh_obj_path), mesh) + + # Save mesh as STL + mesh_stl_path = Path(temp_dir) / "mesh.stl" + o3d.io.write_triangle_mesh(str(mesh_stl_path), mesh) + # Save metrics metrics_path = Path(temp_dir) / "metrics.json" with open(metrics_path, 'w') as f: - json.dump(combined_metrics, f, indent=2, default=str) + json.dump(metrics, f, indent=2, default=str) # Create zip - zip_filename = f"reconstruction_{num_images}_images.zip" if num_images > 1 else "reconstruction_complete.zip" - zip_path = Path(temp_dir) / zip_filename + zip_path = Path(temp_dir) / "reconstruction_complete.zip" with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: - for file in Path(temp_dir).glob("*"): - if file.suffix != '.zip': - zipf.write(file, file.name) + zipf.write(pcd_path, pcd_path.name) + zipf.write(mesh_path, mesh_path.name) + zipf.write(mesh_obj_path, mesh_obj_path.name) + zipf.write(mesh_stl_path, mesh_stl_path.name) + zipf.write(metrics_path, metrics_path.name) print("Files exported!") - # 4. 
CREATE REPORT - if num_images == 1: - result = results[0] - metrics = result['metrics'] - warnings = result['warnings'] - - warnings_section = "" - if warnings: - warnings_section = "### ⚠️ Detected Challenging Conditions\n" + "\n".join(warnings) + "\n\n" - - report = f""" + # Create metrics report + assessment = _generate_quality_assessment(metrics) + + # Generate explainability report + avg_depth_stats = { + 'min': np.mean([d['min'] for d in depth_stats_list]), + 'max': np.mean([d['max'] for d in depth_stats_list]), + 'mean': np.mean([d['mean'] for d in depth_stats_list]), + 'std': np.mean([d['std'] for d in depth_stats_list]) + } + explainability = generate_explainability_report(metrics, avg_depth_stats) + + multi_image_note = "" + if num_images > 1: + multi_image_note = f""" +### 📸 Multi-Image Reconstruction +- **Number of Images**: {num_images} +- **Combined Points**: {initial_points:,} (before cleaning) +- **Advantage**: Better coverage and reduced occlusion compared to single image +- **Note**: Images were combined using simple spatial offset. For production use, consider advanced registration algorithms (ICP, feature matching). +""" + + report = f""" ## Reconstruction Complete! -{warnings_section} +{privacy_report} + +{multi_image_note} ### Performance Metrics - **Model Used**: {metrics['model_used']} +- **Number of Images**: {metrics['num_images']} - **Depth Estimation Time**: {metrics['depth_estimation_time']} - **Mesh Reconstruction Time**: {metrics['mesh_reconstruction_time']} - **Total Processing Time**: {metrics['total_time']} @@ -1126,10 +817,10 @@ Error: {str(e)} - **Surface Area**: {metrics['surface_area'] if isinstance(metrics['surface_area'], str) else f"{metrics['surface_area']:.2f}"} - **Volume**: {f"{metrics['volume']:.2f}" if metrics.get('volume') else 'N/A (not watertight)'} -### Explainability Metrics -- **Average Uncertainty**: {metrics['avg_uncertainty']:.3f} (lower is better) - - Uncertainty shows where the model is less confident - - Check the red heatmap for spatial distribution of uncertainty +### Quality Assessment +{assessment} + +{explainability} ### Files Exported - Point Cloud: PLY format @@ -1137,80 +828,16 @@ Error: {str(e)} - Quality Metrics: JSON **Download the complete package below!** - """ - else: - # Multiple images report - total_time = sum(float(r['metrics']['total_time'].replace('s', '')) for r in results) - total_points = sum(r['metrics']['final_points'] for r in results) - total_vertices = sum(r['metrics']['vertices'] for r in results) - - all_warnings = [] - for idx, result in enumerate(results): - if result['warnings']: - all_warnings.append(f"\n**Image {idx+1}:**\n" + "\n".join(result['warnings'])) - - warnings_section = "" - if all_warnings: - warnings_section = "### ⚠️ Detected Challenging Conditions\n" + "\n".join(all_warnings) + "\n\n" - - report = f""" -## Multi-Image Reconstruction Complete! - -Processed {num_images} images successfully. 
- -{alignment_info} - -{warnings_section} - -### Overall Statistics -- **Total Processing Time**: {total_time:.2f}s -- **Total Final Points** (individual): {total_points:,} -- **Total Vertices** (individual): {total_vertices:,} -- **Model Used**: {model_choice} - -### Individual Image Results - -""" - for idx, result in enumerate(results): - m = result['metrics'] - report += f""" -#### Image {idx+1} -- Points: {m['final_points']:,} -- Vertices: {m['vertices']:,} -- Triangles: {m['triangles']:,} -- Watertight: {'✓' if m['is_watertight'] else '✗'} -- Time: {m['total_time']} -- Avg Uncertainty: {m['avg_uncertainty']:.3f} - -""" - - report += f""" -### Files Exported -- {num_images} Individual Point Clouds (PLY format) -- {num_images} Individual Meshes (PLY, OBJ, STL formats)""" - - if merged_pcd is not None: - report += """ -- **MERGED_point_cloud.ply** - Unified aligned point cloud ⭐ -- **MERGED_mesh.ply/obj/stl** - Unified aligned mesh ⭐""" - - report += """ -- Combined Metrics (JSON) - -**Download the complete package below!** - """ - - # Create JSON output - json_output = json.dumps(combined_metrics, indent=2, default=str) + """ print("SUCCESS! Returning results...") - return depth_viz, plotly_fig, str(zip_path), report, json_output + return combined_depth_viz, plotly_fig, str(zip_path), report, json.dumps(metrics, indent=2, default=str), privacy_report except Exception as e: import traceback error_msg = f"Error during reconstruction:\n{str(e)}\n\nTraceback:\n{traceback.format_exc()}" print(error_msg) - return None, None, None, error_msg, None + return None, None, None, error_msg, None, None # ============================================================================ # GRADIO INTERFACE @@ -1219,22 +846,16 @@ Processed {num_images} images successfully. with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo: gr.Markdown(""" - # 🏗️ 3D Urban Reconstruction from Images + # 🗿️ 3D Urban Reconstruction from Single or Multiple Images - Transform 2D photographs into 3D spatial models with Responsible AI features + Transform 2D photographs into 3D spatial models with **Responsible AI** practices - **NEW:** Multi-image support! Upload 1-8 images for more complete reconstructions. - """) + Upload one or multiple photographs to generate interactive 3D models with exportable spatial data. - # Responsible AI Warning Banner - gr.Markdown(""" -