import torch


def normalize_poses(extrinsics, padding=0.1, return_stats=False):
    """
    Normalize camera positions to the unit cube, processing each batch separately.

    Args:
        extrinsics: Camera extrinsic matrices with shape (B, S, 3, 4)
        padding: Boundary space within the [0, 1] range to keep values away from the edges
        return_stats: Whether to return normalization statistics

    Returns:
        normalized_extrinsics: Normalized extrinsic matrices
        (optional) stats: Dictionary containing scale and translation information
    """
    B, S, _, _ = extrinsics.shape
    device = extrinsics.device

    # Check input validity and handle NaN/Inf values
    for i in range(B):
        if torch.isnan(extrinsics[i]).any() or torch.isinf(extrinsics[i]).any():
            print(f"Warning: batch sample {i} has NaN/Inf in extrinsics")
            extrinsics[i] = torch.nan_to_num(
                extrinsics[i], nan=0.0, posinf=1e6, neginf=-1e6
            )

    normalized_extrinsics = extrinsics.clone()

    # Store normalization parameters if needed
    if return_stats:
        stats = {
            'scale_factors': torch.zeros(B, device=device),
            'translation_vectors': torch.zeros(B, 3, device=device)
        }

    for b in range(B):
        # Extract camera positions (translation column) for this batch
        positions = extrinsics[b, :, :3, 3]  # (S, 3)

        # Filter valid positions to ignore outliers
        valid_mask = torch.isfinite(positions).all(dim=1)  # (S,)
        if valid_mask.sum() == 0:
            # No valid positions, use default values
            print(f"Warning: Batch {b} has no valid camera positions")
            normalized_extrinsics[b, :, :3, 3] = 0.5  # Place at center
            if return_stats:
                stats['scale_factors'][b] = 1.0
                stats['translation_vectors'][b] = 0.0
            continue

        valid_positions = positions[valid_mask]

        # Calculate bounds using percentiles for robustness
        if valid_positions.shape[0] > 10:
            # Use the 5% and 95% percentiles instead of min/max
            min_pos = torch.quantile(valid_positions, 0.05, dim=0)
            max_pos = torch.quantile(valid_positions, 0.95, dim=0)
        else:
            # Too few samples, use min/max
            min_pos = torch.min(valid_positions, dim=0)[0]
            max_pos = torch.max(valid_positions, dim=0)[0]

        # Calculate the per-axis range
        pos_range = max_pos - min_pos

        # Add a small epsilon to prevent dimension collapse
        eps = torch.maximum(
            torch.tensor(1e-6, device=device),
            torch.abs(max_pos) * 1e-6
        )
        pos_range = torch.maximum(pos_range, eps)

        # Use the maximum range as the scale factor for uniform scaling
        scale_factor = torch.max(pos_range)
        scale_factor = torch.clamp(scale_factor, min=1e-6, max=1e6)

        # Calculate the center point for centering
        center = (min_pos + max_pos) / 2.0

        # Normalize: center first, then scale with padding
        actual_scale = scale_factor / (1 - 2 * padding)
        normalized_positions = (positions - center) / actual_scale + 0.5

        # Ensure all values are within the valid range
        normalized_positions = torch.clamp(normalized_positions, 0.0, 1.0)

        # Handle invalid positions by setting them to the scene center
        invalid_mask = ~torch.isfinite(positions).all(dim=1)
        if invalid_mask.any():
            normalized_positions[invalid_mask] = 0.5

        normalized_extrinsics[b, :, :3, 3] = normalized_positions

        if return_stats:
            stats['scale_factors'][b] = actual_scale
            stats['translation_vectors'][b] = center

    # Final validation
    assert torch.isfinite(normalized_extrinsics).all(), "Output contains non-finite values"

    if return_stats:
        return normalized_extrinsics, stats
    return normalized_extrinsics
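
# Usage sketch (illustrative only, not part of the original module): a minimal
# example of calling normalize_poses on synthetic extrinsics, assuming a batch
# of B=2 scenes with S=16 cameras each. Shapes and values are made up for
# demonstration.
def _example_normalize_poses():
    extrinsics = torch.randn(2, 16, 3, 4)  # (B, S, 3, 4) random poses
    normalized, stats = normalize_poses(extrinsics, padding=0.1, return_stats=True)
    # Positions inside the robust bounds map to [padding, 1 - padding];
    # everything is clamped to [0, 1].
    print(normalized[..., :3, 3].min().item(), normalized[..., :3, 3].max().item())
    print(stats['scale_factors'], stats['translation_vectors'])
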
def normalize_depth(depth, eps=1e-6, min_percentile=1, max_percentile=99):
    """
    Normalize depth values to the [0, 1] range using percentile-based scaling.

    Args:
        depth: Input depth tensor with shape (B, S, H, W)
        eps: Small epsilon value to prevent division by zero
        min_percentile: Lower percentile for robust min calculation (default: 1)
        max_percentile: Upper percentile for robust max calculation (default: 99)

    Returns:
        normalized_depth: Depth tensor normalized to [0, 1] with the same shape (B, S, H, W)
    """
    B, S, H, W = depth.shape
    depth = depth.flatten(0, 1)  # [B*S, H, W]

    # Handle invalid values
    depth = torch.nan_to_num(depth, nan=0.0, posinf=1e6, neginf=0.0)

    normalized_list = []
    for i in range(depth.shape[0]):
        depth_img = depth[i]  # [H, W]
        depth_flat = depth_img.flatten()

        # Ignore zero (invalid) depth values when estimating the range
        non_zero_mask = depth_flat > 0
        if non_zero_mask.sum() > 0:
            values_to_use = depth_flat[non_zero_mask]
        else:
            values_to_use = depth_flat

        # Only calculate percentiles when there are enough values
        if values_to_use.numel() > 100:  # Ensure enough samples for percentile calculation
            # Calculate the lower and upper percentiles
            depth_min = torch.quantile(values_to_use, min_percentile / 100.0)
            depth_max = torch.quantile(values_to_use, max_percentile / 100.0)
        else:
            # Too few samples, use min/max values
            depth_min = values_to_use.min()
            depth_max = values_to_use.max()

        # Handle the case where max equals min
        if depth_max == depth_min:
            depth_max = depth_min + 1.0

        # Use a relative epsilon
        scale = torch.abs(depth_max - depth_min)
        eps_val = max(eps, scale.item() * eps)

        # Perform normalization
        depth_norm_img = (depth_img - depth_min) / (depth_max - depth_min + eps_val)

        # Ensure the output is within the [0, 1] range
        depth_norm_img = torch.clamp(depth_norm_img, 0.0, 1.0)
        normalized_list.append(depth_norm_img)

    # Recombine all normalized images
    depth_norm = torch.stack(normalized_list)
    return depth_norm.reshape(B, S, H, W)
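
# Usage sketch (illustrative only, not part of the original module): a minimal
# example of normalize_depth on a synthetic depth map with a few zero (invalid)
# pixels, assuming B=1, S=2 and 64x64 images. Values are made up for demonstration.
if __name__ == "__main__":
    depth = torch.rand(1, 2, 64, 64) * 10.0  # depths roughly in [0, 10)
    depth[..., :4, :4] = 0.0                 # simulate invalid / missing depth
    depth_norm = normalize_depth(depth, min_percentile=1, max_percentile=99)
    print(depth_norm.shape, depth_norm.min().item(), depth_norm.max().item())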