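"""Batch depth inference for benchmark evaluation.

For each dataset given via --datasets, read the image sequences listed in
--json_file, run Video Depth Anything on every sequence, and save the
predicted depth maps as per-frame .npy files under --infer_path.
"""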
import argparse
import json
import os

import cv2
import numpy as np
import torch
from tqdm import tqdm

from video_depth_anything.video_depth import VideoDepthAnything
from utils.dc_utils import read_video_frames

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--infer_path', type=str, default='')
    parser.add_argument('--json_file', type=str, default='')
    parser.add_argument('--datasets', type=str, nargs='+', default=['scannet', 'nyuv2'])
    parser.add_argument('--input_size', type=int, default=518)
    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitl'])
    args = parser.parse_args()
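
    # Expected --json_file layout (inferred from how it is read below): a top-level
    # dict keyed by dataset name; each entry is a list of records whose values are
    # lists of frame dicts carrying an 'image' path relative to the JSON file's directory.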

    # Load the benchmark index once; it covers all requested datasets.
    with open(args.json_file, 'r') as fs:
        path_json = json.load(fs)
    root_path = os.path.dirname(args.json_file)

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    model_configs = {
        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    }

    # Build and load the model once, then reuse it for every dataset.
    video_depth_anything = VideoDepthAnything(**model_configs[args.encoder])
    video_depth_anything.load_state_dict(
        torch.load(f'./checkpoints/video_depth_anything_{args.encoder}.pth', map_location='cpu'),
        strict=True,
    )
    video_depth_anything = video_depth_anything.to(DEVICE).eval()

    for dataset in args.datasets:
        json_data = path_json[dataset]
        for data in tqdm(json_data):
            for value in data.values():  # each value is one frame sequence
                infer_paths = []
                videos = []
                # Gather the frames of the sequence and the matching .npy output paths.
                for images in value:
                    image_path = os.path.join(root_path, images['image'])
                    infer_path = (args.infer_path + '/' + dataset + '/' + images['image']).replace('.jpg', '.npy').replace('.png', '.npy')
                    infer_paths.append(infer_path)
                    img = cv2.imread(image_path)
                    videos.append(img)
                videos = np.stack(videos, axis=0)

                # Run video depth inference on the whole sequence at once.
                target_fps = 1
                depths, fps = video_depth_anything.infer_video_depth(videos, target_fps, input_size=args.input_size, device=DEVICE, fp32=True)

                # Save one depth map per input frame, mirroring the dataset's file layout.
                for infer_path, depth in zip(infer_paths, depths):
                    os.makedirs(os.path.dirname(infer_path), exist_ok=True)
                    np.save(infer_path, depth)
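
# Example invocation (script name and paths are illustrative, not part of the repo):
#   python infer_benchmark.py \
#       --infer_path ./benchmark_outputs \
#       --json_file ./data/benchmark.json \
#       --datasets scannet nyuv2 \
#       --encoder vitl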