Spaces:
Running
on
L40S
Running
on
L40S
Delete folder ./third_party/weights with huggingface_hub
Browse files
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/README.md
DELETED
|
@@ -1,117 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
tags:
|
| 3 |
-
- vision
|
| 4 |
-
---
|
| 5 |
-
|
| 6 |
-
## DUSt3R
|
| 7 |
-
|
| 8 |
-
# Model info
|
| 9 |
-
|
| 10 |
-
Project page: https://dust3r.europe.naverlabs.com/
|
| 11 |
-
|
| 12 |
-
# How to use
|
| 13 |
-
|
| 14 |
-
Here's how to load the model (after [installing](https://github.com/naver/dust3r?tab=readme-ov-file#installation) the dust3r package):
|
| 15 |
-
|
| 16 |
-
```python
|
| 17 |
-
from dust3r.model import AsymmetricCroCo3DStereo
|
| 18 |
-
import torch
|
| 19 |
-
|
| 20 |
-
model = AsymmetricCroCo3DStereo.from_pretrained("nielsr/DUSt3R_ViTLarge_BaseDecoder_512_dpt")
|
| 21 |
-
|
| 22 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 23 |
-
model.to(device)
|
| 24 |
-
```
|
| 25 |
-
|
| 26 |
-
Next, one can run inference as follows:
|
| 27 |
-
|
| 28 |
-
```python
|
| 29 |
-
from dust3r.inference import inference
|
| 30 |
-
from dust3r.utils.image import load_images
|
| 31 |
-
from dust3r.image_pairs import make_pairs
|
| 32 |
-
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
|
| 33 |
-
|
| 34 |
-
if __name__ == '__main__':
|
| 35 |
-
batch_size = 1
|
| 36 |
-
schedule = 'cosine'
|
| 37 |
-
lr = 0.01
|
| 38 |
-
niter = 300
|
| 39 |
-
|
| 40 |
-
# load_images can take a list of images or a directory
|
| 41 |
-
images = load_images(['croco/assets/Chateau1.png', 'croco/assets/Chateau2.png'], size=512)
|
| 42 |
-
pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
|
| 43 |
-
output = inference(pairs, model, device, batch_size=batch_size)
|
| 44 |
-
|
| 45 |
-
# at this stage, you have the raw dust3r predictions
|
| 46 |
-
view1, pred1 = output['view1'], output['pred1']
|
| 47 |
-
view2, pred2 = output['view2'], output['pred2']
|
| 48 |
-
# here, view1, pred1, view2, pred2 are dicts of lists of len(2)
|
| 49 |
-
# -> because we symmetrize we have (im1, im2) and (im2, im1) pairs
|
| 50 |
-
# in each view you have:
|
| 51 |
-
# an integer image identifier: view1['idx'] and view2['idx']
|
| 52 |
-
# the img: view1['img'] and view2['img']
|
| 53 |
-
# the image shape: view1['true_shape'] and view2['true_shape']
|
| 54 |
-
# an instance string output by the dataloader: view1['instance'] and view2['instance']
|
| 55 |
-
# pred1 and pred2 contain the confidence values: pred1['conf'] and pred2['conf']
|
| 56 |
-
# pred1 contains 3D points for view1['img'] in view1['img'] space: pred1['pts3d']
|
| 57 |
-
# pred2 contains 3D points for view2['img'] in view1['img'] space: pred2['pts3d_in_other_view']
|
| 58 |
-
|
| 59 |
-
# next we'll use the global_aligner to align the predictions
|
| 60 |
-
# depending on your task, you may be fine with the raw output and not need it
|
| 61 |
-
# with only two input images, you could use GlobalAlignerMode.PairViewer: it would just convert the output
|
| 62 |
-
# if using GlobalAlignerMode.PairViewer, no need to run compute_global_alignment
|
| 63 |
-
scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
|
| 64 |
-
loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)
|
| 65 |
-
|
| 66 |
-
# retrieve useful values from scene:
|
| 67 |
-
imgs = scene.imgs
|
| 68 |
-
focals = scene.get_focals()
|
| 69 |
-
poses = scene.get_im_poses()
|
| 70 |
-
pts3d = scene.get_pts3d()
|
| 71 |
-
confidence_masks = scene.get_masks()
|
| 72 |
-
|
| 73 |
-
# visualize reconstruction
|
| 74 |
-
scene.show()
|
| 75 |
-
|
| 76 |
-
# find 2D-2D matches between the two images
|
| 77 |
-
from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
|
| 78 |
-
pts2d_list, pts3d_list = [], []
|
| 79 |
-
for i in range(2):
|
| 80 |
-
conf_i = confidence_masks[i].cpu().numpy()
|
| 81 |
-
pts2d_list.append(xy_grid(*imgs[i].shape[:2][::-1])[conf_i]) # imgs[i].shape[:2] = (H, W)
|
| 82 |
-
pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
|
| 83 |
-
reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(*pts3d_list)
|
| 84 |
-
print(f'found {num_matches} matches')
|
| 85 |
-
matches_im1 = pts2d_list[1][reciprocal_in_P2]
|
| 86 |
-
matches_im0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]
|
| 87 |
-
|
| 88 |
-
# visualize a few matches
|
| 89 |
-
import numpy as np
|
| 90 |
-
from matplotlib import pyplot as pl
|
| 91 |
-
n_viz = 10
|
| 92 |
-
match_idx_to_viz = np.round(np.linspace(0, num_matches-1, n_viz)).astype(int)
|
| 93 |
-
viz_matches_im0, viz_matches_im1 = matches_im0[match_idx_to_viz], matches_im1[match_idx_to_viz]
|
| 94 |
-
|
| 95 |
-
H0, W0, H1, W1 = *imgs[0].shape[:2], *imgs[1].shape[:2]
|
| 96 |
-
img0 = np.pad(imgs[0], ((0, max(H1 - H0, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
|
| 97 |
-
img1 = np.pad(imgs[1], ((0, max(H0 - H1, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
|
| 98 |
-
img = np.concatenate((img0, img1), axis=1)
|
| 99 |
-
pl.figure()
|
| 100 |
-
pl.imshow(img)
|
| 101 |
-
cmap = pl.get_cmap('jet')
|
| 102 |
-
for i in range(n_viz):
|
| 103 |
-
(x0, y0), (x1, y1) = viz_matches_im0[i].T, viz_matches_im1[i].T
|
| 104 |
-
pl.plot([x0, x1 + W0], [y0, y1], '-+', color=cmap(i / (n_viz - 1)), scalex=False, scaley=False)
|
| 105 |
-
pl.show(block=True)
|
| 106 |
-
|
| 107 |
-
```
|
| 108 |
-
|
| 109 |
-
### BibTeX entry and citation info
|
| 110 |
-
|
| 111 |
-
```bibtex
|
| 112 |
-
@article{dust3r2023,
|
| 113 |
-
title={{DUSt3R: Geometric 3D Vision Made Easy}},
|
| 114 |
-
author={{Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome}},
|
| 115 |
-
journal={arXiv preprint 2312.14132},
|
| 116 |
-
year={2023}}
|
| 117 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/config.json
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_mode": "pts3d",
|
| 3 |
-
"head_type": "dpt",
|
| 4 |
-
"depth_mode": [
|
| 5 |
-
"exp",
|
| 6 |
-
-Infinity,
|
| 7 |
-
Infinity
|
| 8 |
-
],
|
| 9 |
-
"conf_mode": [
|
| 10 |
-
"exp",
|
| 11 |
-
1,
|
| 12 |
-
Infinity
|
| 13 |
-
],
|
| 14 |
-
"freeze": "none",
|
| 15 |
-
"landscape_only": false,
|
| 16 |
-
"patch_embed_cls": "PatchEmbedDust3R",
|
| 17 |
-
"enc_depth": 24,
|
| 18 |
-
"dec_depth": 12,
|
| 19 |
-
"enc_embed_dim": 1024,
|
| 20 |
-
"dec_embed_dim": 768,
|
| 21 |
-
"enc_num_heads": 16,
|
| 22 |
-
"dec_num_heads": 12,
|
| 23 |
-
"pos_embed": "RoPE100",
|
| 24 |
-
"img_size": [
|
| 25 |
-
512,
|
| 26 |
-
512
|
| 27 |
-
]
|
| 28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt/model.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cdbd4c6d7e91df3f3dc3551a0aadc7983bc85ed9e02794fba633eb1ed10174b5
|
| 3 |
-
size 2284790056
|
|
|
|
|
|
|
|
|
|
|
|