weijielyu committed
Commit fa06469 · 1 Parent(s): 59ae2c2

Update demo

Files changed (1)
app.py +383 -1
app.py CHANGED
@@ -94,4 +94,386 @@ except ImportError:
         else:
             # Build stage may not see a GPU on HF Spaces: compile a cross-arch set
             env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
-    except Excep
+    except Exception:
+        env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6;8.9;9.0+PTX"
+
+    # (Optional) side-step allocator+NVML quirks in restrictive containers
+    env.setdefault("PYTORCH_NO_CUDA_MEMORY_CACHING", "1")
+
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install",
+         "git+https://github.com/graphdeco-inria/diff-gaussian-rasterization"],
+        env=env,
+    )
+    import diff_gaussian_rasterization  # noqa: F401
+
+
+from gslrm.model.gaussians_renderer import render_turntable, imageseq2video
+from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
+from utils_folder.face_utils import preprocess_image, preprocess_image_without_cropping
+
+# HuggingFace repository configuration
+HF_REPO_ID = "wlyu/OpenFaceLift"
+
+def download_weights_from_hf() -> Path:
+    """Download model weights from HuggingFace if not already present.
+
+    Returns:
+        Path to the downloaded repository.
+    """
+    workspace_dir = Path(__file__).parent
+
+    # Check if weights already exist locally
+    mvdiffusion_path = workspace_dir / "checkpoints/mvdiffusion/pipeckpts"
+    gslrm_path = workspace_dir / "checkpoints/gslrm/ckpt_0000000000021125.pt"
+
+    if mvdiffusion_path.exists() and gslrm_path.exists():
+        print("Using local model weights")
+        return workspace_dir
+
+    print(f"Downloading model weights from HuggingFace: {HF_REPO_ID}")
+    print("This may take a few minutes on first run...")
+
+    # Download to local directory
+    snapshot_download(
+        repo_id=HF_REPO_ID,
+        local_dir=str(workspace_dir / "checkpoints"),
+        local_dir_use_symlinks=False,
+    )
+
+    print("Model weights downloaded successfully!")
+    return workspace_dir
+
+class FaceLiftPipeline:
+    """Pipeline for FaceLift 3D head generation from single images."""
+
+    def __init__(self):
+        # Download weights from HuggingFace if needed
+        workspace_dir = download_weights_from_hf()
+
+        # Setup paths
+        self.output_dir = workspace_dir / "outputs"
+        self.examples_dir = workspace_dir / "examples"
+        self.output_dir.mkdir(exist_ok=True)
+
+        # Parameters
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.image_size = 512
+        self.camera_indices = [2, 1, 0, 5, 4, 3]
+
+        # Load models (keep on CPU for ZeroGPU compatibility)
+        print("Loading models...")
+        try:
+            self.mvdiffusion_pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
+                str(workspace_dir / "checkpoints/mvdiffusion/pipeckpts"),
+                torch_dtype=torch.float16,
+            )
+            # Don't move to device or enable xformers here - done in the GPU-decorated function
+            self._models_on_gpu = False
+
+            with open(workspace_dir / "configs/gslrm.yaml", "r") as f:
+                config = edict(yaml.safe_load(f))
+
+            module_name, class_name = config.model.class_name.rsplit(".", 1)
+            module = __import__(module_name, fromlist=[class_name])
+            ModelClass = getattr(module, class_name)
+
+            self.gs_lrm_model = ModelClass(config)
+            checkpoint = torch.load(
+                workspace_dir / "checkpoints/gslrm/ckpt_0000000000021125.pt",
+                map_location="cpu"
+            )
+            # Filter out loss_calculator weights (training-only, not needed for inference)
+            state_dict = {k: v for k, v in checkpoint["model"].items()
+                          if not k.startswith("loss_calculator.")}
+            self.gs_lrm_model.load_state_dict(state_dict)
+            # Keep on CPU initially - will move to GPU in decorated function
+
+            self.color_prompt_embedding = torch.load(
+                workspace_dir / "mvdiffusion/fixed_prompt_embeds_6view/clr_embeds.pt",
+                map_location="cpu"
+            )
+
+            with open(workspace_dir / "utils_folder/opencv_cameras.json", "r") as f:
+                self.cameras_data = json.load(f)["frames"]
+
+            print("Models loaded successfully!")
+        except Exception as e:
+            print(f"Error loading models: {e}")
+            import traceback
+            traceback.print_exc()
+            raise
+
+    def _move_models_to_gpu(self):
+        """Move models to GPU and enable optimizations. Called within the @spaces.GPU context."""
+        if not self._models_on_gpu and torch.cuda.is_available():
+            print("Moving models to GPU...")
+            self.device = torch.device("cuda:0")
+            self.mvdiffusion_pipeline.to(self.device)
+            self.mvdiffusion_pipeline.unet.enable_xformers_memory_efficient_attention()
+            self.gs_lrm_model.to(self.device)
+            self.gs_lrm_model.eval()  # Set to eval mode
+            self.color_prompt_embedding = self.color_prompt_embedding.to(self.device)
+            self._models_on_gpu = True
+            torch.cuda.empty_cache()  # Clear cache after moving models
+            print("Models on GPU, xformers enabled!")
+
+    @spaces.GPU(duration=120)
+    def generate_3d_head(self, image_path, auto_crop=True, guidance_scale=3.0,
+                         random_seed=4, num_steps=50):
+        """Generate a 3D head from a single image."""
+        try:
+            # Move models to GPU now that we're in the GPU context
+            self._move_models_to_gpu()
+            # Setup output directory
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_dir = self.output_dir / timestamp
+            output_dir.mkdir(exist_ok=True)
+
+            # Preprocess input
+            original_img = np.array(Image.open(image_path))
+            input_image = preprocess_image(original_img) if auto_crop else \
+                preprocess_image_without_cropping(original_img)
+
+            if input_image.size != (self.image_size, self.image_size):
+                input_image = input_image.resize((self.image_size, self.image_size))
+
+            input_path = output_dir / "input.png"
+            input_image.save(input_path)
+
+            # Generate multi-view images
+            generator = torch.Generator(device=self.mvdiffusion_pipeline.unet.device)
+            generator.manual_seed(random_seed)
+
+            result = self.mvdiffusion_pipeline(
+                input_image, None,
+                prompt_embeds=self.color_prompt_embedding,
+                height=self.image_size,
+                width=self.image_size,
+                guidance_scale=guidance_scale,
+                num_images_per_prompt=1,
+                num_inference_steps=num_steps,
+                generator=generator,
+                eta=1.0,
+            )
+
+            selected_views = result.images[:6]
+
+            # Save multi-view composite
+            multiview_image = Image.new("RGB", (self.image_size * 6, self.image_size))
+            for i, view in enumerate(selected_views):
+                multiview_image.paste(view, (self.image_size * i, 0))
+
+            multiview_path = output_dir / "multiview.png"
+            multiview_image.save(multiview_path)
+
+            # Move the diffusion model to CPU to free GPU memory for GS-LRM
+            print("Moving diffusion model to CPU to free memory...")
+            self.mvdiffusion_pipeline.to("cpu")
+
+            # Delete intermediate variables to free memory
+            del result, generator
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
+
+            # Prepare 3D reconstruction input
+            view_arrays = [np.array(view) for view in selected_views]
+            lrm_input = torch.from_numpy(np.stack(view_arrays, axis=0)).float()
+            lrm_input = lrm_input[None].to(self.device) / 255.0
+            lrm_input = rearrange(lrm_input, "b v h w c -> b v c h w")
+
+            # Prepare camera parameters
+            selected_cameras = [self.cameras_data[i] for i in self.camera_indices]
+            fxfycxcy_list = [[c["fx"], c["fy"], c["cx"], c["cy"]] for c in selected_cameras]
+            c2w_list = [np.linalg.inv(np.array(c["w2c"])) for c in selected_cameras]
+
+            fxfycxcy = torch.from_numpy(np.stack(fxfycxcy_list, axis=0).astype(np.float32))
+            c2w = torch.from_numpy(np.stack(c2w_list, axis=0).astype(np.float32))
+            fxfycxcy = fxfycxcy[None].to(self.device)
+            c2w = c2w[None].to(self.device)
+
+            batch_indices = torch.stack([
+                torch.zeros(lrm_input.size(1)).long(),
+                torch.arange(lrm_input.size(1)).long(),
+            ], dim=-1)[None].to(self.device)
+
+            batch = edict({
+                "image": lrm_input,
+                "c2w": c2w,
+                "fxfycxcy": fxfycxcy,
+                "index": batch_indices,
+            })
+
+            # Ensure the GS-LRM model is on GPU
+            if next(self.gs_lrm_model.parameters()).device.type == "cpu":
+                print("Moving GS-LRM model to GPU...")
+                self.gs_lrm_model.to(self.device)
+                torch.cuda.empty_cache()
+
+            # Final memory cleanup before reconstruction
+            torch.cuda.empty_cache()
+
+            # Run 3D reconstruction
+            with torch.no_grad(), torch.autocast(enabled=True, device_type="cuda", dtype=torch.float16):
+                result = self.gs_lrm_model.forward(batch, create_visual=False, split_data=True)
+
+            comp_image = result.render[0].unsqueeze(0).detach()
+            gaussians = result.gaussians[0]
+
+            # Clear CUDA cache after reconstruction
+            torch.cuda.empty_cache()
+
+            # Save filtered gaussians
+            filtered_gaussians = gaussians.apply_all_filters(
+                cam_origins=None,
+                opacity_thres=0.04,
+                scaling_thres=0.2,
+                floater_thres=0.75,
+                crop_bbx=[-0.91, 0.91, -0.91, 0.91, -1.0, 1.0],
+                nearfar_percent=(0.0001, 1.0),
+            )
+
+            ply_path = output_dir / "gaussians.ply"
+            filtered_gaussians.save_ply(str(ply_path))
+
+            # Save output image
+            comp_image = rearrange(comp_image, "x v c h w -> (x h) (v w) c")
+            comp_image = (comp_image.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
+            output_path = output_dir / "output.png"
+            Image.fromarray(comp_image).save(output_path)
+
+            # Generate turntable video
+            turntable_resolution = 512
+            num_turntable_views = 180
+            turntable_frames = render_turntable(gaussians, rendering_resolution=turntable_resolution,
+                                                num_views=num_turntable_views)
+            turntable_frames = rearrange(turntable_frames, "h (v w) c -> v h w c", v=num_turntable_views)
+            turntable_frames = np.ascontiguousarray(turntable_frames)
+
+            turntable_path = output_dir / "turntable.mp4"
+            imageseq2video(turntable_frames, str(turntable_path), fps=30)
+
+            # Final CUDA cache clear
+            torch.cuda.empty_cache()
+
+            return str(input_path), str(multiview_path), str(output_path), \
+                str(turntable_path), str(ply_path)
+
+        except Exception as e:
+            import traceback
+            error_details = traceback.format_exc()
+            print(f"Error details:\n{error_details}")
+            raise gr.Error(f"Generation failed: {str(e)}")
+
+# -----------------------------
+# gsplat.js viewer (Option A)
+# -----------------------------
+GSPLAT_HEAD = """
+<script type="module">
+import * as SPLAT from "https://cdn.jsdelivr.net/npm/gsplat@1.2.9/+esm";
+let renderer, scene, camera, controls;
+
+function ensureViewer() {
+  if (renderer) return;
+  const container = document.getElementById("splat-container");
+  renderer = new SPLAT.WebGLRenderer();
+  container.appendChild(renderer.canvas);
+  scene = new SPLAT.Scene();
+  camera = new SPLAT.Camera();
+  controls = new SPLAT.OrbitControls(camera, renderer.canvas);
+  const loop = () => { controls.update(); renderer.render(scene, camera); requestAnimationFrame(loop); };
+  requestAnimationFrame(loop);
+}
+
+async function loadSplat(url) {
+  ensureViewer();
+  // Clear the previous scene before loading a new splat
+  scene.children.length = 0;
+  await SPLAT.Loader.LoadAsync(url, scene, () => {});
+}
+
+// Expose callable function for Gradio
+window.__load_splat__ = loadSplat;
+</script>
+"""
+
+def main():
+    """Run the FaceLift application with an embedded gsplat.js viewer and per-session files."""
+    pipeline = FaceLiftPipeline()
+
+    # Prepare examples (same as before)
+    examples = []
+    if pipeline.examples_dir.exists():
+        examples = [[str(f), True, 3.0, 4, 50] for f in sorted(pipeline.examples_dir.iterdir())
+                    if f.suffix.lower() in {".png", ".jpg", ".jpeg"}]
+
+    with gr.Blocks(head=GSPLAT_HEAD, title="FaceLift: Single Image 3D Face Reconstruction") as demo:
+        session = gr.State()
+
+        # Light GC + session init
+        def _init_session():
+            cleanup_old_sessions()
+            return new_session_id()
+
+        # After generation: copy the ply into a per-session folder and return the viewer URL
+        def _prep_viewer_url(ply_path: str, session_id: str) -> str:
+            if not ply_path or not os.path.exists(ply_path):
+                return ""
+            return copy_to_session_and_get_url(ply_path, session_id)
+
+        gr.Markdown("## FaceLift: Single Image 3D Face Reconstruction\nTurn a single portrait image into a 3D head model and preview it interactively.")
+        with gr.Row():
+            with gr.Column(scale=1):
+                in_image = gr.Image(type="filepath", label="Input Portrait Image")
+                auto_crop = gr.Checkbox(value=True, label="Auto Cropping")
+                guidance = gr.Slider(1.0, 10.0, 3.0, step=0.1, label="Guidance Scale")
+                seed = gr.Number(value=4, label="Random Seed")
+                steps = gr.Slider(10, 100, 50, step=5, label="Generation Steps")
+                run_btn = gr.Button("Generate 3D Head", variant="primary")
+
+                # Examples (match input signature)
+                if examples:
+                    gr.Examples(
+                        examples=examples,
+                        inputs=[in_image, auto_crop, guidance, seed, steps],
+                        examples_per_page=8,
+                    )
+
+            with gr.Column(scale=1):
+                out_proc = gr.Image(label="Processed Input")
+                out_multi = gr.Image(label="Multi-view Generation")
+                out_recon = gr.Image(label="3D Reconstruction")
+                out_video = gr.PlayableVideo(label="Turntable Animation")
+                out_ply = gr.File(label="3D Model (.ply)")
+
+        gr.Markdown("### Interactive Gaussian Splat Viewer")
+        with gr.Row():
+            url_box = gr.Textbox(label="Scene URL (auto-filled)", interactive=False)
+        viewer = gr.HTML("<div id='splat-container' style='width:100%;height:640px'></div>")
+        reload_btn = gr.Button("Reload Viewer")
+
+        # Initialize per-browser session
+        demo.load(fn=_init_session, inputs=None, outputs=session)
+
+        # Chain: run → show outputs → prepare viewer URL → load viewer (JS)
+        run_btn.click(
+            fn=pipeline.generate_3d_head,
+            inputs=[in_image, auto_crop, guidance, seed, steps],
+            outputs=[out_proc, out_multi, out_recon, out_video, out_ply],
+        ).then(
+            fn=_prep_viewer_url,
+            inputs=[out_ply, session],
+            outputs=url_box,
+        ).then(
+            fn=None, inputs=url_box, outputs=None,
+            js="(url)=>window.__load_splat__(url)"
+        )
+
+        # Manual reload if needed
+        reload_btn.click(fn=None, inputs=url_box, outputs=None, js="(url)=>window.__load_splat__(url)")
+
+    demo.queue(max_size=10)
+    demo.launch(share=True, server_name="0.0.0.0", server_port=7860, show_error=True)
+
+if __name__ == "__main__":
+    main()