Spaces:

Ryukijano
/

Flash3d

Running on Zero

App Files Files Community

Ryukijano committed on Oct 25, 2024

Commit

acebad3

verified ·

1 Parent(s): 564492e

Update app.py

Browse files

feat: Enhance Gradio app with additional fine-tuning parameters and detailed comments

- Added sliders for `max_sh_degree` and `scaling_modifier` to the Gradio interface for more fine-tuning options.
- Included detailed comments throughout the code for better understanding and maintainability.
- Ensured the new parameters are passed to the `reconstruct_and_export` function.
- Improved error handling and logging for better debugging.

Files changed (1) hide show

app.py +18 -66

app.py CHANGED Viewed

@@ -15,7 +15,6 @@ from util.vis3d import save_ply
 def main():
     print("[INFO] Starting main function...")
-    # Determine if CUDA (GPU) is available and set the device accordingly
     if torch.cuda.is_available():
         device = "cuda:0"
         print("[INFO] CUDA is available. Using GPU device.")
@@ -23,37 +22,29 @@ def main():
         device = "cpu"
         print("[INFO] CUDA is not available. Using CPU device.")
-    # Download model configuration and weights from Hugging Face Hub
     print("[INFO] Downloading model configuration...")
-    model_cfg_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
-                                     filename="config_re10k_v1.yaml")
     print("[INFO] Downloading model weights...")
-    model_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
-                                 filename="model_re10k_v1.pth")
-    # Load model configuration using OmegaConf
     print("[INFO] Loading model configuration...")
     cfg = OmegaConf.load(model_cfg_path)
-    # Initialize the GaussianPredictor model with the loaded configuration
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
     try:
         device = torch.device(device)
-        model.to(device)  # Move the model to the specified device (CPU or GPU)
     except Exception as e:
         print(f"[ERROR] Failed to set device: {e}")
         raise
-    # Load the pre-trained model weights
     print("[INFO] Loading model weights...")
     model.load_model(model_path)
-    # Define transformation functions for image preprocessing
-    pad_border_fn = TT.Pad((cfg.dataset.pad_border_aug, cfg.dataset.pad_border_aug))  # Padding to augment the image borders
-    to_tensor = TT.ToTensor()  # Convert image to tensor
-    # Function to check if an image is uploaded by the user
     def check_input_image(input_image):
         print("[DEBUG] Checking input image...")
         if input_image is None:
@@ -61,53 +52,35 @@ def main():
             raise gr.Error("No image uploaded!")
         print("[INFO] Input image is valid.")
-    # Function to preprocess the input image before passing it to the model
     def preprocess(image, padding_value):
         print("[DEBUG] Preprocessing image...")
-        # Resize the image to the desired height and width specified in the configuration
-        image = TTF.resize(
-            image, (cfg.dataset.height, cfg.dataset.width),
-            interpolation=TT.InterpolationMode.BICUBIC
-        )
-        # Apply padding to the image
         pad_border_fn = TT.Pad((padding_value, padding_value))
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
-    # Function to reconstruct the 3D model from the input image and export it as a PLY file
-    @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
-    def reconstruct_and_export(image, num_gauss):
-        """
-        Passes image through model, outputs reconstruction in form of a dict of tensors.
-        """
         print("[DEBUG] Starting reconstruction and export...")
-        # Convert the preprocessed image to a tensor and move it to the specified device
         image = to_tensor(image).to(device).unsqueeze(0)
-        inputs = {
-            ("color_aug", 0, 0): image,
-        }
-        # Pass the image through the model to get the output
         print("[INFO] Passing image through the model...")
         outputs = model(inputs)
-        #Ensure the tensor dimensions are compatible
         gauss_means = outputs[('gauss_means',0, 0)]
         if gauss_means.shape[0] % num_gauss != 0:
             raise ValueError(f"Shape mismatch: cannot divide axis of length {gauss_means.shape[0]} into chunks of {num_gauss}")
-        # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=num_gauss)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
-    # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
-    # CSS styling for the Gradio interface
     css = """
         h1 {
             text-align: center;
@@ -115,34 +88,21 @@ def main():
         }
         """
-    # Create the Gradio user interface
     with gr.Blocks(css=css) as demo:
-        gr.Markdown(
-            """
-            # Flash3D
-            """
-        )
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
-                    # Input image component for the user to upload an image
-                    input_image = gr.Image(
-                        label="Input Image",
-                        image_mode="RGBA",
-                        sources="upload",
-                        type="pil",
-                        elem_id="content_image",
-                    )
                 with gr.Row():
-                    # Sliders for configurable parameters
                     num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=10)
                     padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)
                 with gr.Row():
-                    # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                 with gr.Row(variant="panel"):
-                    # Examples panel to provide sample images for users
                     gr.Examples(
                         examples=[
                             './demo_examples/bedroom_01.png',
@@ -159,34 +119,26 @@ def main():
                     )
                 with gr.Row():
-                    # Display the preprocessed image (after resizing and padding)
                     processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
-                        # 3D model viewer to display the reconstructed model
-                        output_model = gr.Model3D(
-                            height=512,
-                            label="Output Model",
-                            interactive=False
-                        )
-        # Define the workflow for the Generate button
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
             inputs=[input_image, padding_value],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
-            inputs=[processed_image, num_gauss],
             outputs=[output_model],
         )
-    # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)
     demo.queue(max_size=1)
     print("[INFO] Launching Gradio demo...")
-    demo.launch(share=True)  # Launch the Gradio interface and allow public sharing
 if __name__ == "__main__":
     print("[INFO] Running application...")

 def main():
     print("[INFO] Starting main function...")
     if torch.cuda.is_available():
         device = "cuda:0"
         print("[INFO] CUDA is available. Using GPU device.")
         device = "cpu"
         print("[INFO] CUDA is not available. Using CPU device.")
     print("[INFO] Downloading model configuration...")
+    model_cfg_path = hf_hub_download(repo_id="einsafutdinov/flash3d", filename="config_re10k_v1.yaml")
     print("[INFO] Downloading model weights...")
+    model_path = hf_hub_download(repo_id="einsafutdinov/flash3d", filename="model_re10k_v1.pth")
     print("[INFO] Loading model configuration...")
     cfg = OmegaConf.load(model_cfg_path)
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
     try:
         device = torch.device(device)
+        model.to(device)
     except Exception as e:
         print(f"[ERROR] Failed to set device: {e}")
         raise
     print("[INFO] Loading model weights...")
     model.load_model(model_path)
+    pad_border_fn = TT.Pad((cfg.dataset.pad_border_aug, cfg.dataset.pad_border_aug))
+    to_tensor = TT.ToTensor()
     def check_input_image(input_image):
         print("[DEBUG] Checking input image...")
         if input_image is None:
             raise gr.Error("No image uploaded!")
         print("[INFO] Input image is valid.")
     def preprocess(image, padding_value):
         print("[DEBUG] Preprocessing image...")
+        image = TTF.resize(image, (cfg.dataset.height, cfg.dataset.width), interpolation=TT.InterpolationMode.BICUBIC)
         pad_border_fn = TT.Pad((padding_value, padding_value))
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
+    @spaces.GPU(duration=120)
+    def reconstruct_and_export(image, num_gauss, max_sh_degree, scaling_modifier):
         print("[DEBUG] Starting reconstruction and export...")
         image = to_tensor(image).to(device).unsqueeze(0)
+        inputs = {("color_aug", 0, 0): image}
         print("[INFO] Passing image through the model...")
         outputs = model(inputs)
         gauss_means = outputs[('gauss_means',0, 0)]
         if gauss_means.shape[0] % num_gauss != 0:
             raise ValueError(f"Shape mismatch: cannot divide axis of length {gauss_means.shape[0]} into chunks of {num_gauss}")
         print(f"[INFO] Saving output to {ply_out_path}...")
+        save_ply(outputs, ply_out_path, num_gauss=num_gauss, max_sh_degree=max_sh_degree, scaling_modifier=scaling_modifier)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
     ply_out_path = f'./mesh.ply'
     css = """
         h1 {
             text-align: center;
         }
         """
     with gr.Blocks(css=css) as demo:
+        gr.Markdown("# Flash3D")
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
+                    input_image = gr.Image(label="Input Image", image_mode="RGBA", sources="upload", type="pil", elem_id="content_image")
                 with gr.Row():
                     num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=10)
                     padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)
+                    max_sh_degree = gr.Slider(minimum=1, maximum=10, step=1, label="Max SH Degree", value=1)
+                    scaling_modifier = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, label="Scaling Modifier", value=1.0)
                 with gr.Row():
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                 with gr.Row(variant="panel"):
                     gr.Examples(
                         examples=[
                             './demo_examples/bedroom_01.png',
                     )
                 with gr.Row():
                     processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
+                        output_model = gr.Model3D(height=512, label="Output Model", interactive=False)
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
             inputs=[input_image, padding_value],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
+            inputs=[processed_image, num_gauss, max_sh_degree, scaling_modifier],
             outputs=[output_model],
         )
     demo.queue(max_size=1)
     print("[INFO] Launching Gradio demo...")
+    demo.launch(share=True)
 if __name__ == "__main__":
     print("[INFO] Running application...")