import gradio as gr
import torch
import os
import numpy as np
import cv2
from PIL import Image
import open3d as o3d
import shutil
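
# Gradio Space for HunyuanWorld-1.0: Step 1 generates an equirectangular
# panorama from text or a single image; Step 2 decomposes the panorama into
# layers and lifts it into a 3D mesh scene.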

# --- Model Classes (adapted from demo scripts) ---

# Panorama Generation
from hy3dworld import Text2PanoramaPipelines, Image2PanoramaPipelines, Perspective


class Text2PanoramaDemo:
    def __init__(self):
        self.pipe = Text2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        ).to("cuda")
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Text",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16,
        )
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()

    def run(self, prompt, negative_prompt, seed, height, width, guidance_scale, steps):
        image = self.pipe(
            prompt,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            generator=torch.Generator("cuda").manual_seed(seed),
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            blend_extend=6,
            true_cfg_scale=0.0,
        ).images[0]
        return image
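
# Note: blend_extend and true_cfg_scale are HunyuanWorld-specific pipeline
# arguments carried over from the official demo scripts; blend_extend
# presumably pads and blends the panorama's left/right edges so the image
# wraps around seamlessly.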


class Image2PanoramaDemo:
    def __init__(self):
        self.pipe = Image2PanoramaPipelines.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        ).to("cuda")
        self.pipe.load_lora_weights(
            "tencent/HunyuanWorld-1",
            subfolder="HunyuanWorld-PanoDiT-Image",
            weight_name="lora.safetensors",
            torch_dtype=torch.bfloat16,
        )
        self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_tiling()
        self.general_negative_prompt = "human, person, people, messy, low-quality, blur, noise, low-resolution"
        self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"

    def run(self, prompt, negative_prompt, image, seed, height, width, guidance_scale, steps, fov):
        prompt = prompt + ", " + self.general_positive_prompt
        negative_prompt = self.general_negative_prompt + ", " + negative_prompt
        # Resize the input so its angular width matches the requested FOV
        # within the full 360-degree panorama, preserving the aspect ratio.
        perspective_img = np.array(image)
        height_fov, width_fov = perspective_img.shape[:2]
        ratio = width_fov / height_fov
        w = int((fov / 360) * width)
        h = int(w / ratio)
        perspective_img = cv2.resize(perspective_img, (w, h), interpolation=cv2.INTER_AREA)
        # Project the perspective image onto an equirectangular canvas; the
        # returned mask marks the covered region. Eroding it keeps the
        # inpainting boundary away from projection artifacts.
        equ = Perspective(perspective_img, fov, 0, 0, crop_bound=False)
        img, mask = equ.GetEquirec(height, width)
        mask = cv2.erode(mask.astype(np.uint8), np.ones((3, 3), np.uint8), iterations=5)
        img = img * mask
        # Invert the mask: the pipeline inpaints the white (uncovered) area.
        mask = 255 - (mask.astype(np.uint8) * 255)
        mask = Image.fromarray(mask[:, :, 0])
        img = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))
        image = self.pipe(
            prompt=prompt,
            image=img,
            mask_image=mask,
            height=height,
            width=width,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator("cuda").manual_seed(seed),
            blend_extend=6,
            shifting_extend=0,
            true_cfg_scale=2.0,
        ).images[0]
        return image
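
# Unlike the text pipeline (true_cfg_scale=0.0), the image pipeline sets
# true_cfg_scale=2.0, which in FLUX-style pipelines enables true
# classifier-free guidance so the negative prompt actually takes effect.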

# Scene Generation
from hy3dworld import LayerDecomposition, WorldComposer, process_file


class HYworldDemo:
    def __init__(self, seed=42):
        target_size = 3840
        kernel_scale = max(1, int(target_size / 1920))
        self.LayerDecomposer = LayerDecomposition()
        self.hy3d_world = WorldComposer(
            device=torch.device("cuda"),
            resolution=(target_size, target_size // 2),
            seed=seed,
            filter_mask=True,
            kernel_scale=kernel_scale,
        )

    def run(self, image_path, labels_fg1, labels_fg2, classes, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        fg1_infos = [{"image_path": image_path, "output_path": output_dir, "labels": labels_fg1, "class": classes}]
        fg2_infos = [{"image_path": os.path.join(output_dir, 'remove_fg1_image.png'), "output_path": output_dir, "labels": labels_fg2, "class": classes}]
        # Three decomposition passes: layer 0 extracts the first foreground
        # group from the input panorama; layers 1 and 2 operate on the
        # fg1-removed image written by the first pass.
        self.LayerDecomposer(fg1_infos, layer=0)
        self.LayerDecomposer(fg2_infos, layer=1)
        self.LayerDecomposer(fg2_infos, layer=2)
        separate_pano, fg_bboxes = self.hy3d_world._load_separate_pano_from_dir(output_dir, sr=True)
        layered_world_mesh = self.hy3d_world.generate_world(
            separate_pano=separate_pano, fg_bboxes=fg_bboxes, world_type='mesh'
        )
        # Write each reconstructed layer out as its own .ply mesh.
        mesh_files = []
        for layer_idx, layer_info in enumerate(layered_world_mesh):
            output_path = os.path.join(output_dir, f"mesh_layer{layer_idx}.ply")
            o3d.io.write_triangle_mesh(output_path, layer_info['mesh'])
            mesh_files.append(output_path)
        return mesh_files

# --- Gradio UI ---

# Instantiate models
t2p_demo = Text2PanoramaDemo()
i2p_demo = Image2PanoramaDemo()
hy_demo = HYworldDemo()
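
# Note: the two panorama pipelines above each load their own copy of
# FLUX.1-dev, and all three models are instantiated eagerly at import time; on
# a memory-constrained Space you may want to lazy-load them or share the base
# weights between the pipelines.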


def generate_text_to_pano(prompt, neg_prompt, seed, height, width, scale, steps):
    image = t2p_demo.run(prompt, neg_prompt, seed, height, width, scale, steps)
    # Save to a temporary file to pass to the next stage.
    temp_dir = "temp_outputs"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"pano_{seed}.png")
    image.save(temp_path)
    return image, temp_path


def generate_image_to_pano(prompt, neg_prompt, image, seed, height, width, scale, steps, fov):
    if image is None:
        raise gr.Error("Please upload an input image first.")
    pil_image = Image.fromarray(image)
    result_image = i2p_demo.run(prompt, neg_prompt, pil_image, seed, height, width, scale, steps, fov)
    temp_dir = "temp_outputs"
    os.makedirs(temp_dir, exist_ok=True)
    temp_path = os.path.join(temp_dir, f"pano_i2p_{seed}.png")
    result_image.save(temp_path)
    return result_image, temp_path


def generate_scene(panorama_file_path, fg1, fg2, classes, seed):
    if panorama_file_path is None or not os.path.exists(panorama_file_path):
        raise gr.Error("Please generate or upload a panorama image first.")
    output_dir = f"output_scene_{seed}"
    shutil.rmtree(output_dir, ignore_errors=True)
    labels_fg1 = [label.strip() for label in fg1.split(',') if label.strip()]
    labels_fg2 = [label.strip() for label in fg2.split(',') if label.strip()]
    mesh_files = hy_demo.run(panorama_file_path, labels_fg1, labels_fg2, classes, output_dir)
    # For now, display only the first layer: Gradio's Model3D component shows
    # a single file. A better UI might zip all layers and offer them for
    # download (see the sketch below) or show multiple viewers.
    return mesh_files[0] if mesh_files else None
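

# A minimal sketch of the zip-for-download idea mentioned above, assuming the
# UI gains a gr.File output wired to it; the helper name zip_scene_layers is
# hypothetical and not part of the HunyuanWorld demo code.
import zipfile

def zip_scene_layers(mesh_files, archive_path):
    """Bundle all generated .ply layers into one archive for download."""
    with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for path in mesh_files:
            # Store each mesh under its base name, not its full directory path.
            zf.write(path, arcname=os.path.basename(path))
    return archive_path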
| css = """ | |
| #col-container {margin-left: auto; margin-right: auto;} | |
| #pano_output {min-height: 320px;} | |
| #scene_output {min-height: 480px;} | |
| """ | |

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1>HunyuanWorld-1.0: A One-Stop Solution for Text-driven 3D Scene Generation</h1>")
    gr.Markdown("Official Repo: [Tencent-Hunyuan/HunyuanWorld-1.0](https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0)")

    # State to hold the path of the generated panorama
    panorama_path_state = gr.State(None)

    with gr.Tabs() as tabs:
        with gr.TabItem("Step 1: Panorama Generation", id="step1"):
            with gr.Row():
                with gr.Column():
                    with gr.Tabs():
                        with gr.TabItem("Text-to-Panorama"):
                            t2p_prompt = gr.Textbox(label="Prompt", value="A beautiful sunset over a mountain range, fantasy style")
                            t2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="blurry, low quality")
                            t2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=42)
                            with gr.Accordion("Advanced Settings", open=False):
                                t2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
                                t2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
                                t2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
                                t2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
                            t2p_button = gr.Button("Generate Panorama", variant="primary")
                        with gr.TabItem("Image-to-Panorama"):
                            i2p_image = gr.Image(type="numpy", label="Input Image")
                            i2p_prompt = gr.Textbox(label="Prompt", value="A photo of a room, modern design")
                            i2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="watermark, text")
                            i2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=100)
                            with gr.Accordion("Advanced Settings", open=False):
                                i2p_fov = gr.Slider(label="Field of View (FOV)", minimum=40, maximum=120, step=5, value=80)
                                i2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
                                i2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
                                i2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
                                i2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
                            i2p_button = gr.Button("Generate Panorama", variant="primary")
                with gr.Column():
                    pano_output = gr.Image(label="Panorama Output", elem_id="pano_output")
                    send_to_scene_btn = gr.Button("Step 2: Send to Scene Generation")
| with gr.TabItem("Step 2: Scene Generation") as scene_tab: | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("Load the panorama generated in Step 1, or upload your own.") | |
| scene_input_image = gr.Image(type="filepath", label="Input Panorama") | |
| scene_classes = gr.Radio(["outdoor", "indoor"], label="Scene Class", value="outdoor") | |
| scene_fg1 = gr.Textbox(label="Foreground Labels (Layer 1)", placeholder="e.g., tree, car, person") | |
| scene_fg2 = gr.Textbox(label="Foreground Labels (Layer 2)", placeholder="e.g., building, mountain") | |
| scene_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=2024) | |
| scene_button = gr.Button("Generate 3D Scene", variant="primary") | |
| with gr.Column(): | |
| scene_output = gr.Model3D(label="3D Scene Output (.ply)", elem_id="scene_output") | |

    # Wire up components
    t2p_button.click(
        fn=generate_text_to_pano,
        inputs=[t2p_prompt, t2p_neg_prompt, t2p_seed, t2p_height, t2p_width, t2p_scale, t2p_steps],
        outputs=[pano_output, panorama_path_state],
    )
    i2p_button.click(
        fn=generate_image_to_pano,
        inputs=[i2p_prompt, i2p_neg_prompt, i2p_image, i2p_seed, i2p_height, i2p_width, i2p_scale, i2p_steps, i2p_fov],
        outputs=[pano_output, panorama_path_state],
    )
    send_to_scene_btn.click(
        fn=lambda path: path,
        inputs=panorama_path_state,
        outputs=scene_input_image,
    ).then(
        # Switch to the Step 2 tab by updating the named Tabs container via
        # its TabItem id, instead of reaching into demo.children.
        fn=lambda: gr.Tabs(selected="step2"),
        outputs=tabs,
    )
    scene_button.click(
        fn=generate_scene,
        inputs=[scene_input_image, scene_fg1, scene_fg2, scene_classes, scene_seed],
        outputs=scene_output,
    )

demo.queue().launch(debug=True)