Spaces:

mooki0
/

HunyuanWorld-Demo

Build error

App Files Files Community

HunyuanWorld-Demo / app.py

mooki0

Initial commit of Gradio app

57276d4 verified 4 months ago

raw

history blame contribute delete

12 kB

	import gradio as gr
	import torch
	import os
	import numpy as np
	import cv2
	from PIL import Image
	import open3d as o3d
	import shutil

	# --- Model Classes (adapted from demo scripts) ---

	# Panorama Generation
	from hy3dworld import Text2PanoramaPipelines, Image2PanoramaPipelines, Perspective

	class Text2PanoramaDemo:
	def __init__(self):
	self.pipe = Text2PanoramaPipelines.from_pretrained(
	"black-forest-labs/FLUX.1-dev",
	torch_dtype=torch.bfloat16
	).to("cuda")
	self.pipe.load_lora_weights(
	"tencent/HunyuanWorld-1",
	subfolder="HunyuanWorld-PanoDiT-Text",
	weight_name="lora.safetensors",
	torch_dtype=torch.bfloat16
	)
	self.pipe.enable_model_cpu_offload()
	self.pipe.enable_vae_tiling()

	def run(self, prompt, negative_prompt, seed, height, width, guidance_scale, steps):
	image = self.pipe(
	prompt,
	height=height,
	width=width,
	negative_prompt=negative_prompt,
	generator=torch.Generator("cuda").manual_seed(seed),
	num_inference_steps=steps,
	guidance_scale=guidance_scale,
	blend_extend=6,
	true_cfg_scale=0.0,
	).images[0]
	return image

	class Image2PanoramaDemo:
	def __init__(self):
	self.pipe = Image2PanoramaPipelines.from_pretrained(
	"black-forest-labs/FLUX.1-dev",
	torch_dtype=torch.bfloat16
	).to("cuda")
	self.pipe.load_lora_weights(
	"tencent/HunyuanWorld-1",
	subfolder="HunyuanWorld-PanoDiT-Image",
	weight_name="lora.safetensors",
	torch_dtype=torch.bfloat16
	)
	self.pipe.enable_model_cpu_offload()
	self.pipe.enable_vae_tiling()
	self.general_negative_prompt = "human, person, people, messy, low-quality, blur, noise, low-resolution"
	self.general_positive_prompt = "high-quality, high-resolution, sharp, clear, 8k"

	def run(self, prompt, negative_prompt, image, seed, height, width, guidance_scale, steps, fov):
	prompt = prompt + ", " + self.general_positive_prompt
	negative_prompt = self.general_negative_prompt + ", " + negative_prompt

	perspective_img = np.array(image)
	height_fov, width_fov = perspective_img.shape[:2]
	ratio = width_fov / height_fov
	w = int((fov / 360) * width)
	h = int(w / ratio)
	perspective_img = cv2.resize(perspective_img, (w, h), interpolation=cv2.INTER_AREA)

	equ = Perspective(perspective_img, fov, 0, 0, crop_bound=False)
	img, mask = equ.GetEquirec(height, width)
	mask = cv2.erode(mask.astype(np.uint8), np.ones((3, 3), np.uint8), iterations=5)
	img = img * mask
	mask = 255 - (mask.astype(np.uint8) * 255)
	mask = Image.fromarray(mask[:, :, 0])
	img = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))

	image = self.pipe(
	prompt=prompt, image=img, mask_image=mask, height=height, width=width,
	negative_prompt=negative_prompt, guidance_scale=guidance_scale, num_inference_steps=steps,
	generator=torch.Generator("cuda").manual_seed(seed), blend_extend=6, shifting_extend=0, true_cfg_scale=2.0,
	).images[0]
	return image

	# Scene Generation
	from hy3dworld import LayerDecomposition, WorldComposer, process_file

	class HYworldDemo:
	def __init__(self, seed=42):
	target_size = 3840
	kernel_scale = max(1, int(target_size / 1920))
	self.LayerDecomposer = LayerDecomposition()
	self.hy3d_world = WorldComposer(
	device=torch.device("cuda"), resolution=(target_size, target_size // 2),
	seed=seed, filter_mask=True, kernel_scale=kernel_scale,
	)

	def run(self, image_path, labels_fg1, labels_fg2, classes, output_dir):
	os.makedirs(output_dir, exist_ok=True)
	fg1_infos = [{"image_path": image_path, "output_path": output_dir, "labels": labels_fg1, "class": classes}]
	fg2_infos = [{"image_path": os.path.join(output_dir, 'remove_fg1_image.png'), "output_path": output_dir, "labels": labels_fg2, "class": classes}]

	self.LayerDecomposer(fg1_infos, layer=0)
	self.LayerDecomposer(fg2_infos, layer=1)
	self.LayerDecomposer(fg2_infos, layer=2)
	separate_pano, fg_bboxes = self.hy3d_world._load_separate_pano_from_dir(output_dir, sr=True)
	layered_world_mesh = self.hy3d_world.generate_world(separate_pano=separate_pano, fg_bboxes=fg_bboxes, world_type='mesh')

	mesh_files = []
	for layer_idx, layer_info in enumerate(layered_world_mesh):
	output_path = os.path.join(output_dir, f"mesh_layer{layer_idx}.ply")
	o3d.io.write_triangle_mesh(output_path, layer_info['mesh'])
	mesh_files.append(output_path)
	return mesh_files

	# --- Gradio UI ---

	# Instantiate models
	t2p_demo = Text2PanoramaDemo()
	i2p_demo = Image2PanoramaDemo()
	hy_demo = HYworldDemo()

	def generate_text_to_pano(prompt, neg_prompt, seed, height, width, scale, steps):
	image = t2p_demo.run(prompt, neg_prompt, seed, height, width, scale, steps)
	# Save to a temporary file to pass to the next stage
	temp_dir = "temp_outputs"
	os.makedirs(temp_dir, exist_ok=True)
	temp_path = os.path.join(temp_dir, f"pano_{seed}.png")
	image.save(temp_path)
	return image, temp_path

	def generate_image_to_pano(prompt, neg_prompt, image, seed, height, width, scale, steps, fov):
	pil_image = Image.fromarray(image)
	result_image = i2p_demo.run(prompt, neg_prompt, pil_image, seed, height, width, scale, steps, fov)
	temp_dir = "temp_outputs"
	os.makedirs(temp_dir, exist_ok=True)
	temp_path = os.path.join(temp_dir, f"pano_i2p_{seed}.png")
	result_image.save(temp_path)
	return result_image, temp_path

	def generate_scene(panorama_file_path, fg1, fg2, classes, seed):
	if panorama_file_path is None or not os.path.exists(panorama_file_path):
	raise gr.Error("Please generate or upload a panorama image first.")
	output_dir = f"output_scene_{seed}"
	shutil.rmtree(output_dir, ignore_errors=True)
	labels_fg1 = [label.strip() for label in fg1.split(',') if label.strip()]
	labels_fg2 = [label.strip() for label in fg2.split(',') if label.strip()]
	mesh_files = hy_demo.run(panorama_file_path, labels_fg1, labels_fg2, classes, output_dir)

	# For now, let's just display the first layer. Gradio's Model3D doesn't support multiple files well.
	# A better UI might zip and offer for download, or show multiple viewers.
	return mesh_files[0] if mesh_files else None

	css = """
	#col-container {margin-left: auto; margin-right: auto;}
	#pano_output {min-height: 320px;}
	#scene_output {min-height: 480px;}
	"""

	with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
	gr.Markdown("<h1>HunyuanWorld-1.0: A One-Stop Solution for Text-driven 3D Scene Generation</h1>")
	gr.Markdown("Official Repo: [Tencent-Hunyuan/HunyuanWorld-1.0](https://github.com/Tencent-Hunyuan/HunyuanWorld-1.0)")

	# State to hold the path of the generated panorama
	panorama_path_state = gr.State(None)

	with gr.Tabs():
	with gr.TabItem("Step 1: Panorama Generation"):
	with gr.Row():
	with gr.Column():
	with gr.Tabs():
	with gr.TabItem("Text-to-Panorama") as t2p_tab:
	t2p_prompt = gr.Textbox(label="Prompt", value="A beautiful sunset over a mountain range, fantasy style")
	t2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="blurry, low quality")
	t2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=42)
	with gr.Accordion("Advanced Settings", open=False):
	t2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
	t2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
	t2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
	t2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
	t2p_button = gr.Button("Generate Panorama", variant="primary")

	with gr.TabItem("Image-to-Panorama") as i2p_tab:
	i2p_image = gr.Image(type="numpy", label="Input Image")
	i2p_prompt = gr.Textbox(label="Prompt", value="A photo of a room, modern design")
	i2p_neg_prompt = gr.Textbox(label="Negative Prompt", value="watermark, text")
	i2p_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=100)
	with gr.Accordion("Advanced Settings", open=False):
	i2p_fov = gr.Slider(label="Field of View (FOV)", minimum=40, maximum=120, step=5, value=80)
	i2p_height = gr.Slider(label="Height", minimum=512, maximum=1024, step=64, value=960)
	i2p_width = gr.Slider(label="Width", minimum=1024, maximum=2048, step=128, value=1920)
	i2p_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=50, step=1, value=30)
	i2p_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=5, value=50)
	i2p_button = gr.Button("Generate Panorama", variant="primary")

	with gr.Column():
	pano_output = gr.Image(label="Panorama Output", elem_id="pano_output")
	send_to_scene_btn = gr.Button("Step 2: Send to Scene Generation")

	with gr.TabItem("Step 2: Scene Generation") as scene_tab:
	with gr.Row():
	with gr.Column():
	gr.Markdown("Load the panorama generated in Step 1, or upload your own.")
	scene_input_image = gr.Image(type="filepath", label="Input Panorama")
	scene_classes = gr.Radio(["outdoor", "indoor"], label="Scene Class", value="outdoor")
	scene_fg1 = gr.Textbox(label="Foreground Labels (Layer 1)", placeholder="e.g., tree, car, person")
	scene_fg2 = gr.Textbox(label="Foreground Labels (Layer 2)", placeholder="e.g., building, mountain")
	scene_seed = gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=2024)
	scene_button = gr.Button("Generate 3D Scene", variant="primary")
	with gr.Column():
	scene_output = gr.Model3D(label="3D Scene Output (.ply)", elem_id="scene_output")

	# Wire up components
	t2p_button.click(
	fn=generate_text_to_pano,
	inputs=[t2p_prompt, t2p_neg_prompt, t2p_seed, t2p_height, t2p_width, t2p_scale, t2p_steps],
	outputs=[pano_output, panorama_path_state]
	)
	i2p_button.click(
	fn=generate_image_to_pano,
	inputs=[i2p_prompt, i2p_neg_prompt, i2p_image, i2p_seed, i2p_height, i2p_width, i2p_scale, i2p_steps, i2p_fov],
	outputs=[pano_output, panorama_path_state]
	)

	def transfer_to_scene_gen(path):
	return {scene_input_image: gr.update(value=path)}

	send_to_scene_btn.click(
	fn=lambda path: path,
	inputs=panorama_path_state,
	outputs=scene_input_image
	).then(
	lambda: gr.Tabs.update(selected=scene_tab),
	outputs=demo.children[1] # This is a bit of a hack to select the tab
	)

	scene_button.click(
	fn=generate_scene,
	inputs=[scene_input_image, scene_fg1, scene_fg2, scene_classes, scene_seed],
	outputs=scene_output
	)

	demo.queue().launch(debug=True)