Spaces:

mingyuan
/

ReMoDiffuse

Running

App Files Files Community

ReMoDiffuse / app.py

mingyuan

initial commit

a0d91d3 about 2 years ago

raw

history blame

4.03 kB

	import os
	import sys
	import gradio as gr

	# Create the "outputs" directory up front; exist_ok=True makes this a no-op
	# when the directory is already present.
	os.makedirs("outputs", exist_ok=True)
	# Put the current working directory first on the module search path.
	# NOTE(review): presumably this is what lets the `mogen` imports below
	# resolve when the app is started from the repository root -- confirm.
	sys.path.insert(0, '.')

	import argparse
	import os.path as osp
	import mmcv
	import numpy as np
	import torch
	from mogen.models import build_architecture
	from mmcv.runner import load_checkpoint
	from mmcv.parallel import MMDataParallel
	from mogen.utils.plot_utils import (
	recover_from_ric,
	plot_3d_motion,
	t2m_kinematic_chain
	)
	from scipy.ndimage import gaussian_filter
	from IPython.display import Image


	def motion_temporal_filter(motion, sigma=1):
	    """Gaussian-smooth a motion array along its first axis.

	    The input is flattened to ``(motion.shape[0], -1)``, each column is
	    filtered along axis 0 with edge values replicated (``mode="nearest"``),
	    and the result is reshaped to ``(motion.shape[0], -1, 3)``.

	    Args:
	        motion: NumPy array. Everything after the first axis must flatten
	            to a multiple of 3, otherwise the final reshape raises.
	        sigma: Scalar standard deviation of the Gaussian kernel.

	    Returns:
	        A new array of shape ``(motion.shape[0], -1, 3)``. The input array
	        is left unmodified (the previous implementation wrote the filtered
	        values back into a view of the caller's data).
	    """
	    # Imported locally so this function does not depend on a change to the
	    # module-level import list.
	    from scipy.ndimage import gaussian_filter1d

	    flat = motion.reshape(motion.shape[0], -1)
	    # One call along axis 0 applies the same 1-D kernel to every column at
	    # once and allocates its own output instead of mutating `flat`.
	    smoothed = gaussian_filter1d(flat, sigma=sigma, axis=0, mode="nearest")
	    return smoothed.reshape(motion.shape[0], -1, 3)


	def plot_t2m(data, result_path, npy_path, caption):
	    """Render a generated motion to ``result_path`` and optionally save it.

	    Args:
	        data: NumPy array of motion features; converted to a float tensor
	            and passed to ``recover_from_ric`` together with the value 22
	            (presumably the joint count -- confirm against mogen).
	        result_path: Output path handed to ``plot_3d_motion``.
	        npy_path: Where to ``np.save`` the smoothed array, or ``None`` to
	            skip saving.
	        caption: Title passed to ``plot_3d_motion``.
	    """
	    features = torch.from_numpy(data).float()
	    recovered = recover_from_ric(features, 22).numpy()
	    # Smooth before plotting; the plot and the optional .npy use the same array.
	    smoothed = motion_temporal_filter(recovered, sigma=2.5)
	    plot_3d_motion(
	        result_path,
	        t2m_kinematic_chain,
	        smoothed,
	        title=caption,
	        fps=20,
	    )
	    if npy_path is None:
	        return
	    np.save(npy_path, smoothed)

	def _build_cpu_model(config_path, ckpt_path):
	    """Build the model described by ``config_path`` and load ``ckpt_path`` on CPU.

	    Shared by the ``create_*`` factories, which differ only in these two paths.

	    Args:
	        config_path: Config file read with ``mmcv.Config.fromfile``; its
	            ``model`` entry is passed to ``build_architecture``.
	        ckpt_path: Checkpoint file loaded with ``map_location='cpu'``.

	    Returns:
	        The model, moved to CPU and switched to eval mode.
	    """
	    cfg = mmcv.Config.fromfile(config_path)
	    model = build_architecture(cfg.model)
	    load_checkpoint(model, ckpt_path, map_location='cpu')
	    model.cpu()
	    model.eval()
	    return model

	def create_remodiffuse():
	    """Return the ReMoDiffuse text-to-motion model on CPU in eval mode."""
	    return _build_cpu_model(
	        "configs/remodiffuse/remodiffuse_t2m.py",
	        "logs/remodiffuse/remodiffuse_t2m/latest.pth",
	    )

	def create_motiondiffuse():
	    """Return the MotionDiffuse text-to-motion model on CPU in eval mode."""
	    return _build_cpu_model(
	        "configs/motiondiffuse/motiondiffuse_t2m.py",
	        "logs/motiondiffuse/motiondiffuse_t2m/latest.pth",
	    )

	def create_mdm():
	    """Return the MDM text-to-motion model on CPU in eval mode."""
	    return _build_cpu_model(
	        "configs/mdm/mdm_t2m_official.py",
	        "logs/mdm/mdm_t2m/latest.pth",
	    )

	# Build the ReMoDiffuse model once at import time; generate() passes this
	# instance to show_generation_result for every request.
	model_remodiffuse = create_remodiffuse()
	# The MotionDiffuse and MDM models are not loaded; their factory calls are
	# left commented out.
	# model_motiondiffuse = create_motiondiffuse()
	# model_mdm = create_mdm()

	# Statistics arrays read from disk; show_generation_result maps the model
	# output back with `pred_motion * std + mean`.
	mean_path = "data/datasets/human_ml3d/mean.npy"
	std_path = "data/datasets/human_ml3d/std.npy"
	mean = np.load(mean_path)
	std = np.load(std_path)


	def show_generation_result(model, text, motion_length, result_path):
	    """Generate a motion for ``text`` with ``model`` and render it to ``result_path``.

	    Args:
	        model: Model to run. It is moved to CPU and called with the keyword
	            arguments ``motion``, ``motion_mask``, ``motion_length``,
	            ``motion_metas`` and ``inference_kwargs``.
	        text: Prompt; sent to the model as ``motion_metas[0]['text']`` and
	            reused as the plot caption.
	        motion_length: Number of frames to generate. Coerced with ``int()``
	            because a UI slider may deliver a float such as ``60.0``, which
	            ``torch.zeros`` rejects as a size.
	        result_path: Output path forwarded to ``plot_t2m``.

	    Returns:
	        None. The rendered result is written by ``plot_t2m``.
	    """
	    device = 'cpu'
	    num_frames = int(motion_length)
	    model = model.to(device)

	    # Placeholder motion and an all-ones mask of the requested length; 263 is
	    # the feature width the original code hard-codes for this model input.
	    model_inputs = {
	        'motion': torch.zeros(1, num_frames, 263, device=device),
	        'motion_mask': torch.ones(1, num_frames, device=device),
	        'motion_length': torch.tensor([num_frames], dtype=torch.long, device=device),
	        'motion_metas': [{'text': text}],
	        'inference_kwargs': {},
	    }

	    with torch.no_grad():
	        # The model returns a sequence of results; only the first is used.
	        output = model(**model_inputs)[0]['pred_motion']

	    pred_motion = output.cpu().detach().numpy()
	    # Undo the normalisation using the module-level statistics.
	    pred_motion = pred_motion * std + mean

	    plot_t2m(pred_motion, result_path, None, text)

	def generate(prompt, length):
	    """Gradio callback: generate a motion video for ``prompt`` and return its path.

	    Args:
	        prompt: Text description of the motion.
	        length: Requested number of frames; coerced with ``int()``.

	    Returns:
	        Path of the ``.mp4`` file written under ``outputs/``.
	    """
	    # Local import: hashlib is not in the module-level import list.
	    import hashlib

	    # exist_ok avoids the race in a separate exists()/mkdir() pair.
	    os.makedirs("outputs", exist_ok=True)

	    num_frames = int(length)
	    # Built-in hash() of a str is salted per process and may be negative.
	    # A digest gives a stable name, and including the frame count keeps the
	    # same prompt at a different length from overwriting an earlier video.
	    digest = hashlib.sha256(prompt.encode("utf-8")).hexdigest()[:16]
	    result_path = "outputs/{}_{}.mp4".format(digest, num_frames)

	    show_generation_result(model_remodiffuse, prompt, num_frames, result_path)
	    return result_path

	# Web UI definition: a text prompt plus a slider are passed to generate(),
	# and the file path it returns is shown as a video.
	demo = gr.Interface(
	fn=generate,
	# Slider arguments are 20 and 196 with an initial value of 60; the value is
	# passed to generate() as `length`.
	inputs=["text", gr.Slider(20, 196, value=60)],
	# Each example is a [prompt, length] pair matching the two inputs above.
	examples=[
	["the man throws a punch with each hand.", 58],
	["a person spins quickly and takes off running.", 29],
	["a person quickly waves with their right hand", 46],
	["a person performing a slight bow", 89],
	],
	outputs="video",
	title="ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model",
	description="This is an interactive demo for ReMoDiffuse. For more information, feel free to visit our project page(https://mingyuan-zhang.github.io/projects/ReMoDiffuse.html).")

	# Enable Gradio's request queue, then start the app. Both run at import
	# time because there is no `if __name__ == "__main__"` guard.
	demo.queue()
	demo.launch()