Spaces:

silentchen
/

Shap_Editor_demo

Runtime error

App Files Files Community

silentchen committed on Jun 18, 2024

Commit

5d94b0f

1 Parent(s): 4533120

update space

Browse files

Files changed (1) hide show

app.py +158 -155

app.py CHANGED Viewed

@@ -62,158 +62,6 @@ class Blocks(gr.Blocks):
         return config
-@torch.no_grad()
-def optimize_all(xm, models, initial_noise, noise_start_t, diffusion, latent_model, device, prompt, instruction, rand_seed):
-    state = {}
-    out_gen_1, out_gen_2, out_gen_3, out_gen_4, state = generate_3d_with_shap_e(xm, diffusion, latent_model, device, prompt, rand_seed, state)
-    edited_1, edited_2, edited_3, edited_4, state = _3d_editing(xm, models, diffusion, initial_noise, noise_start_t, device, instruction, rand_seed, state)
-    print(state)
-    return out_gen_1, out_gen_2, out_gen_3, out_gen_4, edited_1, edited_2, edited_3, edited_4
-@spaces.GPU()
-@torch.no_grad()
-def generate_3d_with_shap_e(xm, diffusion, latent_model, device, prompt, rand_seed, state):
-    print("Check if I can use partial")
-    set_seed(rand_seed)
-    batch_size = 4
-    guidance_scale = 15.0
-    xm.renderer.volume.bbox_max = torch.tensor([1.0, 1.0, 1.0]).to(device)
-    xm.renderer.volume.bbox_min = torch.tensor([-1.0, -1.0, -1.0]).to(device)
-    xm.renderer.volume.bbox = torch.stack([xm.renderer.volume.bbox_min, xm.renderer.volume.bbox_max])
-    print("prompt: ", prompt, "rand_seed: ", rand_seed, "state:",  state)
-    latents = sample_latents(
-        batch_size=batch_size,
-        model=latent_model,
-        diffusion=diffusion,
-        guidance_scale=guidance_scale,
-        model_kwargs=dict(texts=[prompt] * batch_size),
-        progress=True,
-        clip_denoised=True,
-        use_fp16=True,
-        use_karras=True,
-        karras_steps=64,
-        sigma_min=1e-3,
-        sigma_max=160,
-        s_churn=0,
-    )
-    prompt_hash = str(hashlib.sha256((prompt + '_' + str(rand_seed)).encode('utf-8')).hexdigest())
-    mesh_path = []
-    output_path = './logs'
-    os.makedirs(os.path.join(output_path, 'source'), exist_ok=True)
-    state['latent'] = []
-    state['prompt'] = prompt
-    state['rand_seed_1'] = rand_seed
-    for i, latent in enumerate(latents):
-        output_path_tmp = os.path.join(output_path, 'source', '{}_{}.obj'.format(prompt_hash, i))
-        t_obj = decode_latent_mesh(xm, latent).tri_mesh()
-        with open(output_path_tmp, 'w') as f:
-            t_obj.write_obj(f)
-        mesh = trimesh.load_mesh(output_path_tmp)
-        angle = np.radians(180)
-        axis = [0, 1, 0]
-        rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
-        mesh.apply_transform(rotation_matrix)
-        angle = np.radians(90)
-        axis = [1, 0, 0]
-        rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
-        mesh.apply_transform(rotation_matrix)
-        output_path_tmp = os.path.join(output_path, 'source', '{}_{}.obj'.format(prompt_hash, i))
-        mesh.export(output_path_tmp)
-        state['latent'].append(latent.clone().detach().cpu())
-        mesh_path.append(output_path_tmp)
-    del latents
-    return mesh_path[0], mesh_path[1], mesh_path[2], mesh_path[3], state
-@spaces.GPU()
-@torch.no_grad()
-def _3d_editing(xm, models, diffusion, initial_noise, start_t, device, instruction, rand_seed, state):
-    set_seed(rand_seed)
-    mesh_path = []
-    prompt = state['prompt']
-    rand_seed_1 = state['rand_seed_1']
-    print("prompt: ", prompt, "rand_seed: ", rand_seed, "instruction:", instruction, "state:",  state)
-    prompt_hash = str(hashlib.sha256((prompt + '_' + str(rand_seed_1) + '_' + instruction + '_' + str(rand_seed)).encode('utf-8')).hexdigest())
-    if 'santa' in instruction:
-        e_type = 'santa_hat'
-    elif 'rainbow' in instruction:
-        e_type = 'rainbow'
-    elif 'gold' in instruction:
-        e_type = 'golden'
-    elif 'lego' in instruction:
-        e_type = 'lego'
-    elif 'wooden' in instruction:
-        e_type = 'wooden'
-    elif 'cyber' in instruction:
-        e_type = 'cyber'
-    model = load_model('text300M', device=device)
-    with torch.no_grad():
-        new_proj = nn.Linear(1024 * 2, 1024, device=device, dtype=model.wrapped.input_proj.weight.dtype)
-        new_proj.weight = nn.Parameter(torch.zeros_like(new_proj.weight))
-        new_proj.weight[:, :1024].copy_(model.wrapped.input_proj.weight)  #
-        new_proj.bias = nn.Parameter(torch.zeros_like(new_proj.bias))
-        new_proj.bias[:1024].copy_(model.wrapped.input_proj.bias)
-        model.wrapped.input_proj = new_proj
-    ckp = torch.load(hf_hub_download(repo_id='silentchen/Shap_Editor', subfolder='single', filename='{}.pt'.format(e_type)), map_location='cpu')
-    model.load_state_dict(ckp['model'])
-    noise_initial = initial_noise[e_type].to(device)
-    noise_start_t = start_t[e_type]
-    general_save_path = './logs/edited'
-    os.makedirs(general_save_path, exist_ok=True)
-    for i, latent in enumerate(state['latent']):
-        latent = latent.to(device)
-        text_embeddings_clip = model.cached_model_kwargs(1, dict(texts=[instruction]))
-        print("shape of latent: ", latent.clone().unsqueeze(0).shape, "instruction: ", instruction)
-        ref_latent = latent.clone().unsqueeze(0)
-        t_1 = torch.randint(noise_start_t, noise_start_t + 1, (1,), device=device).long()
-        noise_input = diffusion.q_sample(ref_latent, t_1, noise=noise_initial)
-        out_1 = diffusion.p_mean_variance(model, noise_input, t_1, clip_denoised=True,
-                                          model_kwargs=text_embeddings_clip,
-                                          condition_latents=ref_latent)
-        updated_latents = out_1['pred_xstart']
-        if 'santa' in instruction:
-            xm.renderer.volume.bbox_max = torch.tensor([1.0, 1.0, 1.25]).to(device)
-            xm.renderer.volume.bbox_min = torch.tensor([-1.0, -1.0, -1]).to(device)
-            xm.renderer.volume.bbox = torch.stack([xm.renderer.volume.bbox_min, xm.renderer.volume.bbox_max])
-        else:
-            xm.renderer.volume.bbox_max = torch.tensor([1.0, 1.0, 1.0]).to(device)
-            xm.renderer.volume.bbox_min = torch.tensor([-1.0, -1.0, -1.0]).to(device)
-            xm.renderer.volume.bbox = torch.stack([xm.renderer.volume.bbox_min, xm.renderer.volume.bbox_max])
-        for latent_idx, updated_latent in enumerate(updated_latents):
-            output_path = os.path.join(general_save_path, '{}_{}.obj'.format(prompt_hash, i))
-            t = decode_latent_mesh(xm, updated_latent).tri_mesh()
-            with open(output_path, 'w') as f:
-                t.write_obj(f)
-            mesh = trimesh.load_mesh(output_path)
-            angle = np.radians(180)
-            axis = [0, 1, 0]
-            rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
-            mesh.apply_transform(rotation_matrix)
-            angle = np.radians(90)
-            axis = [1, 0, 0]
-            rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
-            mesh.apply_transform(rotation_matrix)
-            output_path = os.path.join(general_save_path, '{}_{}.obj'.format(prompt_hash, i))
-            mesh.export(output_path)
-            mesh_path.append(output_path)
-    return mesh_path[0], mesh_path[1], mesh_path[2], mesh_path[3], state
 def main():
     css = """
@@ -320,6 +168,161 @@ def main():
         initial_noise[editing_type] = noise_initial
         noise_start_t[editing_type] = ckp['t_start']
         models[editing_type] = tmp_model
     del models
     models = None
@@ -388,13 +391,13 @@ def main():
                 rand_seed = gr.Slider(minimum=0, maximum=1000, step=1, value=445, label="Random seed")
             gen_btn.click(
-                fn=partial(generate_3d_with_shap_e, xm, diffusion, latent_model, device),
                 inputs=[prompt, rand_seed, state],
                 outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
                 queue=False)
             apply_btn.click(
-                fn=partial(_3d_editing, xm, models, diffusion, initial_noise, noise_start_t, device),
                 inputs=[
                     editing_choice[0], rand_seed, state
                 ],
@@ -416,7 +419,7 @@ def main():
                 ],
                 inputs=[prompt, editing_choice[0], rand_seed],
                 outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, edited_1, edited_2, edited_3, edited_4],
-                fn=partial(optimize_all, xm, models, initial_noise, noise_start_t, diffusion, latent_model, device),
                 cache_examples=True,
             )

         return config
 def main():
     css = """
         initial_noise[editing_type] = noise_initial
         noise_start_t[editing_type] = ckp['t_start']
         models[editing_type] = tmp_model
+    @torch.no_grad()
+    def optimize_all(prompt, instruction, rand_seed):
+        """Generate meshes for ``prompt`` and then edit them with ``instruction``.
+
+        Runs ``generate_3d_with_shap_e`` followed by ``_3d_editing`` with the
+        same ``rand_seed``, starting from an empty state dict.
+
+        Returns:
+            The four generated mesh paths followed by the four edited mesh
+            paths, as one flat tuple.
+        """
+        print("Optimizing all")
+        # Generation fills the state dict that the editing step reads back.
+        *generated, session = generate_3d_with_shap_e(prompt, rand_seed, {})
+        *edited, session = _3d_editing(instruction, rand_seed, session)
+        print(session)
+        return (*generated, *edited)
+    @spaces.GPU()
+    @torch.no_grad()
+    def generate_3d_with_shap_e(prompt, rand_seed, state):
+        """Sample four latents for ``prompt`` and export each as a rotated OBJ mesh.
+
+        The sampled latents (cloned to CPU), the prompt and the seed are stored
+        in ``state`` under the keys 'latent', 'prompt' and 'rand_seed_1'.
+
+        Returns:
+            The four exported mesh paths followed by the updated ``state``.
+        """
+        print("Check if I can use partial")
+        set_seed(rand_seed)
+        n_samples = 4
+        cfg_scale = 15.0
+
+        # Reset the renderer volume to the [-1, 1] cube before decoding.
+        volume = xm.renderer.volume
+        volume.bbox_max = torch.tensor([1.0, 1.0, 1.0]).to(device)
+        volume.bbox_min = torch.tensor([-1.0, -1.0, -1.0]).to(device)
+        volume.bbox = torch.stack([volume.bbox_min, volume.bbox_max])
+        print("prompt: ", prompt, "rand_seed: ", rand_seed, "state:", state)
+
+        latents = sample_latents(
+            batch_size=n_samples,
+            model=latent_model,
+            diffusion=diffusion,
+            guidance_scale=cfg_scale,
+            model_kwargs=dict(texts=[prompt] * n_samples),
+            progress=True,
+            clip_denoised=True,
+            use_fp16=True,
+            use_karras=True,
+            karras_steps=64,
+            sigma_min=1e-3,
+            sigma_max=160,
+            s_churn=0,
+        )
+
+        # File names are keyed on a digest of prompt + seed.
+        digest = hashlib.sha256((prompt + '_' + str(rand_seed)).encode('utf-8')).hexdigest()
+        source_dir = os.path.join('./logs', 'source')
+        os.makedirs(source_dir, exist_ok=True)
+
+        # Every mesh gets the same two rotations: 180 degrees about Y, then 90 about X.
+        flip_y = trimesh.transformations.rotation_matrix(np.radians(180), [0, 1, 0])
+        tilt_x = trimesh.transformations.rotation_matrix(np.radians(90), [1, 0, 0])
+
+        state['latent'] = []
+        state['prompt'] = prompt
+        state['rand_seed_1'] = rand_seed
+        exported = []
+        for idx, sample in enumerate(latents):
+            obj_path = os.path.join(source_dir, '{}_{}.obj'.format(digest, idx))
+            raw_mesh = decode_latent_mesh(xm, sample).tri_mesh()
+            with open(obj_path, 'w') as handle:
+                raw_mesh.write_obj(handle)
+            # Reload through trimesh, rotate, and overwrite the same file.
+            reloaded = trimesh.load_mesh(obj_path)
+            reloaded.apply_transform(flip_y)
+            reloaded.apply_transform(tilt_x)
+            reloaded.export(obj_path)
+            state['latent'].append(sample.clone().detach().cpu())
+            exported.append(obj_path)
+        del latents
+        return exported[0], exported[1], exported[2], exported[3], state
+    @spaces.GPU()
+    @torch.no_grad()
+    def _3d_editing(instruction, rand_seed, state):
+        """Apply a text-guided edit to every latent stored in ``state``.
+
+        Args:
+            instruction: Edit text. It must contain one of the keywords
+                'santa', 'rainbow', 'gold', 'lego', 'wooden' or 'cyber'; the
+                first match (in that order) selects the checkpoint to load.
+            rand_seed: Seed passed to ``set_seed`` before editing.
+            state: Dict holding 'prompt', 'rand_seed_1' and 'latent', as
+                filled in by ``generate_3d_with_shap_e``.
+
+        Returns:
+            The paths of the first four edited OBJ meshes followed by ``state``.
+
+        Raises:
+            ValueError: If ``instruction`` contains none of the known keywords.
+        """
+        set_seed(rand_seed)
+        mesh_path = []
+        prompt = state['prompt']
+        rand_seed_1 = state['rand_seed_1']
+        print("prompt: ", prompt, "rand_seed: ", rand_seed, "instruction:", instruction, "state:", state)
+        prompt_hash = hashlib.sha256(
+            (prompt + '_' + str(rand_seed_1) + '_' + instruction + '_' + str(rand_seed)).encode('utf-8')).hexdigest()
+
+        # Keyword -> editing type; order matters because the first match wins.
+        edit_types = (
+            ('santa', 'santa_hat'),
+            ('rainbow', 'rainbow'),
+            ('gold', 'golden'),
+            ('lego', 'lego'),
+            ('wooden', 'wooden'),
+            ('cyber', 'cyber'),
+        )
+        e_type = next((name for keyword, name in edit_types if keyword in instruction), None)
+        if e_type is None:
+            # Previously this fell through and crashed later with an opaque
+            # UnboundLocalError on ``e_type``.
+            raise ValueError(
+                'Unsupported editing instruction {!r}; expected it to contain one of: {}'.format(
+                    instruction, ', '.join(keyword for keyword, _ in edit_types)))
+
+        model = load_model('text300M', device=device)
+        # Widen the input projection from 1024 to 2048 input features: the
+        # original weights fill the first 1024 columns and the rest start at
+        # zero before the edited checkpoint is loaded on top.
+        with torch.no_grad():
+            new_proj = nn.Linear(1024 * 2, 1024, device=device, dtype=model.wrapped.input_proj.weight.dtype)
+            new_proj.weight = nn.Parameter(torch.zeros_like(new_proj.weight))
+            new_proj.weight[:, :1024].copy_(model.wrapped.input_proj.weight)
+            new_proj.bias = nn.Parameter(torch.zeros_like(new_proj.bias))
+            new_proj.bias[:1024].copy_(model.wrapped.input_proj.bias)
+            model.wrapped.input_proj = new_proj
+        ckp = torch.load(
+            hf_hub_download(repo_id='silentchen/Shap_Editor', subfolder='single', filename='{}.pt'.format(e_type)),
+            map_location='cpu')
+        model.load_state_dict(ckp['model'])
+
+        noise_initial = initial_noise[e_type].to(device)
+        # Keep a distinct local name here. Assigning to ``noise_start_t`` would
+        # make that name local to this function, so reading the enclosing
+        # scope's dict on the right-hand side raises UnboundLocalError.
+        start_t = noise_start_t[e_type]
+
+        general_save_path = './logs/edited'
+        os.makedirs(general_save_path, exist_ok=True)
+
+        # The decode volume depends only on the instruction, so set it once.
+        # Santa edits use a larger upper bound on the third axis (presumably to
+        # leave room for the hat -- TODO confirm).
+        bbox_top = 1.25 if 'santa' in instruction else 1.0
+        volume = xm.renderer.volume
+        volume.bbox_max = torch.tensor([1.0, 1.0, bbox_top]).to(device)
+        volume.bbox_min = torch.tensor([-1.0, -1.0, -1.0]).to(device)
+        volume.bbox = torch.stack([volume.bbox_min, volume.bbox_max])
+
+        # Loop-invariant pieces: the instruction embedding and the two mesh
+        # rotations (180 degrees about Y, then 90 degrees about X).
+        text_embeddings_clip = model.cached_model_kwargs(1, dict(texts=[instruction]))
+        flip_y = trimesh.transformations.rotation_matrix(np.radians(180), [0, 1, 0])
+        tilt_x = trimesh.transformations.rotation_matrix(np.radians(90), [1, 0, 0])
+
+        for i, latent in enumerate(state['latent']):
+            ref_latent = latent.to(device).clone().unsqueeze(0)
+            print("shape of latent: ", ref_latent.shape, "instruction: ", instruction)
+            # randint over [start_t, start_t + 1) always yields start_t.
+            t_1 = torch.randint(start_t, start_t + 1, (1,), device=device).long()
+            noise_input = diffusion.q_sample(ref_latent, t_1, noise=noise_initial)
+            out_1 = diffusion.p_mean_variance(model, noise_input, t_1, clip_denoised=True,
+                                              model_kwargs=text_embeddings_clip,
+                                              condition_latents=ref_latent)
+            # File names use the source-latent index ``i``; ``pred_xstart`` is
+            # assumed to hold a single item because the input batch size is 1.
+            for updated_latent in out_1['pred_xstart']:
+                output_path = os.path.join(general_save_path, '{}_{}.obj'.format(prompt_hash, i))
+                t = decode_latent_mesh(xm, updated_latent).tri_mesh()
+                with open(output_path, 'w') as f:
+                    t.write_obj(f)
+                # Reload through trimesh, rotate, and overwrite the same file.
+                mesh = trimesh.load_mesh(output_path)
+                mesh.apply_transform(flip_y)
+                mesh.apply_transform(tilt_x)
+                mesh.export(output_path)
+                mesh_path.append(output_path)
+        return mesh_path[0], mesh_path[1], mesh_path[2], mesh_path[3], state
     del models
     models = None
                 rand_seed = gr.Slider(minimum=0, maximum=1000, step=1, value=445, label="Random seed")
             gen_btn.click(
+                fn=generate_3d_with_shap_e,
                 inputs=[prompt, rand_seed, state],
                 outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
                 queue=False)
             apply_btn.click(
+                fn=_3d_editing,
                 inputs=[
                     editing_choice[0], rand_seed, state
                 ],
                 ],
                 inputs=[prompt, editing_choice[0], rand_seed],
                 outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, edited_1, edited_2, edited_3, edited_4],
+                fn=optimize_all,
                 cache_examples=True,
             )