Emu3

Runtime error

App Files Files Community

ryanzhangfan committed on Sep 29, 2024

Commit

db312d6

verified ·

1 Parent(s): 9f2b36a

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -27

app.py CHANGED Viewed

@@ -44,6 +44,9 @@ EMU_GEN_HUB = "BAAI/Emu3-Gen"
 EMU_CHAT_HUB = "BAAI/Emu3-Chat"
 VQ_HUB = "BAAI/Emu3-VisionTokenizer"
 # Prepare models and processors
 # Emu3-Gen model and processor
 gen_model = AutoModelForCausalLM.from_pretrained(
@@ -54,15 +57,6 @@ gen_model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 )
-# Emu3-Chat model and processor
-chat_model = AutoModelForCausalLM.from_pretrained(
-    EMU_CHAT_HUB,
-    device_map="cpu",
-    torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",
-    trust_remote_code=True,
-)
 tokenizer = AutoTokenizer.from_pretrained(EMU_CHAT_HUB, trust_remote_code=True)
 image_processor = AutoImageProcessor.from_pretrained(
     VQ_HUB, trust_remote_code=True
@@ -70,16 +64,16 @@ image_processor = AutoImageProcessor.from_pretrained(
 image_tokenizer = AutoModel.from_pretrained(
     VQ_HUB, device_map="cpu", trust_remote_code=True
 ).eval()
-processor = Emu3Processor(
-    image_processor, image_tokenizer, tokenizer
-)
 print(device)
 gen_model.to(device)
-chat_model.to(device)
 image_tokenizer.to(device)
-@spaces.GPU(duration=120)
 def generate_image(prompt):
     POSITIVE_PROMPT = " masterpiece, film grained, best quality."
     NEGATIVE_PROMPT = (
@@ -139,6 +133,48 @@ def generate_image(prompt):
             return im
     return None
 @spaces.GPU
 def vision_language_understanding(image, text):
     inputs = processor(
@@ -176,19 +212,8 @@ def chat(history, user_input, user_image):
         # Append the user input and response to the history
         history = history + [(image2str(user_image) + "<br>" + user_input, response)]
     else:
-        # history = history + [(user_input, "Currently do not support image genration, please provide an valid image.")]
-        # """
-        # Use Emu3-Gen for image generation
-        generated_image = generate_image(user_input)
-        if generated_image is not None:
-            # Append the user input and generated image to the history
-            history = history + [(user_input, image2str(generated_image))]
-        else:
-            # If image generation failed, respond with an error message
-            history = history + [
-                (user_input, "Sorry, I could not generate an image.")
-            ]
-        # """
     return history, history, gr.update(value=None)
 def clear_input():

 EMU_CHAT_HUB = "BAAI/Emu3-Chat"
 VQ_HUB = "BAAI/Emu3-VisionTokenizer"
+# uncomment to use gen model
+"""
 # Prepare models and processors
 # Emu3-Gen model and processor
 gen_model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 )
 tokenizer = AutoTokenizer.from_pretrained(EMU_CHAT_HUB, trust_remote_code=True)
 image_processor = AutoImageProcessor.from_pretrained(
     VQ_HUB, trust_remote_code=True
 image_tokenizer = AutoModel.from_pretrained(
     VQ_HUB, device_map="cpu", trust_remote_code=True
 ).eval()
 print(device)
 gen_model.to(device)
 image_tokenizer.to(device)
+processor = Emu3Processor(
+    image_processor, image_tokenizer, tokenizer
+)
+@spaces.GPU(duration=300)
 def generate_image(prompt):
     POSITIVE_PROMPT = " masterpiece, film grained, best quality."
     NEGATIVE_PROMPT = (
             return im
     return None
+def chat(history, user_input, user_image):
+    if user_image is not None:
+        history = history + [("", "Sorry, gen model do not accept image input")]
+    else:
+        # Use Emu3-Gen for image generation
+        generated_image = generate_image(user_input)
+        if generated_image is not None:
+            # Append the user input and generated image to the history
+            history = history + [(user_input, image2str(generated_image))]
+        else:
+            # If image generation failed, respond with an error message
+            history = history + [
+                (user_input, "Sorry, I could not generate an image.")
+            ]
+    return history, history, gr.update(value=None)
+"""
+# Emu3-Chat model and processor
+chat_model = AutoModelForCausalLM.from_pretrained(
+    EMU_CHAT_HUB,
+    device_map="cpu",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained(EMU_CHAT_HUB, trust_remote_code=True)
+image_processor = AutoImageProcessor.from_pretrained(
+    VQ_HUB, trust_remote_code=True
+)
+image_tokenizer = AutoModel.from_pretrained(
+    VQ_HUB, device_map="cpu", trust_remote_code=True
+).eval()
+print(device)
+chat_model.to(device)
+image_tokenizer.to(device)
+processor = Emu3Processor(
+    image_processor, image_tokenizer, tokenizer
+)
 @spaces.GPU
 def vision_language_understanding(image, text):
     inputs = processor(
         # Append the user input and response to the history
         history = history + [(image2str(user_image) + "<br>" + user_input, response)]
     else:
+        history = history + [(user_input, "Sorry, please specify a valid image for vl understanding.")]
     return history, history, gr.update(value=None)
 def clear_input():