	Update app.py
app.py  CHANGED
    
@@ -128,7 +128,7 @@ def parse_click_coordinates(text: str, img_w: int, img_h: int):
 
     return None
 
-# --- Load model/processor ON CPU at import time (
+# --- Load model/processor ON CPU at import time (ZeroGPU safe) ---
 print(f"Loading model and processor for {MODEL_ID} on CPU startup (ZeroGPU safe)...")
 model = None
 processor = None
@@ -141,7 +141,8 @@ try:
         torch_dtype=torch.float32,  # CPU-safe dtype at import
         trust_remote_code=True,
     )
-
+    # IMPORTANT: use_fast=False to avoid the breaking change error you hit
+    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
     model.eval()
     model_loaded = True
     print("Model and processor loaded on CPU.")
@@ -179,7 +180,7 @@ def run_inference_localization(
     device: str,
     dtype: torch.dtype,
 ) -> str:
-    text_prompt = apply_chat_template_compat(processor, messages_for_template)
+    text_prompt = apply_chat_template_compat(processor, messages, ) if False else apply_chat_template_compat(processor, messages_for_template)
 
     inputs = processor(
         text=[text_prompt],
@@ -214,7 +215,7 @@ def run_inference_localization(
     return decoded_output[0] if decoded_output else ""
 
 # --- Gradio processing function (ZeroGPU-visible) ---
-@spaces.
+@spaces.GPU(duration=120)  # keep GPU attached briefly between calls (seconds)
 def predict_click_location(input_pil_image: Image.Image, instruction: str):
     if not model_loaded or not processor or not model:
         return f"Model not loaded. Error: {load_error_message}", None, "device: n/a | dtype: n/a"
@@ -334,7 +335,7 @@ else:
 
             with gr.Column(scale=1):
                 output_coords_component = gr.Textbox(
-                    label="Predicted Coordinates
+                    label="Predicted Coordinates / Action",
                     interactive=False
                 )
                 output_image_component = gr.Image(
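
For context, the pattern this commit converges on is: load the model and processor on CPU at import time (so the Space can boot with no GPU attached), and touch CUDA only inside the Gradio handler that is decorated with @spaces.GPU. Below is a minimal sketch of that pattern, not the Space's actual code: MODEL_ID, the Auto* classes, and the text-only predict() signature are illustrative assumptions.

# Hedged sketch of the ZeroGPU-safe load pattern shown in the diff above.
# MODEL_ID and the Auto* classes are placeholders, not the Space's real values.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

MODEL_ID = "org/model"  # hypothetical; the real app defines its own MODEL_ID

# Load on CPU at import time so the Space starts without a GPU attached.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # CPU-safe dtype at import
    trust_remote_code=True,
)
# use_fast=False sidesteps the fast-processor breaking change the commit mentions.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
model.eval()

@spaces.GPU(duration=120)  # ZeroGPU grants a GPU only while this function runs
def predict(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    m = model.to(device)  # move the CPU-loaded weights onto the just-attached GPU
    inputs = processor(text=[prompt], return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = m.generate(**inputs, max_new_tokens=64)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0]

The decorated function is the only place CUDA is used, which is what makes the import-time CPU load safe on ZeroGPU; duration is in seconds and bounds how long a single call may hold the GPU.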