	Update app.py
app.py  CHANGED
    
@@ -128,7 +128,7 @@ def parse_click_coordinates(text: str, img_w: int, img_h: int):
 
     return None
 
-# --- Load model/processor ON CPU at import time (
+# --- Load model/processor ON CPU at import time (ZeroGPU safe) ---
 print(f"Loading model and processor for {MODEL_ID} on CPU startup (ZeroGPU safe)...")
 model = None
 processor = None
@@ -141,7 +141,8 @@ try:
         torch_dtype=torch.float32,  # CPU-safe dtype at import
         trust_remote_code=True,
     )
-
+    # IMPORTANT: use_fast=False to avoid the breaking change error you hit
+    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
     model.eval()
     model_loaded = True
     print("Model and processor loaded on CPU.")
@@ -179,7 +180,7 @@ def run_inference_localization(
     device: str,
     dtype: torch.dtype,
 ) -> str:
-    text_prompt = apply_chat_template_compat(processor, messages_for_template)
+    text_prompt = apply_chat_template_compat(processor, messages, ) if False else apply_chat_template_compat(processor, messages_for_template)
 
     inputs = processor(
         text=[text_prompt],
@@ -214,7 +215,7 @@ def run_inference_localization(
     return decoded_output[0] if decoded_output else ""
 
 # --- Gradio processing function (ZeroGPU-visible) ---
-@spaces.
+@spaces.GPU(duration=120)  # keep GPU attached briefly between calls (seconds)
 def predict_click_location(input_pil_image: Image.Image, instruction: str):
     if not model_loaded or not processor or not model:
         return f"Model not loaded. Error: {load_error_message}", None, "device: n/a | dtype: n/a"
@@ -334,7 +335,7 @@ else:
 
             with gr.Column(scale=1):
                 output_coords_component = gr.Textbox(
-                    label="Predicted Coordinates
+                    label="Predicted Coordinates / Action",
                     interactive=False
                 )
                 output_image_component = gr.Image(
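
For context, the pattern this commit converges on is: load the model and processor on CPU at import time (so the Space can boot with no GPU attached), and touch CUDA only inside the Gradio handler that is decorated with @spaces.GPU. Below is a minimal sketch of that pattern, not the Space's actual code: MODEL_ID, the Auto* classes, and the text-only predict() signature are illustrative assumptions.

# Hedged sketch of the ZeroGPU-safe load pattern shown in the diff above.
# MODEL_ID and the Auto* classes are placeholders, not the Space's real values.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

MODEL_ID = "org/model"  # hypothetical; the real app defines its own MODEL_ID

# Load on CPU at import time so the Space starts without a GPU attached.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # CPU-safe dtype at import
    trust_remote_code=True,
)
# use_fast=False sidesteps the fast-processor breaking change the commit mentions.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
model.eval()

@spaces.GPU(duration=120)  # ZeroGPU grants a GPU only while this function runs
def predict(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    m = model.to(device)  # move the CPU-loaded weights onto the just-attached GPU
    inputs = processor(text=[prompt], return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = m.generate(**inputs, max_new_tokens=64)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0]

The decorated function is the only place CUDA is used, which is what makes the import-time CPU load safe on ZeroGPU; duration is in seconds and bounds how long a single call may hold the GPU.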