VanguardAI committed on
Commit
3ea8059
·
verified ·
1 Parent(s): b4b76e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -66,11 +66,10 @@ def ensure_model_loaded():
66
  trust_remote_code=True,
67
  )
68
 
69
- # Load processor with resolution settings
 
70
  loaded_processor = AutoProcessor.from_pretrained(
71
  MODEL_ID,
72
- min_pixels=MIN_PIXELS,
73
- max_pixels=MAX_PIXELS,
74
  trust_remote_code=True,
75
  )
76
 
@@ -121,6 +120,7 @@ def extract_text_from_image(
121
  max_pix = max_pixels if max_pixels else MAX_PIXELS
122
 
123
  # Prepare messages in the format expected by the model
 
124
  messages = [
125
  {
126
  "role": "user",
@@ -128,6 +128,8 @@ def extract_text_from_image(
128
  {
129
  "type": "image",
130
  "image": image,
 
 
131
  },
132
  {
133
  "type": "text",
@@ -292,16 +294,19 @@ def create_gradio_interface():
292
  info="Maximum length of extracted text"
293
  )
294
 
 
 
 
295
  with gr.Row():
296
  min_pixels_input = gr.Number(
297
  value=MIN_PIXELS,
298
  label="Min Pixels",
299
- info="Minimum image resolution"
300
  )
301
  max_pixels_input = gr.Number(
302
  value=MAX_PIXELS,
303
  label="Max Pixels",
304
- info="Maximum image resolution"
305
  )
306
 
307
  show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")
 
66
  trust_remote_code=True,
67
  )
68
 
69
+ # Load processor
70
+ # Note: We handle min_pixels and max_pixels in the process_vision_info step
71
  loaded_processor = AutoProcessor.from_pretrained(
72
  MODEL_ID,
 
 
73
  trust_remote_code=True,
74
  )
75
 
 
120
  max_pix = max_pixels if max_pixels else MAX_PIXELS
121
 
122
  # Prepare messages in the format expected by the model
123
+ # Include min_pixels and max_pixels in the image content for proper resizing
124
  messages = [
125
  {
126
  "role": "user",
 
128
  {
129
  "type": "image",
130
  "image": image,
131
+ "min_pixels": min_pix,
132
+ "max_pixels": max_pix,
133
  },
134
  {
135
  "type": "text",
 
294
  info="Maximum length of extracted text"
295
  )
296
 
297
+ gr.Markdown("**Image Resolution Settings**")
298
+ gr.Markdown("*Controls the range of visual tokens (4-16384) for balancing quality and speed*")
299
+
300
  with gr.Row():
301
  min_pixels_input = gr.Number(
302
  value=MIN_PIXELS,
303
  label="Min Pixels",
304
+ info=f"Default: {MIN_PIXELS:,} (~{MIN_PIXELS//1000}k)"
305
  )
306
  max_pixels_input = gr.Number(
307
  value=MAX_PIXELS,
308
  label="Max Pixels",
309
+ info=f"Default: {MAX_PIXELS:,} (~{MAX_PIXELS//1000}k)"
310
  )
311
 
312
  show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")