Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,11 +66,10 @@ def ensure_model_loaded():
|
|
| 66 |
trust_remote_code=True,
|
| 67 |
)
|
| 68 |
|
| 69 |
-
# Load processor
|
|
|
|
| 70 |
loaded_processor = AutoProcessor.from_pretrained(
|
| 71 |
MODEL_ID,
|
| 72 |
-
min_pixels=MIN_PIXELS,
|
| 73 |
-
max_pixels=MAX_PIXELS,
|
| 74 |
trust_remote_code=True,
|
| 75 |
)
|
| 76 |
|
|
@@ -121,6 +120,7 @@ def extract_text_from_image(
|
|
| 121 |
max_pix = max_pixels if max_pixels else MAX_PIXELS
|
| 122 |
|
| 123 |
# Prepare messages in the format expected by the model
|
|
|
|
| 124 |
messages = [
|
| 125 |
{
|
| 126 |
"role": "user",
|
|
@@ -128,6 +128,8 @@ def extract_text_from_image(
|
|
| 128 |
{
|
| 129 |
"type": "image",
|
| 130 |
"image": image,
|
|
|
|
|
|
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"type": "text",
|
|
@@ -292,16 +294,19 @@ def create_gradio_interface():
|
|
| 292 |
info="Maximum length of extracted text"
|
| 293 |
)
|
| 294 |
|
|
|
|
|
|
|
|
|
|
| 295 |
with gr.Row():
|
| 296 |
min_pixels_input = gr.Number(
|
| 297 |
value=MIN_PIXELS,
|
| 298 |
label="Min Pixels",
|
| 299 |
-
info="
|
| 300 |
)
|
| 301 |
max_pixels_input = gr.Number(
|
| 302 |
value=MAX_PIXELS,
|
| 303 |
label="Max Pixels",
|
| 304 |
-
info="
|
| 305 |
)
|
| 306 |
|
| 307 |
show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")
|
|
|
|
| 66 |
trust_remote_code=True,
|
| 67 |
)
|
| 68 |
|
| 69 |
+
# Load processor
|
| 70 |
+
# Note: min_pixels and max_pixels are intentionally not set on the processor; they are passed per image in the message content and applied in the process_vision_info step
|
| 71 |
loaded_processor = AutoProcessor.from_pretrained(
|
| 72 |
MODEL_ID,
|
|
|
|
|
|
|
| 73 |
trust_remote_code=True,
|
| 74 |
)
|
| 75 |
|
|
|
|
| 120 |
max_pix = max_pixels if max_pixels else MAX_PIXELS
|
| 121 |
|
| 122 |
# Prepare messages in the format expected by the model
|
| 123 |
+
# Include min_pixels and max_pixels in the image content for proper resizing
|
| 124 |
messages = [
|
| 125 |
{
|
| 126 |
"role": "user",
|
|
|
|
| 128 |
{
|
| 129 |
"type": "image",
|
| 130 |
"image": image,
|
| 131 |
+
"min_pixels": min_pix,
|
| 132 |
+
"max_pixels": max_pix,
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"type": "text",
|
|
|
|
| 294 |
info="Maximum length of extracted text"
|
| 295 |
)
|
| 296 |
|
| 297 |
+
gr.Markdown("**Image Resolution Settings**")
|
| 298 |
+
gr.Markdown("*Controls the range of visual tokens (4-16384) for balancing quality and speed*")
|
| 299 |
+
|
| 300 |
with gr.Row():
|
| 301 |
min_pixels_input = gr.Number(
|
| 302 |
value=MIN_PIXELS,
|
| 303 |
label="Min Pixels",
|
| 304 |
+
info=f"Default: {MIN_PIXELS:,} (~{MIN_PIXELS//1000}k)"
|
| 305 |
)
|
| 306 |
max_pixels_input = gr.Number(
|
| 307 |
value=MAX_PIXELS,
|
| 308 |
label="Max Pixels",
|
| 309 |
+
info=f"Default: {MAX_PIXELS:,} (~{MAX_PIXELS//1000}k)"
|
| 310 |
)
|
| 311 |
|
| 312 |
show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")
|