Spaces:

VanguardAI
/

Arabic-OCR

Running

App Files Community

VanguardAI committed 15 days ago

Commit

3ea8059

verified ·

1 Parent(s): b4b76e6

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -5

app.py CHANGED Viewed

@@ -66,11 +66,10 @@ def ensure_model_loaded():
             trust_remote_code=True,
         )
-        # Load processor with resolution settings
         loaded_processor = AutoProcessor.from_pretrained(
             MODEL_ID,
-            min_pixels=MIN_PIXELS,
-            max_pixels=MAX_PIXELS,
             trust_remote_code=True,
         )
@@ -121,6 +120,7 @@ def extract_text_from_image(
         max_pix = max_pixels if max_pixels else MAX_PIXELS
         # Prepare messages in the format expected by the model
         messages = [
             {
                 "role": "user",
@@ -128,6 +128,8 @@ def extract_text_from_image(
                     {
                         "type": "image",
                         "image": image,
                     },
                     {
                         "type": "text",
@@ -292,16 +294,19 @@ def create_gradio_interface():
                         info="Maximum length of extracted text"
                     )
                     with gr.Row():
                         min_pixels_input = gr.Number(
                             value=MIN_PIXELS,
                             label="Min Pixels",
-                            info="Minimum image resolution"
                         )
                         max_pixels_input = gr.Number(
                             value=MAX_PIXELS,
                             label="Max Pixels",
-                            info="Maximum image resolution"
                         )
                     show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")

             trust_remote_code=True,
         )
+        # Load processor
+        # Note: We handle min_pixels and max_pixels in the process_vision_info step
         loaded_processor = AutoProcessor.from_pretrained(
             MODEL_ID,
             trust_remote_code=True,
         )
         max_pix = max_pixels if max_pixels else MAX_PIXELS
         # Prepare messages in the format expected by the model
+        # Include min_pixels and max_pixels in the image content for proper resizing
         messages = [
             {
                 "role": "user",
                     {
                         "type": "image",
                         "image": image,
+                        "min_pixels": min_pix,
+                        "max_pixels": max_pix,
                     },
                     {
                         "type": "text",
                         info="Maximum length of extracted text"
                     )
+                    gr.Markdown("**Image Resolution Settings**")
+                    gr.Markdown("*Controls the range of visual tokens (4-16384) for balancing quality and speed*")
                     with gr.Row():
                         min_pixels_input = gr.Number(
                             value=MIN_PIXELS,
                             label="Min Pixels",
+                            info=f"Default: {MIN_PIXELS:,} (~{MIN_PIXELS//1000}k)"
                         )
                         max_pixels_input = gr.Number(
                             value=MAX_PIXELS,
                             label="Max Pixels",
+                            info=f"Default: {MAX_PIXELS:,} (~{MAX_PIXELS//1000}k)"
                         )
                     show_prompt_btn = gr.Button("👁️ Show Default Prompt", size="sm")