Spaces:

VanguardAI
/

Arabic-OCR

Sleeping

App Files Files Community

VanguardAI committed 22 days ago

Commit

ec099ff

verified ·

1 Parent(s): cb39880

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -7

app.py CHANGED Viewed

@@ -3,8 +3,11 @@ import gradio as gr
 import torch
 from PIL import Image
 from qwen_vl_utils import process_vision_info
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 import traceback
 # ========================================
 # AIN VLM MODEL FOR OCR
@@ -66,12 +69,42 @@ def ensure_model_loaded():
             trust_remote_code=True,
         )
-        # Load processor
-        # Note: We handle min_pixels and max_pixels in the process_vision_info step
-        loaded_processor = AutoProcessor.from_pretrained(
-            MODEL_ID,
-            trust_remote_code=True,
-        )
         model = loaded_model
         processor = loaded_processor

 import torch
 from PIL import Image
 from qwen_vl_utils import process_vision_info
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer
+from transformers import Qwen2VLProcessor, Qwen2VLImageProcessor
 import traceback
+import json
+import os
 # ========================================
 # AIN VLM MODEL FOR OCR
             trust_remote_code=True,
         )
+        # Load processor with proper configuration
+        # Manual construction to avoid size parameter issues
+        try:
+            # First, try the standard way
+            loaded_processor = AutoProcessor.from_pretrained(
+                MODEL_ID,
+                trust_remote_code=True,
+            )
+            print("✅ Processor loaded successfully (standard method)")
+        except ValueError as e:
+            if "size must contain 'shortest_edge' and 'longest_edge' keys" in str(e):
+                print("⚠️ Standard processor loading failed, trying manual construction...")
+                # Manually construct processor with correct size format
+                try:
+                    # Load tokenizer separately
+                    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+                    # Create image processor with correct size format
+                    image_processor = Qwen2VLImageProcessor(
+                        size={"shortest_edge": 224, "longest_edge": 1120},  # Valid format
+                        do_resize=True,
+                        do_rescale=True,
+                        do_normalize=True,
+                    )
+                    # Create processor from components
+                    loaded_processor = Qwen2VLProcessor(
+                        image_processor=image_processor,
+                        tokenizer=tokenizer,
+                    )
+                    print("✅ Processor loaded successfully (manual construction)")
+                except Exception as manual_error:
+                    print(f"❌ Manual construction also failed: {manual_error}")
+                    raise
+            else:
+                raise
         model = loaded_model
         processor = loaded_processor