Spaces:

VanguardAI
/

Arabic-OCR

Running

App Files Files Community

VanguardAI committed 11 days ago

Commit

a36f21d

verified ·

1 Parent(s): 9d3935e

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -5

app.py CHANGED Viewed

@@ -16,6 +16,9 @@ from PIL import Image, ImageDraw, ImageFont
 from qwen_vl_utils import process_vision_info
 from transformers import AutoModelForCausalLM, AutoProcessor
 # Constants
 MIN_PIXELS = 3136
 MAX_PIXELS = 11289600
@@ -378,7 +381,7 @@ pdf_cache = {
     "is_parsed": False,
     "results": []
 }
-@spaces.GPU(duration=300)
 def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
     """Run inference on an image with the given prompt"""
     try:
@@ -451,7 +454,7 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
         return f"Error during inference: {str(e)}"
-@spaces.GPU(duration=300)
 def _generate_text_and_confidence_for_crop(
     image: Image.Image,
     max_new_tokens: int = 128,
@@ -604,9 +607,51 @@ def process_image(
                 print(f"Error generating markdown: {e}")
                 result['markdown_content'] = raw_output
         except json.JSONDecodeError:
             print("Failed to parse JSON output, using raw output")
             result['markdown_content'] = raw_output
         return result
@@ -813,6 +858,43 @@ def create_gradio_interface():
         color: #0c5460;
         border: 1px solid #b8daff;
     }
     """
     with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Arabic OCR - Document Text Extraction") as demo:
@@ -900,6 +982,30 @@ def create_gradio_interface():
                             interactive=False,
                             height=500
                         )
                     # Editable OCR Results Table
                     with gr.Tab("📊 OCR Results Table"):
                         gr.Markdown("### Editable OCR Results\nReview and edit the extracted text for each detected region")
@@ -981,7 +1087,7 @@ def create_gradio_interface():
             return table_data
         # Event handlers
-        @spaces.GPU(duration=240)
         def process_document(file_path, max_tokens, min_pix, max_pix):
             """Process the uploaded document"""
             global pdf_cache
@@ -1038,8 +1144,23 @@ def create_gradio_interface():
                             first_result['layout_result']
                         )
                     return (
                         first_result['processed_image'],
                         ocr_table_data,
                         markdown_update,
                         first_result['layout_result']
@@ -1071,8 +1192,23 @@ def create_gradio_interface():
                             result['layout_result']
                         )
                     return (
                         result['processed_image'],
                         ocr_table_data,
                         markdown_update,
                         result['layout_result']
@@ -1082,7 +1218,7 @@ def create_gradio_interface():
                 error_msg = f"Error processing document: {str(e)}"
                 print(error_msg)
                 traceback.print_exc()
-                return None, [], error_msg, None
         def handle_file_upload(file_path):
             """Handle file upload and show preview"""
@@ -1111,6 +1247,9 @@ def create_gradio_interface():
                 None,  # image_preview
                 '<div class="page-info">No file loaded</div>',  # page_info
                 None,  # processed_image
                 [],  # ocr_table
                 "Click 'Process Document' to see extracted content...",  # markdown_output
                 None,  # json_output
@@ -1137,7 +1276,7 @@ def create_gradio_interface():
         process_btn.click(
             process_document,
             inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
-            outputs=[processed_image, ocr_table, markdown_output, json_output]
         )
         # The outputs list for the clear button is now correct
@@ -1145,6 +1284,7 @@ def create_gradio_interface():
             clear_all,
             outputs=[
                 file_input, image_preview, page_info, processed_image,
                 ocr_table, markdown_output, json_output
             ]
         )

 from qwen_vl_utils import process_vision_info
 from transformers import AutoModelForCausalLM, AutoProcessor
+# Import Arabic text correction module
+from arabic_corrector import get_corrector
 # Constants
 MIN_PIXELS = 3136
 MAX_PIXELS = 11289600
     "is_parsed": False,
     "results": []
 }
+@spaces.GPU()
 def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
     """Run inference on an image with the given prompt"""
     try:
         return f"Error during inference: {str(e)}"
+@spaces.GPU()
 def _generate_text_and_confidence_for_crop(
     image: Image.Image,
     max_new_tokens: int = 128,
                 print(f"Error generating markdown: {e}")
                 result['markdown_content'] = raw_output
+            # ✨ ARABIC TEXT CORRECTION: Apply intelligent correction to each text region
+            try:
+                print("🔧 Applying Arabic text correction...")
+                corrector = get_corrector()
+                for idx, item in enumerate(layout_data):
+                    text_content = item.get('text', '')
+                    category = item.get('category', '')
+                    # Only correct text regions (skip pictures, formulas, etc.)
+                    if not text_content or category in ['Picture', 'Formula', 'Table']:
+                        continue
+                    # Apply correction
+                    correction_result = corrector.correct_text(text_content)
+                    # Store both original and corrected versions
+                    item['text_original'] = text_content
+                    item['text_corrected'] = correction_result['corrected']
+                    item['correction_confidence'] = correction_result['overall_confidence']
+                    item['corrections_made'] = correction_result['corrections_made']
+                    item['word_corrections'] = correction_result['words']
+                    # Update the text field to use corrected version
+                    item['text'] = correction_result['corrected']
+                # Regenerate markdown with corrected text
+                corrected_markdown = layoutjson2md(image, layout_data, text_key='text')
+                result['markdown_content_corrected'] = corrected_markdown
+                result['markdown_content_original'] = markdown_content
+                print(f"✅ Correction complete")
+            except Exception as e:
+                print(f"⚠️ Error during Arabic correction: {e}")
+                traceback.print_exc()
+                # Fallback: keep original text
+                result['markdown_content_corrected'] = markdown_content
+                result['markdown_content_original'] = markdown_content
         except json.JSONDecodeError:
             print("Failed to parse JSON output, using raw output")
             result['markdown_content'] = raw_output
+            result['markdown_content_original'] = raw_output
+            result['markdown_content_corrected'] = raw_output
         return result
         color: #0c5460;
         border: 1px solid #b8daff;
     }
+    /* Arabic Correction Styling */
+    .original-text-box {
+        background: #fff5f5 !important;
+        border: 2px solid #fc8181 !important;
+        border-radius: 8px;
+        padding: 15px;
+        min-height: 300px;
+        direction: rtl;
+    }
+    .corrected-text-box {
+        background: #f0fff4 !important;
+        border: 2px solid #68d391 !important;
+        border-radius: 8px;
+        padding: 15px;
+        min-height: 300px;
+        direction: rtl;
+    }
+    .correction-high {
+        background: #c6f6d5;
+        padding: 2px 4px;
+        border-radius: 3px;
+    }
+    .correction-medium {
+        background: #fef5e7;
+        padding: 2px 4px;
+        border-radius: 3px;
+    }
+    .correction-low {
+        background: #ffe0e0;
+        padding: 2px 4px;
+        border-radius: 3px;
+    }
     """
     with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Arabic OCR - Document Text Extraction") as demo:
                             interactive=False,
                             height=500
                         )
+                    # ✨ NEW: Arabic Text Correction Comparison Tab
+                    with gr.Tab("✨ Corrected Text (AI)"):
+                        gr.Markdown("""
+                        ### 🔧 AI-Powered Arabic Text Correction
+                        This tab shows **Original OCR** vs **AI-Corrected** text side-by-side.
+                        Corrections use dictionary matching, context analysis, and linguistic intelligence.
+                        """)
+                        with gr.Row():
+                            with gr.Column():
+                                gr.Markdown("#### 📄 Original OCR Output")
+                                original_text_output = gr.Markdown(
+                                    value="Original text will appear here...",
+                                    elem_classes=["original-text-box"]
+                                )
+                            with gr.Column():
+                                gr.Markdown("#### ✅ Corrected Text")
+                                corrected_text_output = gr.Markdown(
+                                    value="Corrected text will appear here...",
+                                    elem_classes=["corrected-text-box"]
+                                )
+                        correction_stats = gr.Markdown(value="")
                     # Editable OCR Results Table
                     with gr.Tab("📊 OCR Results Table"):
                         gr.Markdown("### Editable OCR Results\nReview and edit the extracted text for each detected region")
             return table_data
         # Event handlers
+        @spaces.GPU()
         def process_document(file_path, max_tokens, min_pix, max_pix):
             """Process the uploaded document"""
             global pdf_cache
                             first_result['layout_result']
                         )
+                    # Prepare correction comparison
+                    original_text = first_result.get('markdown_content_original', first_result.get('markdown_content', ''))
+                    corrected_text = first_result.get('markdown_content_corrected', first_result.get('markdown_content', ''))
+                    # Calculate correction statistics
+                    total_corrections = 0
+                    if first_result.get('layout_result'):
+                        for item in first_result['layout_result']:
+                            total_corrections += item.get('corrections_made', 0)
+                    stats_text = f"### 📊 Correction Statistics\n- **Corrections Made**: {total_corrections}\n- **Method**: Dictionary + Context Analysis"
                     return (
                         first_result['processed_image'],
+                        original_text if is_arabic_text(original_text) else gr.update(value=original_text, rtl=False),
+                        corrected_text if is_arabic_text(corrected_text) else gr.update(value=corrected_text, rtl=False),
+                        stats_text,
                         ocr_table_data,
                         markdown_update,
                         first_result['layout_result']
                             result['layout_result']
                         )
+                    # Prepare correction comparison
+                    original_text = result.get('markdown_content_original', result.get('markdown_content', ''))
+                    corrected_text = result.get('markdown_content_corrected', result.get('markdown_content', ''))
+                    # Calculate correction statistics
+                    total_corrections = 0
+                    if result.get('layout_result'):
+                        for item in result['layout_result']:
+                            total_corrections += item.get('corrections_made', 0)
+                    stats_text = f"### 📊 Correction Statistics\n- **Corrections Made**: {total_corrections}\n- **Method**: Dictionary + Context Analysis"
                     return (
                         result['processed_image'],
+                        original_text if is_arabic_text(original_text) else gr.update(value=original_text, rtl=False),
+                        corrected_text if is_arabic_text(corrected_text) else gr.update(value=corrected_text, rtl=False),
+                        stats_text,
                         ocr_table_data,
                         markdown_update,
                         result['layout_result']
                 error_msg = f"Error processing document: {str(e)}"
                 print(error_msg)
                 traceback.print_exc()
+                return None, "Error", "Error", "Error occurred", [], error_msg, None
         def handle_file_upload(file_path):
             """Handle file upload and show preview"""
                 None,  # image_preview
                 '<div class="page-info">No file loaded</div>',  # page_info
                 None,  # processed_image
+                "Original text will appear here...",  # original_text_output
+                "Corrected text will appear here...",  # corrected_text_output
+                "",  # correction_stats
                 [],  # ocr_table
                 "Click 'Process Document' to see extracted content...",  # markdown_output
                 None,  # json_output
         process_btn.click(
             process_document,
             inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
+            outputs=[processed_image, original_text_output, corrected_text_output, correction_stats, ocr_table, markdown_output, json_output]
         )
         # The outputs list for the clear button is now correct
             clear_all,
             outputs=[
                 file_input, image_preview, page_info, processed_image,
+                original_text_output, corrected_text_output, correction_stats,
                 ocr_table, markdown_output, json_output
             ]
         )