VanguardAI committed on
Commit
a36f21d
·
verified ·
1 Parent(s): 9d3935e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -5
app.py CHANGED
@@ -16,6 +16,9 @@ from PIL import Image, ImageDraw, ImageFont
16
  from qwen_vl_utils import process_vision_info
17
  from transformers import AutoModelForCausalLM, AutoProcessor
18
 
 
 
 
19
  # Constants
20
  MIN_PIXELS = 3136
21
  MAX_PIXELS = 11289600
@@ -378,7 +381,7 @@ pdf_cache = {
378
  "is_parsed": False,
379
  "results": []
380
  }
381
- @spaces.GPU(duration=300)
382
  def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
383
  """Run inference on an image with the given prompt"""
384
  try:
@@ -451,7 +454,7 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
451
  return f"Error during inference: {str(e)}"
452
 
453
 
454
- @spaces.GPU(duration=300)
455
  def _generate_text_and_confidence_for_crop(
456
  image: Image.Image,
457
  max_new_tokens: int = 128,
@@ -604,9 +607,51 @@ def process_image(
604
  print(f"Error generating markdown: {e}")
605
  result['markdown_content'] = raw_output
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  except json.JSONDecodeError:
608
  print("Failed to parse JSON output, using raw output")
609
  result['markdown_content'] = raw_output
 
 
610
 
611
  return result
612
 
@@ -813,6 +858,43 @@ def create_gradio_interface():
813
  color: #0c5460;
814
  border: 1px solid #b8daff;
815
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  """
817
 
818
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Arabic OCR - Document Text Extraction") as demo:
@@ -900,6 +982,30 @@ def create_gradio_interface():
900
  interactive=False,
901
  height=500
902
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
903
  # Editable OCR Results Table
904
  with gr.Tab("πŸ“Š OCR Results Table"):
905
  gr.Markdown("### Editable OCR Results\nReview and edit the extracted text for each detected region")
@@ -981,7 +1087,7 @@ def create_gradio_interface():
981
  return table_data
982
 
983
  # Event handlers
984
- @spaces.GPU(duration=240)
985
  def process_document(file_path, max_tokens, min_pix, max_pix):
986
  """Process the uploaded document"""
987
  global pdf_cache
@@ -1038,8 +1144,23 @@ def create_gradio_interface():
1038
  first_result['layout_result']
1039
  )
1040
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  return (
1042
  first_result['processed_image'],
 
 
 
1043
  ocr_table_data,
1044
  markdown_update,
1045
  first_result['layout_result']
@@ -1071,8 +1192,23 @@ def create_gradio_interface():
1071
  result['layout_result']
1072
  )
1073
 
 
 
 
 
 
 
 
 
 
 
 
 
1074
  return (
1075
  result['processed_image'],
 
 
 
1076
  ocr_table_data,
1077
  markdown_update,
1078
  result['layout_result']
@@ -1082,7 +1218,7 @@ def create_gradio_interface():
1082
  error_msg = f"Error processing document: {str(e)}"
1083
  print(error_msg)
1084
  traceback.print_exc()
1085
- return None, [], error_msg, None
1086
 
1087
  def handle_file_upload(file_path):
1088
  """Handle file upload and show preview"""
@@ -1111,6 +1247,9 @@ def create_gradio_interface():
1111
  None, # image_preview
1112
  '<div class="page-info">No file loaded</div>', # page_info
1113
  None, # processed_image
 
 
 
1114
  [], # ocr_table
1115
  "Click 'Process Document' to see extracted content...", # markdown_output
1116
  None, # json_output
@@ -1137,7 +1276,7 @@ def create_gradio_interface():
1137
  process_btn.click(
1138
  process_document,
1139
  inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
1140
- outputs=[processed_image, ocr_table, markdown_output, json_output]
1141
  )
1142
 
1143
  # The outputs list for the clear button is now correct
@@ -1145,6 +1284,7 @@ def create_gradio_interface():
1145
  clear_all,
1146
  outputs=[
1147
  file_input, image_preview, page_info, processed_image,
 
1148
  ocr_table, markdown_output, json_output
1149
  ]
1150
  )
 
16
  from qwen_vl_utils import process_vision_info
17
  from transformers import AutoModelForCausalLM, AutoProcessor
18
 
19
+ # Import Arabic text correction module
20
+ from arabic_corrector import get_corrector
21
+
22
  # Constants
23
  MIN_PIXELS = 3136
24
  MAX_PIXELS = 11289600
 
381
  "is_parsed": False,
382
  "results": []
383
  }
384
+ @spaces.GPU()
385
  def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
386
  """Run inference on an image with the given prompt"""
387
  try:
 
454
  return f"Error during inference: {str(e)}"
455
 
456
 
457
+ @spaces.GPU()
458
  def _generate_text_and_confidence_for_crop(
459
  image: Image.Image,
460
  max_new_tokens: int = 128,
 
607
  print(f"Error generating markdown: {e}")
608
  result['markdown_content'] = raw_output
609
 
610
+ # ✨ ARABIC TEXT CORRECTION: Apply intelligent correction to each text region
611
+ try:
612
+ print("πŸ”§ Applying Arabic text correction...")
613
+ corrector = get_corrector()
614
+
615
+ for idx, item in enumerate(layout_data):
616
+ text_content = item.get('text', '')
617
+ category = item.get('category', '')
618
+
619
+ # Only correct text regions (skip pictures, formulas, etc.)
620
+ if not text_content or category in ['Picture', 'Formula', 'Table']:
621
+ continue
622
+
623
+ # Apply correction
624
+ correction_result = corrector.correct_text(text_content)
625
+
626
+ # Store both original and corrected versions
627
+ item['text_original'] = text_content
628
+ item['text_corrected'] = correction_result['corrected']
629
+ item['correction_confidence'] = correction_result['overall_confidence']
630
+ item['corrections_made'] = correction_result['corrections_made']
631
+ item['word_corrections'] = correction_result['words']
632
+
633
+ # Update the text field to use corrected version
634
+ item['text'] = correction_result['corrected']
635
+
636
+ # Regenerate markdown with corrected text
637
+ corrected_markdown = layoutjson2md(image, layout_data, text_key='text')
638
+ result['markdown_content_corrected'] = corrected_markdown
639
+ result['markdown_content_original'] = markdown_content
640
+
641
+ print(f"βœ… Correction complete")
642
+
643
+ except Exception as e:
644
+ print(f"⚠️ Error during Arabic correction: {e}")
645
+ traceback.print_exc()
646
+ # Fallback: keep original text
647
+ result['markdown_content_corrected'] = markdown_content
648
+ result['markdown_content_original'] = markdown_content
649
+
650
  except json.JSONDecodeError:
651
  print("Failed to parse JSON output, using raw output")
652
  result['markdown_content'] = raw_output
653
+ result['markdown_content_original'] = raw_output
654
+ result['markdown_content_corrected'] = raw_output
655
 
656
  return result
657
 
 
858
  color: #0c5460;
859
  border: 1px solid #b8daff;
860
  }
861
+
862
+ /* Arabic Correction Styling */
863
+ .original-text-box {
864
+ background: #fff5f5 !important;
865
+ border: 2px solid #fc8181 !important;
866
+ border-radius: 8px;
867
+ padding: 15px;
868
+ min-height: 300px;
869
+ direction: rtl;
870
+ }
871
+
872
+ .corrected-text-box {
873
+ background: #f0fff4 !important;
874
+ border: 2px solid #68d391 !important;
875
+ border-radius: 8px;
876
+ padding: 15px;
877
+ min-height: 300px;
878
+ direction: rtl;
879
+ }
880
+
881
+ .correction-high {
882
+ background: #c6f6d5;
883
+ padding: 2px 4px;
884
+ border-radius: 3px;
885
+ }
886
+
887
+ .correction-medium {
888
+ background: #fef5e7;
889
+ padding: 2px 4px;
890
+ border-radius: 3px;
891
+ }
892
+
893
+ .correction-low {
894
+ background: #ffe0e0;
895
+ padding: 2px 4px;
896
+ border-radius: 3px;
897
+ }
898
  """
899
 
900
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Arabic OCR - Document Text Extraction") as demo:
 
982
  interactive=False,
983
  height=500
984
  )
985
+ # ✨ NEW: Arabic Text Correction Comparison Tab
986
+ with gr.Tab("✨ Corrected Text (AI)"):
987
+ gr.Markdown("""
988
+ ### πŸ”§ AI-Powered Arabic Text Correction
989
+ This tab shows **Original OCR** vs **AI-Corrected** text side-by-side.
990
+ Corrections use dictionary matching, context analysis, and linguistic intelligence.
991
+ """)
992
+
993
+ with gr.Row():
994
+ with gr.Column():
995
+ gr.Markdown("#### πŸ“„ Original OCR Output")
996
+ original_text_output = gr.Markdown(
997
+ value="Original text will appear here...",
998
+ elem_classes=["original-text-box"]
999
+ )
1000
+ with gr.Column():
1001
+ gr.Markdown("#### βœ… Corrected Text")
1002
+ corrected_text_output = gr.Markdown(
1003
+ value="Corrected text will appear here...",
1004
+ elem_classes=["corrected-text-box"]
1005
+ )
1006
+
1007
+ correction_stats = gr.Markdown(value="")
1008
+
1009
  # Editable OCR Results Table
1010
  with gr.Tab("πŸ“Š OCR Results Table"):
1011
  gr.Markdown("### Editable OCR Results\nReview and edit the extracted text for each detected region")
 
1087
  return table_data
1088
 
1089
  # Event handlers
1090
+ @spaces.GPU()
1091
  def process_document(file_path, max_tokens, min_pix, max_pix):
1092
  """Process the uploaded document"""
1093
  global pdf_cache
 
1144
  first_result['layout_result']
1145
  )
1146
 
1147
+ # Prepare correction comparison
1148
+ original_text = first_result.get('markdown_content_original', first_result.get('markdown_content', ''))
1149
+ corrected_text = first_result.get('markdown_content_corrected', first_result.get('markdown_content', ''))
1150
+
1151
+ # Calculate correction statistics
1152
+ total_corrections = 0
1153
+ if first_result.get('layout_result'):
1154
+ for item in first_result['layout_result']:
1155
+ total_corrections += item.get('corrections_made', 0)
1156
+
1157
+ stats_text = f"### πŸ“Š Correction Statistics\n- **Corrections Made**: {total_corrections}\n- **Method**: Dictionary + Context Analysis"
1158
+
1159
  return (
1160
  first_result['processed_image'],
1161
+ original_text if is_arabic_text(original_text) else gr.update(value=original_text, rtl=False),
1162
+ corrected_text if is_arabic_text(corrected_text) else gr.update(value=corrected_text, rtl=False),
1163
+ stats_text,
1164
  ocr_table_data,
1165
  markdown_update,
1166
  first_result['layout_result']
 
1192
  result['layout_result']
1193
  )
1194
 
1195
+ # Prepare correction comparison
1196
+ original_text = result.get('markdown_content_original', result.get('markdown_content', ''))
1197
+ corrected_text = result.get('markdown_content_corrected', result.get('markdown_content', ''))
1198
+
1199
+ # Calculate correction statistics
1200
+ total_corrections = 0
1201
+ if result.get('layout_result'):
1202
+ for item in result['layout_result']:
1203
+ total_corrections += item.get('corrections_made', 0)
1204
+
1205
+ stats_text = f"### πŸ“Š Correction Statistics\n- **Corrections Made**: {total_corrections}\n- **Method**: Dictionary + Context Analysis"
1206
+
1207
  return (
1208
  result['processed_image'],
1209
+ original_text if is_arabic_text(original_text) else gr.update(value=original_text, rtl=False),
1210
+ corrected_text if is_arabic_text(corrected_text) else gr.update(value=corrected_text, rtl=False),
1211
+ stats_text,
1212
  ocr_table_data,
1213
  markdown_update,
1214
  result['layout_result']
 
1218
  error_msg = f"Error processing document: {str(e)}"
1219
  print(error_msg)
1220
  traceback.print_exc()
1221
+ return None, "Error", "Error", "Error occurred", [], error_msg, None
1222
 
1223
  def handle_file_upload(file_path):
1224
  """Handle file upload and show preview"""
 
1247
  None, # image_preview
1248
  '<div class="page-info">No file loaded</div>', # page_info
1249
  None, # processed_image
1250
+ "Original text will appear here...", # original_text_output
1251
+ "Corrected text will appear here...", # corrected_text_output
1252
+ "", # correction_stats
1253
  [], # ocr_table
1254
  "Click 'Process Document' to see extracted content...", # markdown_output
1255
  None, # json_output
 
1276
  process_btn.click(
1277
  process_document,
1278
  inputs=[file_input, max_new_tokens, min_pixels, max_pixels],
1279
+ outputs=[processed_image, original_text_output, corrected_text_output, correction_stats, ocr_table, markdown_output, json_output]
1280
  )
1281
 
1282
  # The outputs list for the clear button is now correct
 
1284
  clear_all,
1285
  outputs=[
1286
  file_input, image_preview, page_info, processed_image,
1287
+ original_text_output, corrected_text_output, correction_stats,
1288
  ocr_table, markdown_output, json_output
1289
  ]
1290
  )