AnjaliSarawgi committed on
Commit
864fa38
·
1 Parent(s): 6f2b338

clean model final changes

Browse files
Files changed (1) hide show
  1. app.py +60 -19
app.py CHANGED
@@ -45,6 +45,7 @@ from transformers import (
45
  )
46
  from matplotlib import cm
47
  import gradio as gr
 
48
 
49
  # ----------------------------------------------------------------------
50
  # Configuration
@@ -789,7 +790,15 @@ def run_ocr(
789
  plain_text = re.sub(r"<[^>]*>", "", predicted_html.replace("<br>", "\n"))
790
  # Write temporary files
791
 
792
- return overlay_img, predicted_html, df_all
 
 
 
 
 
 
 
 
793
 
794
 
795
  # ----------------------------------------------------------------------
@@ -797,8 +806,23 @@ def run_ocr(
797
  #
798
  def create_gradio_interface():
799
  """Create and return the Gradio Blocks interface."""
800
- with gr.Blocks(title="Old Nepali HTR") as demo:
801
- gr.Markdown("""# Old Nepali HTR (Gradio)\n\nUpload a scanned image and (optionally) a segmentation XML file. Choose preprocessing\nsteps and a highlight metric, then click **Run OCR** to extract the text.\nUncertain tokens are highlighted with tooltips showing alternative predictions.\nYou can edit the plain text below and download it or the full token scores.""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
  with gr.Row():
803
  image_input = gr.Image(type="numpy", label="Upload Image")
804
  # When used as an input, gr.File returns either a file path or bytes
@@ -811,18 +835,34 @@ def create_gradio_interface():
811
  type="binary",
812
  file_types=[".xml"],
813
  )
814
- with gr.Row():
815
- apply_gray_checkbox = gr.Checkbox(label="Convert to Grayscale", value=False)
816
- apply_bin_checkbox = gr.Checkbox(label="Binarize", value=False)
817
- metric_radio = gr.Radio([
818
- "Relative Probability",
819
- "Entropy",
820
- ], label="Highlight tokens by", value="Relative Probability")
821
  run_btn = gr.Button("Run OCR")
822
  # Outputs
823
- overlay_output = gr.Image(label="Detected Regions")
824
- predictions_output = gr.HTML(label="Predictions (HTML)")
825
- df_output = gr.DataFrame(label="Token Scores", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
826
  # txt_file_output = gr.File(label="Download OCR Prediction (.txt)")
827
  # csv_file_output = gr.File(label="Download Token Scores (.csv)")
828
  # Editable text
@@ -830,17 +870,18 @@ def create_gradio_interface():
830
  label="Edit full predicted text", lines=8, interactive=True
831
  )
832
  # download_edited_btn = gr.Button("Download edited text")
833
-
834
  # Callback for OCR
835
- def on_run(image, xml, gray, binarize, metric):
836
- return run_ocr(image, xml, gray, binarize, metric)
 
837
 
838
  run_btn.click(
839
  fn=on_run,
840
- inputs=[image_input, xml_input, apply_gray_checkbox, apply_bin_checkbox, metric_radio],
841
- outputs=[overlay_output, predictions_output, df_output],
 
842
  )
843
-
844
  # Populate editable text with plain text from predictions
845
  def update_edited_text(pred_html):
846
  plain = re.sub(r"<[^>]*>", "", (pred_html or "").replace("<br>", "\n"))
 
45
  )
46
  from matplotlib import cm
47
  import gradio as gr
48
+ import tempfile
49
 
50
  # ----------------------------------------------------------------------
51
  # Configuration
 
790
  plain_text = re.sub(r"<[^>]*>", "", predicted_html.replace("<br>", "\n"))
791
  # Write temporary files
792
 
793
+ # return overlay_img, predicted_html
794
+ # Save plain text to a temporary .txt file
795
+ txt_dir = tempfile.gettempdir()
796
+ txt_path = os.path.join(txt_dir, "predictions.txt")
797
+ with open(txt_path, "w", encoding="utf-8") as f:
798
+ f.write(plain_text)
799
+
800
+
801
+ return overlay_img, predicted_html, txt_path
802
 
803
 
804
  # ----------------------------------------------------------------------
 
806
  #
807
  def create_gradio_interface():
808
  """Create and return the Gradio Blocks interface."""
809
+ with gr.Blocks(title="Handwritten Text Recognition (Old Nepali)") as demo:
810
+ gr.Markdown("""# Handwritten Text Recognition (Old Nepali) \n\nUpload an image and (optionally) a segmentation XML file. Then click **Run OCR** to extract the text.""")
811
+ gr.HTML("""
812
+ <style>
813
+ #prediction-box {
814
+ border: 1px solid #ccc;
815
+ padding: 16px;
816
+ border-radius: 8px;
817
+ background-color: #f9f9f9;
818
+ font-size: 18px;
819
+ line-height: 1.6;
820
+ min-height: 100px;
821
+ }
822
+
823
+ }
824
+ </style>
825
+ """)
826
  with gr.Row():
827
  image_input = gr.Image(type="numpy", label="Upload Image")
828
  # When used as an input, gr.File returns either a file path or bytes
 
835
  type="binary",
836
  file_types=[".xml"],
837
  )
838
+ # with gr.Row():
839
+ # apply_gray_checkbox = gr.Checkbox(label="Convert to Grayscale", value=False)
840
+ # apply_bin_checkbox = gr.Checkbox(label="Binarize", value=False)
841
+ # metric_radio = gr.Radio([
842
+ # "Relative Probability",
843
+ # "Entropy",
844
+ # ], label="Highlight tokens by", value="Relative Probability")
845
  run_btn = gr.Button("Run OCR")
846
  # Outputs
847
+ # overlay_output = gr.Image(label="Detected Regions")
848
+ # # predictions_output = gr.HTML(label="Predictions (HTML)")
849
+ # predictions_output = gr.HTML(
850
+ # label="Predictions (HTML)",
851
+ # elem_id="prediction-box"
852
+ # )
853
+ # df_output = gr.DataFrame(label="Token Scores", interactive=False)
854
+ with gr.Row():
855
+ with gr.Column(scale=2):
856
+ overlay_output = gr.Image(label="Detected Regions")
857
+
858
+ with gr.Column(scale=2):
859
+ predictions_output = gr.HTML(
860
+ label="Predictions (HTML)",
861
+ elem_id="prediction-box"
862
+ )
863
+
864
+ # df_output = gr.DataFrame(label="Token Scores", interactive=False)
865
+
866
  # txt_file_output = gr.File(label="Download OCR Prediction (.txt)")
867
  # csv_file_output = gr.File(label="Download Token Scores (.csv)")
868
  # Editable text
 
870
  label="Edit full predicted text", lines=8, interactive=True
871
  )
872
  # download_edited_btn = gr.Button("Download edited text")
873
+ txt_file_output = gr.File(label="Download OCR Prediction (.txt)")
874
  # Callback for OCR
875
+ def on_run(image, xml):
876
+ return run_ocr(image, xml, False, False, "Relative Probability")
877
+
878
 
879
  run_btn.click(
880
  fn=on_run,
881
+ # inputs=[image_input, xml_input, apply_gray_checkbox, apply_bin_checkbox, metric_radio],
882
+ inputs=[image_input, xml_input],
883
+ outputs=[overlay_output, predictions_output, txt_file_output],
884
  )
 
885
  # Populate editable text with plain text from predictions
886
  def update_edited_text(pred_html):
887
  plain = re.sub(r"<[^>]*>", "", (pred_html or "").replace("<br>", "\n"))