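"""Scene text detection and recognition demo (PPOCR detector + CRNN recognizer).

Gradio app that downloads the ONNX models from the Hugging Face Hub, detects text
regions with OpenCV's DNN module, recognizes the text in each region, and shows the
detections alongside the recognized text.
"""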
import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from ppocr_det import PPOCRDet
from crnn import CRNN
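# PPOCRDet and CRNN are thin wrappers around the ONNX models, provided as the local
# modules ppocr_det.py and crnn.py and run through OpenCV's DNN module.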
# Download model files from Hugging Face
det_model_path = hf_hub_download(
    repo_id="opencv/text_detection_ppocr",
    filename="text_detection_en_ppocrv3_2023may.onnx"
)
rec_model_path = hf_hub_download(
    repo_id="opencv/text_recognition_crnn",
    filename="text_recognition_CRNN_EN_2021sep.onnx"
)
# DNN backend and target
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
# Detector and recognizer setup. The detector runs on a fixed 736x736 input; the
# threshold and unclip parameters control the DB post-processing that turns the
# network's probability map into text boxes.
detector = PPOCRDet(
    modelPath=det_model_path,
    inputSize=[736, 736],
    binaryThreshold=0.3,
    polygonThreshold=0.5,
    maxCandidates=200,
    unclipRatio=2.0,
    backendId=backend_id,
    targetId=target_id
)
recognizer = CRNN(
    modelPath=rec_model_path,
    backendId=backend_id,
    targetId=target_id
)
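# Pipeline: resize the input to the detector's 736x736 input size, detect text boxes,
# recognize the text inside each box with the CRNN model, then render the detections
# (left panel) and the recognized strings (right panel) side by side.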
def detect_and_recognize(input_image):
    # Guard against Submit being pressed before an image has been uploaded
    if input_image is None:
        return None
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    h_orig, w_orig = input_image.shape[:2]
    resized = cv.resize(bgr, (736, 736))
    scale_w = w_orig / 736
    scale_h = h_orig / 736
    # Detect & recognize
    det_results, _ = detector.infer(resized)
    texts = [recognizer.infer(resized, box.reshape(8)) for box in det_results]
    # Prepare canvases
    left = input_image.copy()
    right = np.ones_like(input_image) * 255
    for box_raw, text in zip(det_results, texts):
        # Rescale box to original image coords
        box = np.int32([[pt[0] * scale_w, pt[1] * scale_h] for pt in box_raw])
        # Compute box dimensions
        xs = box[:, 0]
        box_w = xs.max() - xs.min()
        # box height (average vertical edges)
        h1 = np.linalg.norm(box[1] - box[0])
        h2 = np.linalg.norm(box[2] - box[3])
        box_h = (h1 + h2) / 2.0
        # Initial font scale so text height ≈ 80% of box height
        (_, th0), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, 1.0, 1)
        font_scale = (box_h * 0.8) / th0 if th0 > 0 else 1.0
        font_thickness = max(1, int(font_scale))
        # Re-measure text size with this scale
        (tw, th), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
        # If text is wider than box or taller than box, scale down to fit
        scale_x = box_w / tw if tw > 0 else 1.0
        scale_y = (box_h * 0.8) / th if th > 0 else 1.0
        final_scale = font_scale * min(1.0, scale_x, scale_y)
        font_scale = final_scale
        font_thickness = max(1, int(np.floor(font_scale)))
        # Draw boxes on both panels
        cv.polylines(left, [box], isClosed=True, color=(0, 0, 255), thickness=2)
        cv.polylines(right, [box], isClosed=True, color=(0, 0, 255), thickness=2)
        # Draw text on the whiteboard, just above the box's top-left corner; keep the
        # baseline at least one text-height below the top edge so text near the top
        # of the image is not clipped off the canvas
        x0, y0 = box[0]
        y_text = max(th, int(y0 - 5))
        cv.putText(
            right, text, (int(x0), y_text),
            cv.FONT_HERSHEY_SIMPLEX,
            font_scale, (0, 0, 0), font_thickness
        )
    combined = cv.hconcat([left, right])
    return combined
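# Gradio UI: input and output images, Submit/Clear buttons, and clickable examples.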
with gr.Blocks(css='''.example * {
    font-style: italic;
    font-size: 18px !important;
    color: #0ea5e9 !important;
}''') as demo:
    gr.Markdown("## Scene Text Detection and Recognition (PPOCR + CRNN)")
    gr.Markdown("Upload an image with scene text to detect text regions and recognize text using OpenCV DNN with PPOCR + CRNN models.")
    input_img = gr.Image(type="numpy", label="Upload Image")
    output_img = gr.Image(type="numpy", label="Detected Text Image")
    # Clear any previous result whenever a new image is uploaded
    input_img.change(fn=lambda: None, outputs=output_img)
    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")
    submit_btn.click(
        fn=detect_and_recognize,
        inputs=input_img,
        outputs=output_img
    )
    clear_btn.click(
        fn=lambda: (None, None),
        inputs=[],
        outputs=[input_img, output_img]
    )
    gr.Markdown("Click on any example to try it.", elem_classes=["example"])
    gr.Examples(
        examples=[
            ["examples/text_det_test2.jpg"],
            ["examples/right.jpg"]
        ],
        inputs=input_img
    )
    gr.Markdown("**Note**: Left side of output shows detected regions, right side shows recognized text.")

if __name__ == "__main__":
    demo.launch()
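
# For local testing outside the Space, the pipeline could be exercised roughly like
# this (a sketch using one of the bundled example images; not executed by the app):
#
#   img_bgr = cv.imread("examples/right.jpg")
#   img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)
#   result_rgb = detect_and_recognize(img_rgb)
#   cv.imwrite("result.jpg", cv.cvtColor(result_rgb, cv.COLOR_RGB2BGR))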