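"""Scene text detection and recognition demo (PPOCR detector + CRNN recognizer).

Gradio app that downloads the ONNX models from the Hugging Face Hub, detects text
regions with OpenCV's DNN module, recognizes the text in each region, and shows the
detections alongside the recognized text.
"""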
import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from ppocr_det import PPOCRDet
from crnn import CRNN
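# PPOCRDet and CRNN are thin wrappers around the ONNX models, provided as the local
# modules ppocr_det.py and crnn.py and run through OpenCV's DNN module.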
# Download model files from Hugging Face
det_model_path = hf_hub_download(
    repo_id="opencv/text_detection_ppocr",
    filename="text_detection_en_ppocrv3_2023may.onnx"
)
rec_model_path = hf_hub_download(
    repo_id="opencv/text_recognition_crnn",
    filename="text_recognition_CRNN_EN_2021sep.onnx"
)
# DNN backend and target
backend_id = cv.dnn.DNN_BACKEND_OPENCV
target_id = cv.dnn.DNN_TARGET_CPU
# Detector and recognizer setup. The detector runs on a fixed 736x736 input; the
# threshold and unclip parameters control the DB post-processing that turns the
# network's probability map into text boxes.
detector = PPOCRDet(
    modelPath=det_model_path,
    inputSize=[736, 736],
    binaryThreshold=0.3,
    polygonThreshold=0.5,
    maxCandidates=200,
    unclipRatio=2.0,
    backendId=backend_id,
    targetId=target_id
)
recognizer = CRNN(
    modelPath=rec_model_path,
    backendId=backend_id,
    targetId=target_id
)
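# Pipeline: resize the input to the detector's 736x736 input size, detect text boxes,
# recognize the text inside each box with the CRNN model, then render the detections
# (left panel) and the recognized strings (right panel) side by side.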
def detect_and_recognize(input_image):
    # Guard against Submit being pressed before an image has been uploaded
    if input_image is None:
        return None
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    h_orig, w_orig = input_image.shape[:2]
    resized = cv.resize(bgr, (736, 736))
    scale_w = w_orig / 736
    scale_h = h_orig / 736
    # Detect & recognize
    det_results, _ = detector.infer(resized)
    texts = [recognizer.infer(resized, box.reshape(8)) for box in det_results]
    # Prepare canvases
    left = input_image.copy()
    right = np.ones_like(input_image) * 255
    for box_raw, text in zip(det_results, texts):
        # Rescale box to original image coords
        box = np.int32([[pt[0] * scale_w, pt[1] * scale_h] for pt in box_raw])
        # Compute box dimensions
        xs = box[:, 0]
        box_w = xs.max() - xs.min()
        # box height (average vertical edges)
        h1 = np.linalg.norm(box[1] - box[0])
        h2 = np.linalg.norm(box[2] - box[3])
        box_h = (h1 + h2) / 2.0
        # Initial font scale so text height ≈ 80% of box height
        (_, th0), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, 1.0, 1)
        font_scale = (box_h * 0.8) / th0 if th0 > 0 else 1.0
        font_thickness = max(1, int(font_scale))
        # Re-measure text size with this scale
        (tw, th), _ = cv.getTextSize(text, cv.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
        # If text is wider than box or taller than box, scale down to fit
        scale_x = box_w / tw if tw > 0 else 1.0
        scale_y = (box_h * 0.8) / th if th > 0 else 1.0
        final_scale = font_scale * min(1.0, scale_x, scale_y)
        font_scale = final_scale
        font_thickness = max(1, int(np.floor(font_scale)))
        # Draw boxes on both panels
        cv.polylines(left, [box], isClosed=True, color=(0, 0, 255), thickness=2)
        cv.polylines(right, [box], isClosed=True, color=(0, 0, 255), thickness=2)
        # Draw text on the whiteboard, just above the box's top-left corner; keep the
        # baseline at least one text-height below the top edge so text near the top
        # of the image is not clipped off the canvas
        x0, y0 = box[0]
        y_text = max(th, int(y0 - 5))
        cv.putText(
            right, text, (int(x0), y_text),
            cv.FONT_HERSHEY_SIMPLEX,
            font_scale, (0, 0, 0), font_thickness
        )
    combined = cv.hconcat([left, right])
    return combined
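# Gradio UI: input and output images, Submit/Clear buttons, and clickable examples.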
with gr.Blocks(css='''.example * {
    font-style: italic;
    font-size: 18px !important;
    color: #0ea5e9 !important;
}''') as demo:
    gr.Markdown("## Scene Text Detection and Recognition (PPOCR + CRNN)")
    gr.Markdown("Upload an image with scene text to detect text regions and recognize text using OpenCV DNN with PPOCR + CRNN models.")
    input_img = gr.Image(type="numpy", label="Upload Image")
    output_img = gr.Image(type="numpy", label="Detected Text Image")
    # Clear any previous result whenever a new image is uploaded
    input_img.change(fn=lambda: None, outputs=output_img)
    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")
    submit_btn.click(
        fn=detect_and_recognize,
        inputs=input_img,
        outputs=output_img
    )
    clear_btn.click(
        fn=lambda: (None, None),
        inputs=[],
        outputs=[input_img, output_img]
    )
    gr.Markdown("Click on any example to try it.", elem_classes=["example"])
    gr.Examples(
        examples=[
            ["examples/text_det_test2.jpg"],
            ["examples/right.jpg"]
        ],
        inputs=input_img
    )
    gr.Markdown("**Note**: Left side of output shows detected regions, right side shows recognized text.")

if __name__ == "__main__":
    demo.launch()
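
# For local testing outside the Space, the pipeline could be exercised roughly like
# this (a sketch using one of the bundled example images; not executed by the app):
#
#   img_bgr = cv.imread("examples/right.jpg")
#   img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)
#   result_rgb = detect_and_recognize(img_rgb)
#   cv.imwrite("result.jpg", cv.cvtColor(result_rgb, cv.COLOR_RGB2BGR))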