Spaces:

firman-ml
/

ocr-ktp

Sleeping

App Files Files Community

ocr-ktp / app.py

firman-ml

Update app.py

e1bb0fe verified 3 months ago

raw

history blame contribute delete

4.68 kB

	import gradio as gr
	import torch
	from transformers import AutoProcessor, AutoModelForImageTextToText
	from PIL import Image
	import re
	import pandas as pd
	import os

	# --- 1. MODEL LOADING (GLOBAL) ---
	# Everything in this section executes once, at import time, so the processor
	# and model objects created here are shared by every later extraction call.
	print("Initializing application...")
	MODEL_PATH = "emisilab/model-ocr-ktp-v1"

	# Run on CUDA when torch reports it as available; otherwise stay on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	print(f"Using device: {device}")

	# Fetch the processor and the image-to-text model identified by MODEL_PATH.
	print(f"Loading model from {MODEL_PATH}...")
	try:
	    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
	    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH)
	    model = model.to(device)
	except Exception as e:
	    # Any failure resets BOTH globals to None, even if the processor itself
	    # loaded fine, so later code can detect the unusable state with one check.
	    print(f"Error loading model: {e}")
	    model, processor = None, None
	else:
	    print("Model loaded successfully.")

	# --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---

	# Column order of the result table; the first entry identifies the source file.
	_KTP_COLUMNS = ['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']

	# Output column -> (compiled pattern, capture group holding the value).
	# Keys are the exact column names, so successful rows, error rows and the
	# empty result all share one schema (deriving names with str.title() would
	# produce "Nik" instead of "NIK").
	_KTP_FIELD_PATTERNS = {
	    "NIK": (re.compile(r'\b\d{16}\b'), 0),
	    "Nama": (re.compile(r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)'), 1),
	    "Tempat Lahir": (re.compile(r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)'), 1),
	    "Tanggal Lahir": (re.compile(r'\d{2}-\d{2}-\d{4}'), 0),
	}


	def _parse_ktp_text(text):
	    """Return a dict of KTP fields found in OCR text; unmatched fields are None."""
	    fields = {}
	    for column, (pattern, group) in _KTP_FIELD_PATTERNS.items():
	        match = pattern.search(text)
	        fields[column] = match.group(group).strip() if match else None
	    return fields


	def extract_ktp_data(image_files):
	    """
	    Run OCR on each uploaded image and collect the extracted KTP fields.

	    Args:
	        image_files (list): File paths supplied by the Gradio File input.
	            An empty list or None yields an empty table.

	    Returns:
	        pandas.DataFrame: One row per image with the columns 'Filename',
	        'NIK', 'Nama', 'Tempat Lahir' and 'Tanggal Lahir'. Fields that were
	        not found are None. If an image fails, its 'NIK' cell carries the
	        error message and the remaining fields are None.

	    Raises:
	        gr.Error: If the global model or processor failed to load.
	    """
	    if not image_files:
	        print("No image files provided.")
	        return pd.DataFrame(columns=_KTP_COLUMNS)

	    # Compare against None explicitly instead of relying on the truthiness
	    # of the model/processor objects.
	    if model is None or processor is None:
	        raise gr.Error("Model could not be loaded. The application is not functional.")

	    print(f"Processing {len(image_files)} image(s)...")
	    all_results = []

	    for file_path in image_files:
	        filename = os.path.basename(file_path)
	        print(f"-> Processing: {filename}")

	        try:
	            # The context manager closes the underlying file handle as soon
	            # as the RGB copy has been made.
	            with Image.open(file_path) as source_image:
	                image = source_image.convert("RGB")

	            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
	            generated_ids = model.generate(pixel_values, max_length=1024)
	            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	            all_results.append({"Filename": filename, **_parse_ktp_text(generated_text)})

	        except Exception as e:
	            # Best effort per file: one unreadable image must not abort the
	            # whole batch, so the failure is reported inside its own row.
	            print(f"Error processing {filename}: {e}")
	            all_results.append({
	                "Filename": filename, "NIK": f"Error: {e}", "Nama": None,
	                "Tempat Lahir": None, "Tanggal Lahir": None
	            })

	    # Passing the column list pins the column order for every batch.
	    results_df = pd.DataFrame(all_results, columns=_KTP_COLUMNS)
	    print("Processing complete.")
	    return results_df

	# --- 3. UI DEFINITION: THE GRADIO INTERFACE ---

	# Markdown shown at the top of the page. A plain string is used because the
	# text contains no placeholders to interpolate.
	app_description = """
	# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
	This application extracts key information (NIK, Nama, Tempat Lahir, Tanggal Lahir) from Indonesian ID cards (KTP).
	You can upload one or multiple KTP images at once. The results will be displayed in a table below.
	"""

	# Page layout: the description on top, then one row holding the upload
	# controls (scale=1 column) next to the results table (scale=2 column).
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown(app_description)

	    with gr.Row():
	        # Upload column: multi-file image picker plus the trigger button.
	        with gr.Column(scale=1):
	            image_input = gr.File(
	                type="filepath",
	                file_types=["image"],
	                file_count="multiple",
	                label="Upload KTP Images",
	            )
	            extract_button = gr.Button("Extract KTP Data", variant="primary")

	        # Results column: table whose headers name the extracted fields.
	        with gr.Column(scale=2):
	            output_dataframe = gr.DataFrame(
	                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'],
	                label="Extracted Information",
	            )

	    # A click passes the uploaded file paths to extract_ktp_data and renders
	    # the DataFrame it returns in the results table.
	    extract_button.click(
	        fn=extract_ktp_data,
	        outputs=output_dataframe,
	        inputs=image_input,
	    )

	# --- 4. LAUNCH THE APP ---
	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()