File size: 4,684 Bytes
481cb03 62ea705 481cb03 62ea705 481cb03 62ea705 481cb03 62ea705 481cb03 eba7ff5 481cb03 62ea705 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import re
import pandas as pd
import os
# --- 1. MODEL LOADING (GLOBAL) ---
# Everything in this section executes a single time, when the module is first
# run, so the model is shared by all requests rather than reloaded per call.
print("Initializing application...")

MODEL_PATH = "emisilab/model-ocr-ktp-v1"

# Prefer CUDA when torch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Fetch the processor and the image-to-text model identified by MODEL_PATH.
print(f"Loading model from {MODEL_PATH}...")
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH)
    model = model.to(device)
    print("Model loaded successfully.")
except Exception as e:
    # Any failure leaves both globals as None so the rest of the module can
    # still be defined; the failure itself is only reported on stdout.
    print(f"Error loading model: {e}")
    model = processor = None
# --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---
def extract_ktp_data(image_files):
    """Run OCR on uploaded KTP images and return the parsed fields as a table.

    Each image is opened, converted to RGB, passed through the module-level
    ``processor``/``model`` pair, and the decoded text is searched with one
    regular expression per field. A field whose pattern does not match is
    stored as ``None``. If anything raises while handling an image, the
    failure is printed and a row carrying the error message in the ``NIK``
    column is emitted instead, so one bad file does not abort the batch.

    Args:
        image_files (list): File paths from the Gradio File input. ``None``
            or an empty list yields an empty table.

    Returns:
        pandas.DataFrame: One row per input image with the columns
        ``Filename``, ``NIK``, ``Nama``, ``Tempat Lahir``, ``Tanggal Lahir``
        (always in that order).

    Raises:
        gr.Error: If the module-level model or processor is ``None``.
    """
    # Single source of truth for the output schema; it matches the headers
    # declared on the results table in the UI.
    columns = ['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']

    if not image_files:
        print("No image files provided.")
        return pd.DataFrame(columns=columns)

    if model is None or processor is None:
        raise gr.Error("Model could not be loaded. The application is not functional.")

    print(f"Processing {len(image_files)} image(s)...")

    # (output column, compiled pattern, regex group to keep).
    # The column name is spelled out explicitly: deriving it with
    # ``"nik".title()`` produced "Nik", which did not match the "NIK" column
    # used by the error rows, the empty result and the UI headers.
    field_patterns = (
        ('NIK', re.compile(r'\b\d{16}\b'), 0),
        ('Nama', re.compile(r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)'), 1),
        ('Tempat Lahir', re.compile(r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)'), 1),
        ('Tanggal Lahir', re.compile(r'\d{2}-\d{2}-\d{4}'), 0),
    )

    all_results = []
    for file_path in image_files:
        filename = os.path.basename(file_path)
        print(f"-> Processing: {filename}")
        try:
            # The context manager closes the underlying file handle as soon
            # as the RGB copy has been made.
            with Image.open(file_path) as raw_image:
                image = raw_image.convert("RGB")

            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
            generated_ids = model.generate(pixel_values, max_length=1024)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            row = {"Filename": filename}
            for column, pattern, group in field_patterns:
                match = pattern.search(generated_text)
                row[column] = match.group(group).strip() if match else None
            all_results.append(row)
        except Exception as e:
            # Best effort per file: record the failure and keep going.
            print(f"Error processing {filename}: {e}")
            all_results.append({
                "Filename": filename, "NIK": f"Error: {e}", "Nama": None,
                "Tempat Lahir": None, "Tanggal Lahir": None
            })

    # Passing ``columns`` pins the column order regardless of row contents.
    results_df = pd.DataFrame(all_results, columns=columns)
    print("Processing complete.")
    return results_df
# --- 3. UI DEFINITION: THE GRADIO INTERFACE ---
# Markdown rendered at the top of the page.
app_description = """
# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
This application extracts key information (**NIK, Nama, Tempat Lahir, Tanggal Lahir**) from Indonesian ID cards (KTP).
You can upload one or multiple KTP images at once. The results will be displayed in a table below.
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(app_description)

    with gr.Row():
        # First column (scale=1): multi-file image upload plus the trigger button.
        with gr.Column(scale=1):
            image_input = gr.File(
                label="Upload KTP Images",
                file_count="multiple",
                file_types=["image"],
                type="filepath",
            )
            extract_button = gr.Button("Extract KTP Data", variant="primary")

        # Second column (scale=2): table that receives the extraction results.
        with gr.Column(scale=2):
            output_dataframe = gr.DataFrame(
                label="Extracted Information",
                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'],
            )

    # Clicking the button passes the uploaded file paths to extract_ktp_data
    # and shows the DataFrame it returns in the results table.
    extract_button.click(
        fn=extract_ktp_data,
        inputs=image_input,
        outputs=output_dataframe,
    )
# --- 4. LAUNCH THE APP ---
# Start the Gradio server only when this file is executed as a script;
# importing the module defines `demo` without launching it.
if __name__ == "__main__":
    demo.launch()
|