# ocr-ktp / app.py
# firman-ml's picture
# Update app.py
# e1bb0fe verified
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import re
import pandas as pd
import os
# --- 1. MODEL LOADING (GLOBAL) ---
# Executed at import time, so the model is loaded once and reused for every request.
print("Initializing application...")
MODEL_PATH = "emisilab/model-ocr-ktp-v1"

# Prefer the GPU when torch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Fetch the processor and the OCR model. Both names stay None when loading
# fails, which is what extract_ktp_data checks before doing any work.
print(f"Loading model from {MODEL_PATH}...")
processor = None
model = None
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH).to(device)
    print("Model loaded successfully.")
except Exception as load_error:
    # Keep the app importable; a half-loaded pair is reset to None together.
    print(f"Error loading model: {load_error}")
    processor = None
    model = None
# --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---
# Output columns in display order; shared by the empty, success and error paths
# so every row of the result uses exactly the same keys.
_KTP_COLUMNS = ['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']

# (column name, compiled pattern, capture group to keep) for each extracted field.
_KTP_FIELD_PATTERNS = (
    ("NIK", re.compile(r'\b\d{16}\b'), 0),
    ("Nama", re.compile(r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)'), 1),
    ("Tempat Lahir", re.compile(r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)'), 1),
    ("Tanggal Lahir", re.compile(r'\d{2}-\d{2}-\d{4}'), 0),
)


def _parse_ktp_text(text):
    """Return a dict of KTP fields found in one OCR transcript (None when absent)."""
    fields = {}
    for column, pattern, group in _KTP_FIELD_PATTERNS:
        match = pattern.search(text)
        fields[column] = match.group(group).strip() if match else None
    return fields


def extract_ktp_data(image_files):
    """
    Run OCR on each uploaded image and extract the structured KTP fields.

    Args:
        image_files (list): File paths of the uploaded images. May be empty
            or None, in which case an empty table is returned.

    Returns:
        pandas.DataFrame: One row per image with the columns 'Filename',
        'NIK', 'Nama', 'Tempat Lahir' and 'Tanggal Lahir'. Fields that were
        not found are None. When an image fails to process, its 'NIK' cell
        holds the text "Error: <message>" and the other fields are None.

    Raises:
        gr.Error: If the global model or processor failed to load.
    """
    if not image_files:
        print("No image files provided.")
        return pd.DataFrame(columns=_KTP_COLUMNS)

    if model is None or processor is None:
        raise gr.Error("Model could not be loaded. The application is not functional.")

    print(f"Processing {len(image_files)} image(s)...")
    all_results = []

    for file_path in image_files:
        filename = os.path.basename(file_path)
        print(f"-> Processing: {filename}")
        try:
            # convert() returns an independent in-memory copy, so the file
            # handle can be released as soon as the conversion is done.
            with Image.open(file_path) as raw_image:
                image = raw_image.convert("RGB")

            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
            generated_ids = model.generate(pixel_values, max_length=1024)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            all_results.append({"Filename": filename, **_parse_ktp_text(generated_text)})
        except Exception as e:
            # Per-file boundary: one unreadable image must not abort the batch.
            print(f"Error processing {filename}: {e}")
            error_row = dict.fromkeys(_KTP_COLUMNS)
            error_row["Filename"] = filename
            error_row["NIK"] = f"Error: {e}"
            all_results.append(error_row)

    # Pin the column set and order regardless of which path produced the rows.
    results_df = pd.DataFrame(all_results, columns=_KTP_COLUMNS)
    print("Processing complete.")
    return results_df
# --- 3. UI DEFINITION: THE GRADIO INTERFACE ---
# Markdown rendered at the top of the page.
app_description = """
# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
This application extracts key information (**NIK, Nama, Tempat Lahir, Tanggal Lahir**) from Indonesian ID cards (KTP).
You can upload one or multiple KTP images at once. The results will be displayed in a table below.
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(app_description)

    with gr.Row():
        # Left column: upload widget and the trigger button.
        with gr.Column(scale=1):
            image_input = gr.File(
                type="filepath",
                file_types=["image"],
                file_count="multiple",
                label="Upload KTP Images",
            )
            extract_button = gr.Button("Extract KTP Data", variant="primary")

        # Right column: table holding one row per processed image.
        with gr.Column(scale=2):
            output_dataframe = gr.DataFrame(
                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'],
                label="Extracted Information",
            )

    # Clicking the button sends the uploaded files through the extractor
    # and shows the returned table.
    extract_button.click(
        fn=extract_ktp_data,
        inputs=image_input,
        outputs=output_dataframe,
    )

# --- 4. LAUNCH THE APP ---
# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()