import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import re
import pandas as pd
import os

# --- 1. MODEL LOADING (GLOBAL) ---
# This block runs only once when the Gradio app starts, so the model is loaded
# a single time and reused for every request.
print("Initializing application...")
MODEL_PATH = "emisilab/model-ocr-ktp-v1"

# Set device to GPU (cuda) if available, otherwise fallback to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the processor and the OCR model from Hugging Face
print(f"Loading model from {MODEL_PATH}...")
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH).to(device)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    model = None
    processor = None

# --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---
def extract_ktp_data(image_files):
    """
    Processes a list of uploaded image files, performs OCR, and extracts structured data.
    
    Args:
        image_files (list): A list of temporary file paths (strings) from the Gradio File input.

    Returns:
        pandas.DataFrame: A DataFrame containing the extracted data for each image.
    """
    if not image_files:
        print("No image files provided.")
        return pd.DataFrame(columns=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'])
        
    if not model or not processor:
        raise gr.Error("Model could not be loaded. The application is not functional.")

    print(f"Processing {len(image_files)} image(s)...")
    all_results = []

    # Regex patterns for data extraction, keyed by the output column name so the
    # extracted fields line up with the DataFrame headers declared in the UI.
    patterns = {
        "NIK": r'\b\d{16}\b',                                  # 16-digit ID number
        "Nama": r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)',           # name between the NIK and "WNI"
        "Tempat Lahir": r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)',   # uppercase word right after the birth date
        "Tanggal Lahir": r'\d{2}-\d{2}-\d{4}',                 # date in DD-MM-YYYY format
    }
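    # Hypothetical text these patterns would parse correctly, shown for illustration
    # only; the actual layout of this model's OCR output may differ:
    #   "3171234567890123 BUDI SANTOSO WNI 17-08-1995 JAKARTA"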

    # Loop through each uploaded file
    for file_path in image_files:
        filename = os.path.basename(file_path)
        print(f"-> Processing: {filename}")
        
        try:
            image = Image.open(file_path).convert("RGB")
            
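            # Run OCR: preprocess the image into pixel values, generate token IDs
            # with the model, then decode them back into plain text.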
            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
            generated_ids = model.generate(pixel_values, max_length=1024)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            
            current_image_data = {"Filename": filename}
            
            for column, pattern in patterns.items():
                match = re.search(pattern, generated_text)
                if match:
                    # Patterns with a capture group ("Nama", "Tempat Lahir") yield
                    # group(1); the others match the whole field directly.
                    value = match.group(1) if match.lastindex else match.group(0)
                    current_image_data[column] = value.strip()
                else:
                    current_image_data[column] = None
            
            all_results.append(current_image_data)

        except Exception as e:
            print(f"Error processing {filename}: {e}")
            all_results.append({
                "Filename": filename, "NIK": f"Error: {e}", "Nama": None, 
                "Tempat Lahir": None, "Tanggal Lahir": None
            })

    results_df = pd.DataFrame(all_results)
    print("Processing complete.")
    return results_df

# --- 3. UI DEFINITION: THE GRADIO INTERFACE ---

app_description = """
# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
This application extracts key information (**NIK, Nama, Tempat Lahir, Tanggal Lahir**) from Indonesian ID cards (KTP). 
You can upload one or multiple KTP images at once. The results will be displayed in a table below.
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(app_description)

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.File(
                label="Upload KTP Images",
                file_count="multiple",
                file_types=["image"],
                type="filepath"
            )
            extract_button = gr.Button("Extract KTP Data", variant="primary")

        with gr.Column(scale=2):
            output_dataframe = gr.DataFrame(
                label="Extracted Information",
                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']
            )

    extract_button.click(
        fn=extract_ktp_data,
        inputs=image_input,
        outputs=output_dataframe
    )

# --- 4. LAUNCH THE APP ---
if __name__ == "__main__":
    demo.launch()