firman-ml committed on
Commit
481cb03
·
verified ·
1 Parent(s): 057f514

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoProcessor, AutoModelForImageTextToText
4
+ from PIL import Image
5
+ import re
6
+ import pandas as pd
7
+ import os
8
+ from datetime import datetime
9
+ import pytz
10
+
11
# --- 1. MODEL LOADING (GLOBAL) ---
# Executed a single time at import, so every request reuses the same
# processor/model pair instead of reloading them.
print("Initializing application...")
MODEL_PATH = "emisilab/model-ocr-ktp-v1"

# Prefer the GPU when torch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Fetch the processor and the OCR model identified by MODEL_PATH.
print(f"Loading model from {MODEL_PATH}...")
try:
    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH).to(device)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # A failed load leaves the app unusable; both handles are reset to None
    # so later code can detect the failure instead of crashing at import.
    model = None
    processor = None
32
+
33
+ # --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---
34
# Output columns in display order. Shared by the empty result, the per-image
# rows and the error rows so the resulting table always has the same shape.
_KTP_COLUMNS = ['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']

# (output column, compiled pattern, capture group) for every extracted field.
# The column name is spelled out explicitly: deriving it with str.title()
# turned "nik" into "Nik", which did not match the "NIK" column used elsewhere.
_KTP_FIELD_PATTERNS = (
    ("NIK", re.compile(r'\b\d{16}\b'), 0),
    ("Nama", re.compile(r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)'), 1),
    ("Tempat Lahir", re.compile(r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)'), 1),
    ("Tanggal Lahir", re.compile(r'\d{2}-\d{2}-\d{4}'), 0),
)


def _uploaded_file_path(file_obj):
    """Return the path of an upload given as a path string or an object with ``.name``."""
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return file_obj.name


def _parse_ktp_text(text):
    """Map each output column to its match in the OCR text, or None when absent."""
    fields = {}
    for column, pattern, group in _KTP_FIELD_PATTERNS:
        match = pattern.search(text)
        fields[column] = match.group(group).strip() if match else None
    return fields


def extract_ktp_data(image_files):
    """
    Run OCR on each uploaded image and extract the structured KTP fields.

    Args:
        image_files (list): Uploaded files, each either a filesystem path
            (str / os.PathLike) or an object exposing the path as ``.name``.

    Returns:
        pandas.DataFrame: One row per image with the columns Filename, NIK,
        Nama, Tempat Lahir and Tanggal Lahir. Fields that are not found are
        None. If processing an image fails, its NIK cell holds the error
        message and the remaining fields are None.

    Raises:
        gr.Error: If the global model or processor failed to load.
    """
    if not image_files:
        print("No image files provided.")
        # Keep the table shape stable even when nothing was uploaded.
        return pd.DataFrame(columns=_KTP_COLUMNS)

    if model is None or processor is None:
        raise gr.Error("Model could not be loaded. The application is not functional.")

    print(f"Processing {len(image_files)} image(s)...")
    all_results = []

    for file_obj in image_files:
        path = _uploaded_file_path(file_obj)
        filename = os.path.basename(path)
        print(f"-> Processing: {filename}")

        try:
            # The context manager closes the underlying file handle;
            # convert() returns an independent in-memory copy.
            with Image.open(path) as raw_image:
                image = raw_image.convert("RGB")

            # Perform inference
            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
            generated_ids = model.generate(pixel_values, max_length=1024)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            row = {"Filename": filename}
            row.update(_parse_ktp_text(generated_text))
            all_results.append(row)

        except Exception as e:
            # Best effort: one bad image must not abort the whole batch.
            print(f"Error processing {filename}: {e}")
            all_results.append({
                "Filename": filename,
                "NIK": f"Error: {e}",
                "Nama": None,
                "Tempat Lahir": None,
                "Tanggal Lahir": None,
            })

    # Passing columns= pins both the column set and its order.
    results_df = pd.DataFrame(all_results, columns=_KTP_COLUMNS)
    print("Processing complete.")
    return results_df
110
+
111
+ # --- 3. UI DEFINITION: THE GRADIO INTERFACE ---
112
+
113
# Timestamp in WIB (Western Indonesia Time), captured once at start-up and
# embedded in the description below.
jakarta_tz = pytz.timezone('Asia/Jakarta')
current_time_wib = datetime.now(tz=jakarta_tz).strftime(
    "%A, %B %d, %Y at %I:%M %p WIB"
)

# Markdown shown at the top of the page.
app_description = f"""
# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
This application extracts key information (**NIK, Nama, Tempat Lahir, Tanggal Lahir**) from Indonesian ID cards (KTP).
You can upload one or multiple KTP images at once. The results will be displayed in a table below.

*Powered by the `emisilab/model-ocr-ktp-v1` model from Hugging Face.*
\n*Last Updated: {current_time_wib}*
"""

# Sample images offered to users as one-click examples.
example_images = [
    "https://huggingface.co/emisilab/model-ocr-ktp-v1/resolve/main/ocr-ktp-1.jpg",
    "https://huggingface.co/emisilab/model-ocr-ktp-v1/resolve/main/ocr-ktp-2.jpg",
]
132
+
133
# Build the page with gr.Blocks() so the layout can be arranged by hand.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header: title and description rendered from Markdown.
    gr.Markdown(value=app_description)

    with gr.Row():
        # Left column: upload widget, trigger button and clickable examples.
        with gr.Column(scale=1):
            # NOTE(review): the accepted values of `type` appear to differ
            # between Gradio releases - confirm "file" against the pinned
            # Gradio version.
            image_input = gr.File(
                label="Upload KTP Images",
                file_count="multiple",
                file_types=["image"],
                type="file",
            )

            # Pressing this button starts the extraction.
            extract_button = gr.Button(value="Extract KTP Data", variant="primary")

            # Ready-made samples that fill the upload input when clicked.
            gr.Examples(
                examples=example_images,
                inputs=image_input,
                label="Click an example to try",
            )

        # Right column: table with the extracted fields.
        with gr.Column(scale=2):
            output_dataframe = gr.DataFrame(
                label="Extracted Information",
                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'],
            )

    # Wire the button: uploaded files in, results table out.
    extract_button.click(extract_ktp_data, image_input, output_dataframe)

# --- 4. LAUNCH THE APP ---
if __name__ == "__main__":
    demo.launch()