Spaces:

firman-ml
/

ocr-ktp

Sleeping

App Files Files Community

firman-ml committed on Aug 31

Commit

481cb03

verified ·

1 Parent(s): 057f514

Create app.py

Browse files

Files changed (1) hide show

app.py +174 -0

app.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import gradio as gr
+import torch
+from transformers import AutoProcessor, AutoModelForImageTextToText
+from PIL import Image
+import re
+import pandas as pd
+import os
+from datetime import datetime
+import pytz
+# --- 1. MODEL LOADING (GLOBAL) ---
+# This part runs only once when the Gradio app starts, making it efficient.
+print("Initializing application...")
+MODEL_PATH = "emisilab/model-ocr-ktp-v1"
+# Set device to GPU (cuda) if available, otherwise fallback to CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Load the processor and the OCR model from Hugging Face
+print(f"Loading model from {MODEL_PATH}...")
+try:
+    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
+    model = AutoModelForImageTextToText.from_pretrained(MODEL_PATH).to(device)
+    print("Model loaded successfully.")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # If the model fails to load, the app is not usable.
+    # We can handle this by raising the exception or setting a flag.
+    model = None
+    processor = None
+# --- 2. CORE LOGIC: THE EXTRACTION FUNCTION ---
+def extract_ktp_data(image_files):
+    """
+    Processes a list of uploaded image files, performs OCR, and extracts structured data.
+    Args:
+        image_files (list): A list of file-like objects from the Gradio File input.
+    Returns:
+        pandas.DataFrame: A DataFrame containing the extracted data for each image.
+    """
+    if not image_files:
+        print("No image files provided.")
+        # Return an empty dataframe with the correct columns if no files are uploaded
+        return pd.DataFrame(columns=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir'])
+    if not model or not processor:
+        raise gr.Error("Model could not be loaded. The application is not functional.")
+    print(f"Processing {len(image_files)} image(s)...")
+    all_results = []
+    # Define the refined regex patterns for data extraction
+    patterns = {
+        "nik": r'\b\d{16}\b',
+        "nama": r'(?<=\b\d{16}\b\s)(.*?)(?=\s+WNI)',
+        "tempat_lahir": r'(?<=\d{2}-\d{2}-\d{4}\s)([A-Z]+)',
+        "tanggal_lahir": r'\d{2}-\d{2}-\d{4}',
+    }
+    # Loop through each uploaded file
+    for file_obj in image_files:
+        filename = os.path.basename(file_obj.name)
+        print(f"-> Processing: {filename}")
+        try:
+            # Open the image using Pillow
+            image = Image.open(file_obj.name).convert("RGB")
+            # Perform inference
+            pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
+            generated_ids = model.generate(pixel_values, max_length=1024)
+            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+            # Store results for the current image
+            current_image_data = {"Filename": filename}
+            # Apply regex patterns to the OCR output
+            for key, pattern in patterns.items():
+                match = re.search(pattern, generated_text)
+                if match:
+                    # Specific capture groups are needed for 'nama' and 'tempat_lahir'
+                    if key in ['nama', 'tempat_lahir']:
+                        current_image_data[key.replace('_', ' ').title()] = match.group(1).strip()
+                    else:
+                        current_image_data[key.replace('_', ' ').title()] = match.group(0).strip()
+                else:
+                    # If no match is found, record it as None
+                    current_image_data[key.replace('_', ' ').title()] = None
+            all_results.append(current_image_data)
+        except Exception as e:
+            print(f"Error processing {filename}: {e}")
+            # Add an entry indicating the error for this file
+            all_results.append({
+                "Filename": filename,
+                "NIK": f"Error: {e}",
+                "Nama": None,
+                "Tempat Lahir": None,
+                "Tanggal Lahir": None
+            })
+    # Convert the list of results into a Pandas DataFrame
+    results_df = pd.DataFrame(all_results)
+    print("Processing complete.")
+    return results_df
+# --- 3. UI DEFINITION: THE GRADIO INTERFACE ---
+# Get current time in WIB (Western Indonesia Time) for the description.
+# NOTE(review): this is evaluated once, when the module is loaded, so the
+# "Last Updated" line below shows when the app process started rather than
+# when the code or model last changed.
+jakarta_tz = pytz.timezone('Asia/Jakarta')
+current_time_wib = datetime.now(jakarta_tz).strftime("%A, %B %d, %Y at %I:%M %p WIB")
+# A description for the app header, written in Markdown and rendered at the top of the page
+app_description = f"""
+# KTP (Indonesian ID Card) OCR Extractor 🇮🇩
+This application extracts key information (**NIK, Nama, Tempat Lahir, Tanggal Lahir**) from Indonesian ID cards (KTP).
+You can upload one or multiple KTP images at once. The results will be displayed in a table below.
+*Powered by the `emisilab/model-ocr-ktp-v1` model from Hugging Face.*
+\n*Last Updated: {current_time_wib}*
+"""
+# Example images for users to try; these are remote URLs, not files bundled with the app
+example_images = [
+    "https://huggingface.co/emisilab/model-ocr-ktp-v1/resolve/main/ocr-ktp-1.jpg",
+    "https://huggingface.co/emisilab/model-ocr-ktp-v1/resolve/main/ocr-ktp-2.jpg"
+]
+# Use gr.Blocks() for a custom layout
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Title and description
+    gr.Markdown(app_description)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Input component: Allows multiple image uploads
+            image_input = gr.File(
+                label="Upload KTP Images",
+                file_count="multiple",
+                file_types=["image"],
+                type="file"
+            )
+            # Action button to trigger the process
+            extract_button = gr.Button("Extract KTP Data", variant="primary")
+            # Add examples for users to easily test the app
+            gr.Examples(
+                examples=example_images,
+                inputs=image_input,
+                label="Click an example to try"
+            )
+        with gr.Column(scale=2):
+            # Output component: Displays the results in a table
+            output_dataframe = gr.DataFrame(
+                label="Extracted Information",
+                headers=['Filename', 'NIK', 'Nama', 'Tempat Lahir', 'Tanggal Lahir']
+            )
+    # Connect the button to the function
+    extract_button.click(
+        fn=extract_ktp_data,
+        inputs=image_input,
+        outputs=output_dataframe
+    )
+# --- 4. LAUNCH THE APP ---
+if __name__ == "__main__":
+    demo.launch()