dots-ocr-idcard / models.py
tommulder's picture
Prepare for Hugging Face Spaces deployment
e300623
raw
history blame
2.76 kB
"""Pydantic models for Dots.OCR text extraction service.
This module defines the data structures used for API requests,
responses, and internal data processing.
"""
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
class ExtractedField(BaseModel):
    """Individual extracted field from identity document."""

    # NOTE: the class docstring is kept verbatim on purpose; pydantic exposes it
    # as the model description in the generated JSON schema.

    # Required: name of the field this entry describes.
    field_name: str = Field(
        default=...,
        description="Standardized field name",
    )

    # Optional: the extracted text; None when no value is supplied.
    value: Optional[str] = Field(
        default=None,
        description="Extracted field value",
    )

    # Required: score validated to lie in the closed range [0.0, 1.0].
    confidence: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Extraction confidence",
    )

    # Required: plain string label for where the value came from. The values
    # listed in the description are not enforced by the type.
    source: str = Field(
        default=...,
        description="Source of extraction (MRZ, OCR, VLM)",
    )
class IdCardFields(BaseModel):
    """Structured fields extracted from identity documents."""

    # NOTE: the class docstring is kept verbatim on purpose; pydantic exposes it
    # as the model description in the generated JSON schema.
    #
    # Every attribute below is optional and defaults to None, so a model can be
    # built with any subset of fields populated.

    # Document Identification
    document_number: Optional[ExtractedField] = Field(
        default=None,
        description="Document number/ID",
    )
    document_type: Optional[ExtractedField] = Field(
        default=None,
        description="Type of document",
    )
    issuing_country: Optional[ExtractedField] = Field(
        default=None,
        description="Issuing country code",
    )
    issuing_authority: Optional[ExtractedField] = Field(
        default=None,
        description="Issuing authority",
    )

    # Personal Information
    surname: Optional[ExtractedField] = Field(
        default=None,
        description="Family name/surname",
    )
    given_names: Optional[ExtractedField] = Field(
        default=None,
        description="Given names",
    )
    nationality: Optional[ExtractedField] = Field(
        default=None,
        description="Nationality code",
    )
    date_of_birth: Optional[ExtractedField] = Field(
        default=None,
        description="Date of birth",
    )
    gender: Optional[ExtractedField] = Field(
        default=None,
        description="Gender",
    )
    place_of_birth: Optional[ExtractedField] = Field(
        default=None,
        description="Place of birth",
    )

    # Validity Information
    date_of_issue: Optional[ExtractedField] = Field(
        default=None,
        description="Date of issue",
    )
    date_of_expiry: Optional[ExtractedField] = Field(
        default=None,
        description="Date of expiry",
    )
    personal_number: Optional[ExtractedField] = Field(
        default=None,
        description="Personal number",
    )

    # Additional fields for specific document types
    optional_data_1: Optional[ExtractedField] = Field(
        default=None,
        description="Optional data field 1",
    )
    optional_data_2: Optional[ExtractedField] = Field(
        default=None,
        description="Optional data field 2",
    )
class MRZData(BaseModel):
    """Machine Readable Zone data extracted from identity documents."""

    # NOTE: the class docstring is kept verbatim on purpose; pydantic exposes it
    # as the model description in the generated JSON schema.

    # Required: the MRZ text exactly as it was extracted.
    raw_text: str = Field(
        default=...,
        description="Raw MRZ text as extracted",
    )

    # Required: plain string format label. The formats listed in the
    # description are not enforced by the type.
    format_type: str = Field(
        default=...,
        description="MRZ format type (TD1, TD2, TD3, MRVA, MRVB)",
    )

    # Required: overall checksum verdict supplied by the caller.
    is_valid: bool = Field(
        default=...,
        description="Whether MRZ checksums are valid",
    )

    # Optional: each instance gets its own fresh empty list via the factory.
    checksum_errors: List[str] = Field(
        default_factory=list,
        description="List of checksum validation errors",
    )

    # Required: score validated to lie in the closed range [0.0, 1.0].
    confidence: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Extraction confidence score",
    )