Spaces:
Paused
Paused
| """Pydantic models for Dots.OCR text extraction service. | |
| This module defines the data structures used for API requests, | |
| responses, and internal data processing. | |
| """ | |
| from typing import List, Optional, Dict, Any | |
| from pydantic import BaseModel, Field | |
class ExtractedField(BaseModel):
    """One value pulled from an identity document, with its provenance.

    Attributes are declared in serialization order:

    * ``field_name`` -- required; the standardized name of the field.
    * ``value`` -- the extracted text, or ``None`` (the default) when absent.
    * ``confidence`` -- required; validated to lie in ``[0.0, 1.0]`` inclusive.
    * ``source`` -- required; free-form string naming where the value came
      from (the field description lists MRZ, OCR and VLM).
    """

    field_name: str = Field(
        default=...,
        description="Standardized field name",
    )
    value: Optional[str] = Field(
        default=None,
        description="Extracted field value",
    )
    # Both bounds are inclusive (ge / le), so 0.0 and 1.0 are accepted.
    confidence: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Extraction confidence",
    )
    # Plain ``str``: the values named in the description are not enforced.
    source: str = Field(
        default=...,
        description="Source of extraction (MRZ, OCR, VLM)",
    )
class IdCardFields(BaseModel):
    """Container for the structured fields read from an identity document.

    Every attribute is an optional ``ExtractedField`` that defaults to
    ``None``, so an instance can be built with any subset of the fields.
    """

    # Document Information
    document_number: Optional[ExtractedField] = Field(
        default=None,
        description="Document number/ID",
    )
    document_type: Optional[ExtractedField] = Field(
        default=None,
        description="Type of document",
    )
    issuing_country: Optional[ExtractedField] = Field(
        default=None,
        description="Issuing country code",
    )
    issuing_authority: Optional[ExtractedField] = Field(
        default=None,
        description="Issuing authority",
    )

    # Personal Information
    surname: Optional[ExtractedField] = Field(
        default=None,
        description="Family name/surname",
    )
    given_names: Optional[ExtractedField] = Field(
        default=None,
        description="Given names",
    )
    nationality: Optional[ExtractedField] = Field(
        default=None,
        description="Nationality code",
    )
    date_of_birth: Optional[ExtractedField] = Field(
        default=None,
        description="Date of birth",
    )
    gender: Optional[ExtractedField] = Field(
        default=None,
        description="Gender",
    )
    place_of_birth: Optional[ExtractedField] = Field(
        default=None,
        description="Place of birth",
    )

    # Validity Information
    date_of_issue: Optional[ExtractedField] = Field(
        default=None,
        description="Date of issue",
    )
    date_of_expiry: Optional[ExtractedField] = Field(
        default=None,
        description="Date of expiry",
    )
    personal_number: Optional[ExtractedField] = Field(
        default=None,
        description="Personal number",
    )

    # Additional fields for specific document types
    optional_data_1: Optional[ExtractedField] = Field(
        default=None,
        description="Optional data field 1",
    )
    optional_data_2: Optional[ExtractedField] = Field(
        default=None,
        description="Optional data field 2",
    )
class MRZData(BaseModel):
    """Machine Readable Zone (MRZ) data read from an identity document.

    ``raw_text``, ``format_type``, ``is_valid`` and ``confidence`` are
    required. ``checksum_errors`` defaults to a fresh empty list for each
    instance.
    """

    raw_text: str = Field(
        default=...,
        description="Raw MRZ text as extracted",
    )
    # Plain ``str``: the formats named in the description are not enforced.
    format_type: str = Field(
        default=...,
        description="MRZ format type (TD1, TD2, TD3, MRVA, MRVB)",
    )
    is_valid: bool = Field(
        default=...,
        description="Whether MRZ checksums are valid",
    )
    # default_factory gives every instance its own list object.
    checksum_errors: List[str] = Field(
        default_factory=list,
        description="List of checksum validation errors",
    )
    # Both bounds are inclusive (ge / le), so 0.0 and 1.0 are accepted.
    confidence: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Extraction confidence score",
    )