|
|
import sys |
|
|
from pathlib import Path |
|
|
from typing import Annotated, Tuple |
|
|
|
|
|
from pydantic import BaseModel, PlainValidator |
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
|
|
|
|
|
|
def _validate_page_range(v: Tuple[int, int]) -> Tuple[int, int]: |
|
|
if v[0] < 1 or v[1] < v[0]: |
|
|
raise ValueError( |
|
|
"Invalid page range: start must be ≥ 1 and end must be ≥ start." |
|
|
) |
|
|
return v |
|
|
|
|
|
|
|
|
PageRange = Annotated[Tuple[int, int], PlainValidator(_validate_page_range)] |
|
|
|
|
|
DEFAULT_PAGE_RANGE: PageRange = (1, sys.maxsize) |
|
|
|
|
|
|
|
|
class DocumentLimits(BaseModel): |
|
|
max_num_pages: int = sys.maxsize |
|
|
max_file_size: int = sys.maxsize |
|
|
page_range: PageRange = DEFAULT_PAGE_RANGE |
|
|
|
|
|
|
|
|
class BatchConcurrencySettings(BaseModel): |
|
|
doc_batch_size: int = 2 |
|
|
doc_batch_concurrency: int = 2 |
|
|
page_batch_size: int = 4 |
|
|
page_batch_concurrency: int = 2 |
|
|
elements_batch_size: int = 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DebugSettings(BaseModel): |
|
|
visualize_cells: bool = False |
|
|
visualize_ocr: bool = False |
|
|
visualize_layout: bool = False |
|
|
visualize_raw_layout: bool = False |
|
|
visualize_tables: bool = False |
|
|
|
|
|
profile_pipeline_timings: bool = False |
|
|
|
|
|
|
|
|
debug_output_path: str = str(Path.cwd() / "debug") |
|
|
|
|
|
|
|
|
class AppSettings(BaseSettings): |
|
|
model_config = SettingsConfigDict(env_prefix="DOCLING_", env_nested_delimiter="_") |
|
|
|
|
|
perf: BatchConcurrencySettings |
|
|
debug: DebugSettings |
|
|
|
|
|
cache_dir: Path = Path.home() / ".cache" / "docling" |
|
|
|
|
|
|
|
|
settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings()) |
|
|
|