|
|
import time
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, List

import numpy as np
from pydantic import BaseModel

from docling.datamodel.settings import settings

if TYPE_CHECKING:
    from docling.datamodel.document import ConversionResult
|
|
|
|
|
|
|
|
class ProfilingScope(str, Enum):
    """Scope label attached to a profiling entry.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Scope value "page".
    PAGE = "page"

    # Scope value "document".
    DOCUMENT = "document"
|
|
|
|
|
|
|
|
class ProfilingItem(BaseModel):
    """Timing samples collected under a single profiling key.

    The statistics helpers hand ``times`` directly to NumPy and return
    whatever NumPy computes for that list.
    """

    # Scope this entry was created with.
    scope: ProfilingScope

    # Counter kept alongside ``times``; starts at zero.
    count: int = 0

    # Recorded durations, one float per sample.
    times: List[float] = []

    # Start timestamp of each recorded sample.
    start_timestamps: List[datetime] = []

    def avg(self) -> float:
        """Return ``np.average`` of the recorded times (no weights)."""
        samples = self.times
        return np.average(samples)

    def std(self) -> float:
        """Return ``np.std`` of the recorded times (NumPy defaults)."""
        samples = self.times
        return np.std(samples)

    def mean(self) -> float:
        """Return ``np.mean`` of the recorded times."""
        samples = self.times
        return np.mean(samples)

    def percentile(self, perc: float) -> float:
        """Return ``np.percentile`` of the recorded times.

        Args:
            perc: Forwarded unchanged as NumPy's ``q`` argument, which
                NumPy interprets on a 0-100 scale.
        """
        samples = self.times
        return np.percentile(samples, q=perc)
|
|
|
|
|
|
|
|
class TimeRecorder:
    """Context manager that records how long a ``with`` block takes.

    If ``settings.debug.profile_pipeline_timings`` is truthy when the
    recorder is constructed, each pass through the ``with`` block appends
    one start timestamp and one elapsed duration (a ``time.monotonic()``
    difference, i.e. fractional seconds) to ``conv_res.timings[key]`` and
    increments that entry's ``count``. If the flag is falsy the recorder
    does nothing.

    The flag is read exactly once, in ``__init__``. Re-reading it in
    ``__enter__``/``__exit__`` could raise ``AttributeError`` (flag turned
    on after construction) or leave ``start_timestamps`` and ``times`` with
    different lengths (flag turned off while the block was running).

    Args:
        conv_res: Object whose ``timings`` mapping collects the
            ``ProfilingItem`` entries.
        key: Name under which the measurements are stored in
            ``conv_res.timings``.
        scope: Scope given to the ``ProfilingItem`` created when ``key`` is
            not yet present; ignored when the entry already exists.
    """

    def __init__(
        self,
        conv_res: "ConversionResult",
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        # Snapshot the flag so all three methods take the same branch.
        self._enabled = bool(settings.debug.profile_pipeline_timings)
        self.conv_res = conv_res
        self.key = key
        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)

    def __enter__(self):
        """Start the clock and log the start timestamp; return ``self``."""
        if self._enabled:
            self.start = time.monotonic()
            # Naive UTC wall-clock time: the same value datetime.utcnow()
            # produced, obtained without the deprecated call.
            started_at = datetime.now(timezone.utc).replace(tzinfo=None)
            self.conv_res.timings[self.key].start_timestamps.append(started_at)
        return self

    def __exit__(self, *args):
        """Store the elapsed time; exceptions from the block propagate."""
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1
|
|
|