File size: 1,753 Bytes
fcaa164 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import time
from datetime import datetime
from enum import Enum
from typing import TYPE_CHECKING, List
import numpy as np
from pydantic import BaseModel
from docling.datamodel.settings import settings
if TYPE_CHECKING:
from docling.datamodel.document import ConversionResult
class ProfilingScope(str, Enum):
PAGE = "page"
DOCUMENT = "document"
class ProfilingItem(BaseModel):
scope: ProfilingScope
count: int = 0
times: List[float] = []
start_timestamps: List[datetime] = []
def avg(self) -> float:
return np.average(self.times) # type: ignore
def std(self) -> float:
return np.std(self.times) # type: ignore
def mean(self) -> float:
return np.mean(self.times) # type: ignore
def percentile(self, perc: float) -> float:
return np.percentile(self.times, perc) # type: ignore
class TimeRecorder:
def __init__(
self,
conv_res: "ConversionResult",
key: str,
scope: ProfilingScope = ProfilingScope.PAGE,
):
if settings.debug.profile_pipeline_timings:
if key not in conv_res.timings.keys():
conv_res.timings[key] = ProfilingItem(scope=scope)
self.conv_res = conv_res
self.key = key
def __enter__(self):
if settings.debug.profile_pipeline_timings:
self.start = time.monotonic()
self.conv_res.timings[self.key].start_timestamps.append(datetime.utcnow())
return self
def __exit__(self, *args):
if settings.debug.profile_pipeline_timings:
elapsed = time.monotonic() - self.start
self.conv_res.timings[self.key].times.append(elapsed)
self.conv_res.timings[self.key].count += 1
|