Spaces:
Runtime error
Runtime error
| @startuml NTR_FileParser | |
| package "ntr_fileparser" { | |
| package "data_classes" { | |
| abstract class ParsedStructure { | |
| +{abstract} apply(func: Callable[[str], str]) | |
| +{abstract} to_dict() | |
| +{abstract} to_string() | |
| } | |
| class ParsedDocument { | |
| +name: str | |
| +type: str | |
| +meta: ParsedMeta | |
| +paragraphs: list[ParsedTextBlock] | |
| +tables: list[ParsedTable] | |
| +images: list[ParsedImage] | |
| +formulas: list[ParsedFormula] | |
| } | |
| class ParsedMeta { | |
| +title: str | |
| +author: str | |
| +creation_date: str | |
| } | |
| class ParsedTextBlock { | |
| +text: str | |
| +style: TextStyle | |
| } | |
| enum TextStyle { | |
| NORMAL | |
| BOLD | |
| ITALIC | |
| UNDERLINE | |
| HEADING1 | |
| HEADING2 | |
| HEADING3 | |
| } | |
| class ParsedTable { | |
| +headers: list[str] | |
| +rows: list[ParsedRow] | |
| +subtables: list[ParsedSubtable] | |
| +tag: TableTag | |
| } | |
| class ParsedRow { | |
| +cells: list[str] | |
| } | |
| class ParsedSubtable { | |
| +table: ParsedTable | |
| } | |
| enum TableTag { | |
| UNKNOWN | |
| DATA | |
| METADATA | |
| } | |
| class ParsedImage { | |
| +path: str | |
| +alt_text: str | |
| .. Примечание .. | |
| В текущей реализации не используется | |
| } | |
| class ParsedFormula { | |
| +latex: str | |
| .. Примечание .. | |
| В текущей реализации не используется | |
| } | |
| ParsedStructure <|-- ParsedDocument | |
| ParsedStructure <|-- ParsedTextBlock | |
| ParsedStructure <|-- ParsedTable | |
| ParsedStructure <|-- ParsedRow | |
| ParsedStructure <|-- ParsedSubtable | |
| ParsedStructure <|-- ParsedImage | |
| ParsedStructure <|-- ParsedFormula | |
| ParsedStructure <|-- ParsedMeta | |
| ParsedDocument o-- ParsedMeta | |
| ParsedDocument o-- "*" ParsedTextBlock | |
| ParsedDocument o-- "*" ParsedTable | |
| ParsedDocument o-- "*" ParsedImage | |
| ParsedDocument o-- "*" ParsedFormula | |
| ParsedTable o-- "*" ParsedRow | |
| ParsedTable o-- "*" ParsedSubtable | |
| ParsedTable -- TableTag | |
| ParsedTextBlock -- TextStyle | |
| } | |
| package "parsers" { | |
| abstract class AbstractParser { | |
| +file_types: list | |
| +{abstract} parse() | |
| +{abstract} parse_by_path() | |
| +supports_file() | |
| +_supported_extension() | |
| } | |
| class ParserFactory { | |
| +parsers: list[AbstractParser] | |
| +register_parser() | |
| +get_parser() | |
| } | |
| class UniversalParser { | |
| +factory: ParserFactory | |
| +parse() | |
| +parse_by_path() | |
| } | |
| enum FileType { | |
| XML | |
| DOCX | |
| DOC | |
| HTML | |
| MD | |
| EML | |
| +from_extension() | |
| +get_supported_extensions() | |
| } | |
| package "specific_parsers" { | |
| package "xml" { | |
| class XMLParagraphParser { | |
| +parse() | |
| } | |
| class XMLTableParser { | |
| +parse() | |
| } | |
| class XMLMetaParser { | |
| +parse() | |
| +_extract_info_value() | |
| +_extract_info_recurse() | |
| } | |
| class XMLImageParser { | |
| +parse() | |
| .. Примечание .. | |
| В текущей реализации не используется | |
| } | |
| class XMLFormulaParser { | |
| +parse() | |
| .. Примечание .. | |
| В текущей реализации не используется | |
| } | |
| } | |
| package "docx" { | |
| class CorePropertiesParser { | |
| +parse() | |
| } | |
| class MetadataParser { | |
| +parse() | |
| } | |
| class NumberingParser { | |
| +parse() | |
| } | |
| class RelationshipsParser { | |
| +parse() | |
| } | |
| class StylesParser { | |
| +parse() | |
| } | |
| } | |
| class DocParser { | |
| } | |
| class DocxParser { | |
| } | |
| class PDFParser { | |
| } | |
| class XMLParser { | |
| } | |
| class HTMLParser { | |
| } | |
| class MarkdownParser { | |
| } | |
| class EmailParser { | |
| } | |
| XMLParser -- xml | |
| DocxParser -- docx | |
| } | |
| AbstractParser <|-- DocParser | |
| AbstractParser <|-- DocxParser | |
| AbstractParser <|-- PDFParser | |
| AbstractParser <|-- XMLParser | |
| AbstractParser <|-- HTMLParser | |
| AbstractParser <|-- MarkdownParser | |
| AbstractParser <|-- EmailParser | |
| AbstractParser -- FileType | |
| ParserFactory o-- "*" AbstractParser | |
| UniversalParser --> ParserFactory | |
| } | |
| data_classes <.. parsers : использует | |
| } | |
| @enduml |