Spaces:
Runtime error
Runtime error
| """This module includes classes to define configurations.""" | |
| from typing import Any, Dict, List, Optional | |
| from pyaml_env import parse_config | |
| from pydantic import BaseModel | |
class Query(BaseModel):
    """Pydantic model carrying a query together with its optional metadata."""

    # Required string fields.
    query: str
    query_abbreviation: str
    # Optional fields; each falls back to None when omitted.
    abbreviations_replaced: Optional[List] = None
    userName: Optional[str] = None
class SemanticChunk(BaseModel):
    """Pydantic model describing a single chunk; every field is required."""

    index_answer: int
    doc_name: str
    title: str
    text_answer: str
    # doc_number: str  # TODO: later rename this variable to doc_id everywhere it is used.
    other_info: List
    start_index_paragraph: int
class FilterChunks(BaseModel):
    """Pydantic model grouping ``SemanticChunk`` items under an id, filename and title."""

    # Required string fields.
    id: str
    filename: str
    title: str
    # Required list of chunk models.
    chunks: List[SemanticChunk]
class BusinessProcess(BaseModel):
    """Pydantic model with three nullable string fields describing a business process."""

    # NOTE(review): no explicit defaults — Pydantic v1 treats these as
    # defaulting to None, Pydantic v2 treats them as required-but-nullable;
    # confirm which behaviour is intended.
    production_activities_section: Optional[str]
    processes_name: Optional[str]
    level_process: Optional[str]
class Lead(BaseModel):
    """Pydantic model with the nullable string fields ``person`` and ``leads``."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    person: Optional[str]
    leads: Optional[str]
class Subordinate(BaseModel):
    """Pydantic model with the nullable string fields ``person_name`` and ``position``."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    person_name: Optional[str]
    position: Optional[str]
class OrganizationalStructure(BaseModel):
    """Pydantic model holding a position plus related ``Lead`` and ``Subordinate`` data.

    Every field is optional and defaults to ``None``.
    """

    position: Optional[str] = None
    leads: Optional[List[Lead]] = None
    # NOTE(review): the name is plural but the type is a single Subordinate,
    # unlike ``leads`` which is a list — confirm this is intended.
    subordinates: Optional[Subordinate] = None
class RocksNN(BaseModel):
    """Pydantic model with the nullable string fields ``division`` and ``company_name``."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    division: Optional[str]
    company_name: Optional[str]
class RocksNNSearch(BaseModel):
    """Pydantic model pairing a nullable ``division`` with a nullable list of company names."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    division: Optional[str]
    # Untyped list; the element type is not declared here.
    company_name: Optional[List]
class SegmentationSearch(BaseModel):
    """Pydantic model pairing a nullable ``segmentation_model`` with a nullable list of company names."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    segmentation_model: Optional[str]
    # Untyped list; the element type is not declared here.
    company_name: Optional[List]
class Group(BaseModel):
    """Pydantic model with three nullable string fields describing a group entry."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    group_name: Optional[str]
    position_in_group: Optional[str]
    block: Optional[str]
class GroupComposition(BaseModel):
    """Pydantic model with the nullable string fields ``person_name`` and ``position_in_group``."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    person_name: Optional[str]
    position_in_group: Optional[str]
class SearchGroupComposition(BaseModel):
    """Pydantic model pairing a group name with a list of ``GroupComposition`` entries."""

    # NOTE(review): no explicit defaults — optional under Pydantic v1,
    # required-but-nullable under Pydantic v2; confirm the intent.
    group_name: Optional[str]
    group_composition: Optional[List[GroupComposition]]
class PeopleChunks(BaseModel):
    """Pydantic model collecting the optional per-person lists plus a required name."""

    # Optional lists; each defaults to None.
    business_processes: Optional[List[BusinessProcess]] = None
    # NOTE(review): the field name is spelled "organizatinal" (sic); it is kept
    # because renaming it would change the model's public field name.
    organizatinal_structure: Optional[List[OrganizationalStructure]] = None
    business_curator: Optional[List[RocksNN]] = None
    groups: Optional[List[Group]] = None
    # The only required field of this model.
    person_name: str
class SummaryChunks(BaseModel):
    """Pydantic model aggregating the optional result sections and a query type.

    Every section defaults to ``None``; ``query_type`` defaults to the
    string ``'[3]'``.
    """

    doc_chunks: Optional[List[FilterChunks]] = None
    people_search: Optional[List[PeopleChunks]] = None
    groups_search: Optional[SearchGroupComposition] = None
    rocks_nn_search: Optional[RocksNNSearch] = None
    segmentation_search: Optional[SegmentationSearch] = None
    # Stored as a string, not as a list.
    query_type: str = '[3]'
class ElasticConfiguration:
    """Typed view of the ``elastic`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``es_host``, ``es_port``,
                ``use_elastic`` and ``people_path``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``es_port`` cannot be converted to ``int``.
        """
        section = config_data
        self.es_host = str(section['es_host'])
        self.es_port = int(section['es_port'])
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_elastic = bool(section['use_elastic'])
        self.people_path = str(section['people_path'])
class FaissDataConfiguration:
    """Typed view of the ``faiss`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance as strings.

        Args:
            config_data: Mapping with the keys ``model_embedding_path``,
                ``device`` and ``path_to_metadata``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
        """
        section = config_data
        self.model_embedding_path = str(section['model_embedding_path'])
        self.device = str(section['device'])
        self.path_to_metadata = str(section['path_to_metadata'])
class ChunksElasticSearchConfiguration:
    """Typed view of the ``chunks_elastic_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_chunks_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_chunks_search = bool(section['use_chunks_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class PeopleSearchConfiguration:
    """Typed view of the ``people_elastic_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_people_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_people_search = bool(section['use_people_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class VectorSearchConfiguration:
    """Typed view of the ``vector_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_vector_search`` and
                ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_vector_search = bool(section['use_vector_search'])
        self.k_neighbors = int(section['k_neighbors'])
class GroupsSearchConfiguration:
    """Typed view of the ``groups_elastic_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_groups_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_groups_search = bool(section['use_groups_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class RocksNNSearchConfiguration:
    """Typed view of the ``rocks_nn_elastic_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_rocks_nn_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_rocks_nn_search = bool(section['use_rocks_nn_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class AbbreviationSearchConfiguration:
    """Typed view of the ``abbreviation_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_abbreviation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_abbreviation_search = bool(section['use_abbreviation_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class SegmentationSearchConfiguration:
    """Typed view of the ``segmentation_elastic_search`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_segmentation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_segmentation_search = bool(section['use_segmentation_search'])
        self.index_name = str(section['index_name'])
        self.k_neighbors = int(section['k_neighbors'])
class SearchConfiguration:
    """Typed view of the ``search`` configuration section.

    Each sub-section is wrapped in its own configuration class;
    ``stop_index_names`` is copied into a new list.
    """

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Build the per-search configuration objects from the section mapping.

        Args:
            config_data: Mapping with the keys ``vector_search``,
                ``people_elastic_search``, ``chunks_elastic_search``,
                ``groups_elastic_search``, ``rocks_nn_elastic_search``,
                ``segmentation_elastic_search``, ``stop_index_names`` and
                ``abbreviation_search``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
        """
        sections = config_data

        self.vector_search = VectorSearchConfiguration(sections['vector_search'])

        people_section = sections['people_elastic_search']
        self.people_elastic_search = PeopleSearchConfiguration(people_section)

        chunks_section = sections['chunks_elastic_search']
        self.chunks_elastic_search = ChunksElasticSearchConfiguration(chunks_section)

        groups_section = sections['groups_elastic_search']
        self.groups_elastic_search = GroupsSearchConfiguration(groups_section)

        rocks_nn_section = sections['rocks_nn_elastic_search']
        self.rocks_nn_elastic_search = RocksNNSearchConfiguration(rocks_nn_section)

        segmentation_section = sections['segmentation_elastic_search']
        self.segmentation_elastic_search = SegmentationSearchConfiguration(
            segmentation_section
        )

        # list() makes a shallow copy, so the attribute is independent of
        # the sequence stored in the source mapping.
        self.stop_index_names = list(sections['stop_index_names'])

        abbreviation_section = sections['abbreviation_search']
        self.abbreviation_search = AbbreviationSearchConfiguration(abbreviation_section)
class FilesConfiguration:
    """Typed view of the ``files`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``empty_start``,
                ``regulations_path``, ``default_regulations_path`` and
                ``documents_path``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.empty_start = bool(section['empty_start'])
        self.regulations_path = str(section['regulations_path'])
        self.default_regulations_path = str(section['default_regulations_path'])
        self.documents_path = str(section['documents_path'])
class RankingConfiguration:
    """Typed view of the ``ranging`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        Args:
            config_data: Mapping with the keys ``use_ranging``, ``alpha``,
                ``beta`` and ``k_neighbors``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If ``alpha``/``beta`` cannot be converted to ``float``
                or ``k_neighbors`` cannot be converted to ``int``.
        """
        section = config_data
        # NOTE(review): bool() treats any non-empty string (e.g. "false") as
        # True — confirm the loader supplies a real boolean here.
        self.use_ranging = bool(section['use_ranging'])
        self.alpha = float(section['alpha'])
        self.beta = float(section['beta'])
        self.k_neighbors = int(section['k_neighbors'])
class DataBaseConfiguration:
    """Typed view of the ``bd`` configuration section.

    Wraps each sub-section in its dedicated configuration class.
    """

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Build the nested configuration objects from the section mapping.

        Args:
            config_data: Mapping with the keys ``elastic``, ``faiss``,
                ``search``, ``files`` and ``ranging``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
        """
        sections = config_data
        self.elastic = ElasticConfiguration(sections['elastic'])
        self.faiss = FaissDataConfiguration(sections['faiss'])
        self.search = SearchConfiguration(sections['search'])
        self.files = FilesConfiguration(sections['files'])
        # The section key is 'ranging' while the attribute is named 'ranker'.
        self.ranker = RankingConfiguration(sections['ranging'])
class LLMConfiguration:
    """Typed view of the ``llm`` configuration section."""

    # String values that stand for "no value" in the optional settings.
    _NULL_MARKERS = ("", "null", "None")

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the section values onto the instance, coercing each to its type.

        ``base_url`` and ``api_key_env`` are optional: a real ``None`` or one
        of the strings ``""``, ``"null"``, ``"None"`` is stored as ``None``.

        Args:
            config_data: Mapping with the keys ``base_url``, ``api_key_env``,
                ``model``, ``tokenizer_name``, ``temperature``, ``top_p``,
                ``min_p``, ``frequency_penalty``, ``presence_penalty`` and
                ``seed``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
            ValueError: If a numeric setting cannot be converted.
        """
        self.base_url = self._optional_str(config_data['base_url'])
        self.api_key_env = self._optional_str(config_data['api_key_env'])
        self.model = str(config_data['model'])
        # The 'tokenizer_name' key is exposed as the 'tokenizer' attribute.
        self.tokenizer = str(config_data['tokenizer_name'])
        self.temperature = float(config_data['temperature'])
        self.top_p = float(config_data['top_p'])
        self.min_p = float(config_data['min_p'])
        self.frequency_penalty = float(config_data['frequency_penalty'])
        self.presence_penalty = float(config_data['presence_penalty'])
        self.seed = int(config_data['seed'])

    @classmethod
    def _optional_str(cls, value: Any) -> Optional[str]:
        """Return ``str(value)``, or ``None`` when *value* denotes a missing setting."""
        # A real None must be checked explicitly: it equals none of the marker
        # strings, and str(None) would otherwise store the text "None".
        if value is None or value in cls._NULL_MARKERS:
            return None
        return str(value)
class CommonConfiguration:
    """Typed view of the ``common`` configuration section."""

    def __init__(self, config_data: Dict[str, Any]) -> None:
        """Copy the two log path settings onto the instance as strings.

        Args:
            config_data: Mapping with the keys ``log_file_path`` and
                ``log_sql_path``.

        Raises:
            KeyError: If ``config_data`` is a dict and a key is missing.
        """
        section = config_data
        self.log_file_path = str(section['log_file_path'])
        self.log_sql_path = str(section['log_sql_path'])
class Configuration:
    """Top-level container for all configuration sections."""

    def __init__(self, config_file_path: Optional[str] = None) -> None:
        """Load the configuration from a file.

        A configuration file path is the only supported source, so a path
        must be supplied even though the parameter has a ``None`` default.

        Args:
            config_file_path: Path of the configuration file to load.

        Raises:
            ValueError: If ``config_file_path`` is ``None``.
        """
        # Guard clause: there is nothing to load without a path.
        if config_file_path is None:
            raise ValueError('At least one of config_path must be not None.')
        self._load_from_config(config_file_path)

    def _load_data(self, data: Dict[str, Any]) -> None:
        """Populate the section attributes from a configuration dictionary.

        Args:
            data: Dictionary with the keys ``common``, ``bd`` and ``llm``.
        """
        common_section = data['common']
        self.common_config = CommonConfiguration(common_section)
        database_section = data['bd']
        self.db_config = DataBaseConfiguration(database_section)
        llm_section = data['llm']
        self.llm_config = LLMConfiguration(llm_section)

    def _load_from_config(self, config_file_path: str) -> None:
        """Parse the configuration file and load the resulting dictionary.

        Args:
            config_file_path: Path of the configuration file, passed to
                ``parse_config``.
        """
        self._load_data(parse_config(config_file_path))