# NOTE(review): removed paste-site residue ("Spaces:" / "Runtime error" banner
# lines) that preceded the code and made the file unparseable as Python.
| from typing import List, Dict, Optional, Tuple | |
| import requests | |
| from logging import Logger | |
| from common.configuration import SemanticChunk | |
| from common.configuration import SegmentationSearch | |
| from common.configuration import SummaryChunks | |
| from common.configuration import FilterChunks | |
| from common.configuration import RocksNNSearch | |
| from common.configuration import PeopleChunks | |
| from common.configuration import SearchGroupComposition | |
def aggregate_answers(vector_answer: Optional[Dict] = None,
                      people_answer: Optional[List] = None,
                      chunks_answer: Optional[List] = None,
                      groups_answer: Optional[List] = None,
                      rocks_nn_answer: Optional[List] = None,
                      segmentation_answer: Optional[List] = None) -> Dict:
    """Merge per-backend search responses into a single result mapping.

    Only the sections whose input is not ``None`` appear in the result;
    calling with no arguments returns an empty dict.

    Args:
        vector_answer: chunk hits from the vector search, keyed by chunk id.
        people_answer: hits describing people; each carries a ``_source`` mapping.
        chunks_answer: chunk hits from the full-text search.
        groups_answer: group hits; only the first hit's ``_source`` is used.
        rocks_nn_answer: ROCS NN hits; only the first hit's ``_source`` is used.
        segmentation_answer: segmentation hits; only the first hit's
            ``_source`` is used.

    Returns:
        Dict with any of the keys ``doc_chunks``, ``people_search``,
        ``groups_search``, ``rocks_nn_search``, ``segmentation_search``.
    """
    aggregated: Dict = {}
    if not (vector_answer is None and chunks_answer is None):
        aggregated['doc_chunks'] = combine_answer([vector_answer, chunks_answer])
    if people_answer is not None:
        aggregated['people_search'] = [
            PeopleChunks(**hit['_source']) for hit in people_answer
        ]
    if groups_answer is not None:
        first_group = groups_answer[0]['_source']
        aggregated['groups_search'] = SearchGroupComposition(**first_group)
    if rocks_nn_answer is not None:
        first_rocks = rocks_nn_answer[0]['_source']
        aggregated['rocks_nn_search'] = RocksNNSearch(
            division=first_rocks['division_name'],
            company_name=first_rocks['company_name'],
        )
    if segmentation_answer is not None:
        aggregated['segmentation_search'] = SegmentationSearch(
            **segmentation_answer[0]['_source'])
    return aggregated
def combine_answer(answers):
    """Group chunk hits from several sources into per-document containers.

    Args:
        answers: iterable of hit mappings (``None`` entries are skipped);
            each value must provide ``doc_name``, ``title``, ``id`` and
            ``start_index_paragraph`` fields.

    Returns:
        List of ``FilterChunks``, one per distinct document name, each
        collecting the ``SemanticChunk`` objects found for that document.
    """
    combined = []
    seen_doc_names = []
    seen_start_indexes = []
    for source in answers:
        if source is None:
            continue
        for key in source:
            hit = source[key]
            doc_name = hit["doc_name"]
            if doc_name not in seen_doc_names:
                # First chunk for this document: open a new group and
                # remember both the name and the paragraph index.
                combined.append(FilterChunks(
                    id=str(hit['id']),
                    filename=doc_name,
                    title=hit["title"],
                    chunks=[SemanticChunk(**hit)]))
                seen_doc_names.append(doc_name)
                seen_start_indexes.append(hit['start_index_paragraph'])
            elif hit['start_index_paragraph'] not in seen_start_indexes:
                # Known document, unseen paragraph: append to its group.
                group_pos = seen_doc_names.index(doc_name)
                combined[group_pos].chunks.append(SemanticChunk(**hit))
    return combined
def preprocessed_chunks(answer_chunks: SummaryChunks, llm_host_tokens: str, logger: Logger) -> str:
    """Render aggregated search results as a numbered plain-text context.

    Each populated section of ``answer_chunks`` is emitted as one or more
    numbered entries ("Документ: [N]"), separated by a backslash-only line,
    suitable for feeding to an LLM as retrieval context.

    Args:
        answer_chunks: aggregated search results; any of its sections
            (doc_chunks, people_search, groups_search, rocks_nn_search,
            segmentation_search) may be None and is then skipped.
        llm_host_tokens: URL of a token-counting endpoint; currently unused
            (see the TODO on the token estimate below).
        logger: logger used to report when trailing chunks are dropped.

    Returns:
        The formatted context string ('' when every section is None).
    """
    output_text = ''
    count = 0          # documents rendered so far; drives the [N] numbering
    count_tokens = 0   # running token estimate for the doc_chunks budget
    if answer_chunks.doc_chunks is not None:
        for doc in answer_chunks.doc_chunks:
            output_text += f'Документ: [{count + 1}]\n'
            # Prefer the real title; fall back to the filename.
            if doc.title != 'unknown':
                output_text += f'Название документа: {doc.title}\n'
            else:
                output_text += f'Название документа: {doc.filename}\n'
            for chunk in doc.chunks:
                if len(chunk.other_info):
                    output_text += '...\n'
                    for info_line in chunk.other_info:
                        # Normalize the private-use dash glyph (\uf02d) left
                        # over from document conversion to a plain hyphen.
                        # NOTE(review): the original pattern character was
                        # lost in extraction (the code read .replace('', '-'),
                        # which would insert '-' between every character);
                        # \uf02d matches the explicit pattern used at the end
                        # of this function — confirm the intended glyph.
                        output_text += f'{info_line}'.replace('\uf02d', '-')
                    output_text += '...\n'
                else:
                    output_text += '...\n'
                    output_text += f'{chunk.text_answer}'
                    output_text += '...\n'
                # Rough token estimate: ~2 tokens per character.
                # TODO: deepinfra exposes no token-count endpoint; wire a real
                # tokenizer through llm_host_tokens instead (previously:
                # requests.post(llm_host_tokens, json={"content": output_text})).
                count_tokens = len(output_text) * 2
                if count_tokens > 20000:
                    logger.info('Количество токенов превысило значение 20k! Оставшиеся чанки отброшены!')
                    break
            if count_tokens > 20000:
                # Budget exhausted: close the current document and stop.
                output_text += '\n\\\n\n'
                count += 1
                break
            output_text += '\n\\\n\n'
            count += 1
    if answer_chunks.people_search is not None:
        for doc in answer_chunks.people_search:
            output_text += f'Документ: [{count + 1}]\n'
            output_text += f'Название документа: Информация о сотруднике {doc.person_name}\n'
            output_text += f'Информация о сотруднике {doc.person_name}\n'
            if doc.organizatinal_structure is not None:
                # One bracketed [...] section per organizational record.
                for organizatinal_structure in doc.organizatinal_structure:
                    output_text += '[\n'
                    if organizatinal_structure.position != 'undefined':
                        output_text += f'Должность: {organizatinal_structure.position}'
                    if organizatinal_structure.leads is not None:
                        output_text += f'\nРуководит следующими сотрудниками:\n'
                        for lead in organizatinal_structure.leads:
                            if lead.person != "undefined":
                                output_text += f'{lead.person}\n'
                    if organizatinal_structure.subordinates is not None:
                        if organizatinal_structure.subordinates.person_name != "undefined":
                            output_text += f'Руководителем {doc.person_name} является {organizatinal_structure.subordinates.person_name}'
                    output_text += '\n]\n'
            if doc.business_processes is not None:
                # Singular vs plural header depending on the process count.
                if len(doc.business_processes) >= 2:
                    output_text += f'Отвечает за Бизнес процессы:\n'
                else:
                    output_text += f'Отвечает за Бизнес процесс: '
                for process in doc.business_processes:
                    output_text += f'{process.processes_name}\n'
            if doc.business_curator is not None:
                output_text += 'Является Бизнес-куратором (РОКС НН):\n'
                for curator in doc.business_curator:
                    output_text += f'{curator.company_name}\n'
            if doc.groups is not None:
                output_text += '\nВходит в состав групп, комитетов, координационных советов (КО):\n'
                for group in doc.groups:
                    # Normalize the plural role label to singular.
                    if 'Члены' in group.position_in_group:
                        output_text += f'{group.group_name}. Должность внутри группы: {group.position_in_group.replace("Члены", "Член")}\n'
                    else:
                        output_text += f'{group.group_name}. Должность внутри группы: {group.position_in_group}\n'
            output_text += f'\n\\\n\n'
            count += 1
    if answer_chunks.groups_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += f'Название документа: Информация о группе\n'
        output_text += f'Название группы: {answer_chunks.groups_search.group_name}\n'
        # Table header only makes sense for more than one member.
        if len(answer_chunks.groups_search.group_composition) > 1:
            output_text += f'\t ФИО \t\t\t| Должность внутри группы\n'
        for person_data in answer_chunks.groups_search.group_composition:
            if 'Члены' in person_data.position_in_group:
                output_text += f'{person_data.person_name:<{20}}| {person_data.position_in_group.replace("Члены", "Член")}\n'
            else:
                output_text += f'{person_data.person_name:<{20}}| {person_data.position_in_group}\n'
        output_text += f'\n\\\n\n'
        count += 1
    if answer_chunks.rocks_nn_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += f'Название документа: Информация о {answer_chunks.rocks_nn_search.division}\n'
        output_text += f'Название документа: В РОКС НН {answer_chunks.rocks_nn_search.division} входят:\n'
        for company_name in answer_chunks.rocks_nn_search.company_name:
            output_text += f'{company_name}\n'
        output_text += f'\n\\\n\n'
        count += 1
    if answer_chunks.segmentation_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += f'Название документа: {answer_chunks.segmentation_search.segmentation_model}\n'
        output_text += f'Название документа: В {answer_chunks.segmentation_search.segmentation_model} входят:\n'
        for company_name in answer_chunks.segmentation_search.company_name:
            output_text += f'{company_name}\n'
        output_text += f'\n\\\n\n'
        count += 1
    # Final cleanup of private-use glyphs from converted documents.
    # NOTE(review): the second pattern was also lost in extraction
    # (.replace('', '-') would have laced the whole output with dashes);
    # \uf0b7 (the Symbol-font bullet) is the usual companion of \uf02d in
    # converted Office documents — confirm against the source documents.
    output_text = output_text.replace('\uf02d', '-').replace('\uf0b7', '-')
    return output_text