Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| from logging import Logger | |
| from typing import List, Union | |
| from openai import OpenAI | |
| from common.configuration import FilterChunks, LLMConfiguration, SummaryChunks | |
| from components.nmd.aggregate_answers import preprocessed_chunks | |
class LLMChunkSearch:
    """Ask an OpenAI-compatible chat model to answer a query from retrieved chunks.

    When no ``base_url`` is configured, or when every request attempt fails,
    a plain-text rendering of the first retrieved document (or person record)
    is returned instead of a model answer.
    """

    # Number of request attempts made before giving up on the LLM.
    _MAX_ATTEMPTS = 5
    # Everything up to and including the first start marker is dropped from
    # the model answer; everything after the first end marker is dropped too.
    _ANSWER_START_MARKER = '%%'
    _ANSWER_END_MARKER = 'Конец ответа'
    # Returned by the fallback rendering when there is nothing to render.
    _EMPTY_DATABASE_MESSAGE = 'БАЗА ДАННЫХ ПУСТА'

    def __init__(self, config: LLMConfiguration, prompt: str, logger: Logger):
        """Store the settings and create the API client when a base URL is set.

        Args:
            config: LLM settings. This class reads ``base_url``,
                ``api_key_env``, ``model``, ``temperature``, ``top_p``,
                ``frequency_penalty``, ``presence_penalty`` and ``seed``.
            prompt: Prompt template, stored as ``self.prompt``.
            logger: Logger used for progress and error messages.
        """
        self.config = config
        self.logger = logger
        self.prompt = prompt
        # NOTE(review): ``pattern`` and ``pattern_list`` are not used anywhere
        # in this class; presumably callers use them to parse document
        # references out of the model answer - confirm before removing.
        self.pattern = r'\d+'
        self.pattern_list = [
            r'\[\d+\]',
            r'Ответ: [1-9]',
            r'Ответ [1-9]',
            r'Ответ[1-9]',
            r'Ответ:[1-9]',
            r'Ответ: \[\d+\]',
        ]
        # Without a base URL there is no endpoint to talk to; ``client`` stays
        # ``None`` and ``llm_chunk_search`` uses the non-LLM fallback.
        if self.config.base_url is not None:
            self.client = OpenAI(
                base_url=self.config.base_url,
                api_key=os.getenv(self.config.api_key_env),
            )
        else:
            self.client = None

    def llm_chunk_search(self, query: str, answer_chunks: SummaryChunks, prompt: str):
        """Answer ``query`` from ``answer_chunks`` using the configured model.

        Args:
            query: User query.
            answer_chunks: Retrieved chunks to process.
            prompt: Prompt template; it is formatted with the keyword
                arguments ``query`` and ``answer``.

        Returns:
            A 4-tuple ``(text_chunks, answer, prompt_used, total_tokens)``.
            ``text_chunks`` is the result of ``preprocessed_chunks``.
            If no client is configured, ``answer`` is the fallback rendering,
            ``prompt_used`` is the unformatted template and the token count
            is 0. If every attempt fails, ``answer`` is the fallback
            rendering, ``prompt_used`` is the formatted prompt and the token
            count is 0.
        """
        text_chunks = preprocessed_chunks(
            answer_chunks, self.config.base_url, self.logger
        )
        self.logger.info('Searching LLM Chunks')

        if self.client is None:
            return (
                text_chunks,
                self.__postprocessing_answer_llm(answer_chunks),
                prompt,
                0,
            )

        llm_prompt = prompt.format(query=query, answer=text_chunks)
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                response = self.client.chat.completions.create(
                    model=self.config.model,
                    messages=[
                        # Send the formatted prompt: the raw template does not
                        # contain the retrieved chunks.
                        {"role": "system", "content": llm_prompt},
                        {"role": "user", "content": query},
                    ],
                    temperature=self.config.temperature,
                    top_p=self.config.top_p,
                    frequency_penalty=self.config.frequency_penalty,
                    presence_penalty=self.config.presence_penalty,
                    seed=self.config.seed,
                )
                answer_llm = response.choices[0].message.content
                count_tokens = response.usage.total_tokens
                self.logger.info(f'Answer LLM {answer_llm}')
                # A missing (None) answer makes the trimming raise, which is
                # treated like any other failed attempt and retried.
                answer_llm = self._trim_answer(answer_llm)
                return text_chunks, answer_llm, llm_prompt, count_tokens
            except Exception as e:
                # Best-effort retry: any failure is logged and the request is
                # attempted again until the attempt budget is exhausted.
                self.logger.error(f"Attempt {attempt} failed: {str(e)}")

        self.logger.error("All attempts failed")
        return (
            text_chunks,
            self.__postprocessing_answer_llm(answer_chunks),
            llm_prompt,
            0,
        )

    @classmethod
    def _trim_answer(cls, answer: str) -> str:
        """Cut ``answer`` down to the span delimited by the answer markers.

        Text up to and including the first start marker is removed, then text
        after the first end marker is removed (the end marker itself is kept).
        A marker that does not occur leaves that side untouched.
        """
        start = answer.find(cls._ANSWER_START_MARKER)
        if start != -1:
            answer = answer[start + len(cls._ANSWER_START_MARKER):]
        end = answer.find(cls._ANSWER_END_MARKER)
        if end != -1:
            answer = answer[:end + len(cls._ANSWER_END_MARKER)]
        return answer

    @staticmethod
    def _format_document(doc, *, filename_fallback: bool) -> str:
        """Render one document and its chunks as plain text.

        Args:
            doc: Object exposing ``title``, ``chunks`` and, when
                ``filename_fallback`` is true, ``filename``.
            filename_fallback: When the title is ``'unknown'``, print the
                file name instead; otherwise the title line is omitted.
        """
        parts = ['Документ: [1]\n']
        if doc.title != 'unknown':
            parts.append(f'Название документа: {doc.title}\n')
        elif filename_fallback:
            parts.append(f'Название документа: {doc.filename}\n')
        for chunk in doc.chunks:
            if len(chunk.other_info):
                parts.extend(f'{info}' for info in chunk.other_info)
            else:
                parts.append(f'{chunk.text_answer}')
            # NOTE(review): indentation was lost in the flattened source; the
            # blank-line separator is assumed to follow every chunk - confirm.
            parts.append('\n\n')
        return ''.join(parts)

    @staticmethod
    def _format_person(person, *, from_summary: bool) -> str:
        """Render one person record as plain text.

        Args:
            person: Object exposing ``person_name``,
                ``organizatinal_structure``, ``business_processes``,
                ``business_curator`` and ``groups``.
            from_summary: Selects the layout used for records taken from a
                ``SummaryChunks``: the header gets a document-title prefix,
                each organisational entry is wrapped in parentheses and an
                ``'undefined'`` position is skipped. The list layout has no
                prefix, no parentheses and always prints the position.
        """
        name = person.person_name
        header = f'Информация о сотруднике {name}\n'
        parts = [f'Название документа: {header}' if from_summary else header]

        if person.organizatinal_structure is not None:
            for structure in person.organizatinal_structure:
                if from_summary:
                    parts.append('(')
                if not from_summary or structure.position != 'undefined':
                    parts.append(f'Должность: {structure.position}\n')
                if structure.leads is not None:
                    parts.append('Руководит следующими сотрудниками:\n')
                    parts.extend(
                        f'{lead.person}\n'
                        for lead in structure.leads
                        if lead.person != "undefined"
                    )
                manager = structure.subordinates.person_name
                if manager != "undefined":
                    parts.append(f'Руководителем {name} является {manager}\n')
                if from_summary:
                    parts.append(')')

        if person.business_processes is not None:
            if len(person.business_processes) >= 2:
                parts.append('Отвечает за Бизнес процессы:\n')
            else:
                parts.append('Отвечает за Бизнес процесс: ')
            parts.extend(
                f'{process.processes_name}\n'
                for process in person.business_processes
            )

        if person.business_curator is not None:
            parts.append('Является Бизнес-куратором (РОКС НН):\n')
            parts.extend(
                f'{curator.company_name}' for curator in person.business_curator
            )

        if person.groups is not None:
            if len(person.groups) >= 2:
                parts.append('Входит в состав групп:\n')
            else:
                parts.append('Входит в состав группы:\n')
            for group in person.groups:
                # The plural role label is turned into its singular form;
                # replace() is a no-op when the label is absent.
                position = group.position_in_group.replace("Члены", "Член")
                parts.append(
                    f'{group.group_name}. Должность внутри группы: {position}\n'
                )

        # Record terminator: a backslash followed by a blank line.
        parts.append('\\\n\n')
        return ''.join(parts)

    @staticmethod
    def __postprocessing_answer_llm(answer_chunks: Union[SummaryChunks, List]) -> str:
        """Render the first retrieved item as the non-LLM fallback answer.

        Declared as a static method because it is invoked through ``self``
        with a single argument and uses no instance state.

        Args:
            answer_chunks: Either a ``SummaryChunks`` (its ``doc_chunks`` are
                preferred; ``people_search`` is used when ``doc_chunks`` is
                ``None``) or a list whose first element is a ``FilterChunks``
                or a person record.

        Returns:
            The formatted text, or the empty-database message when there is
            nothing to render.
        """
        empty_message = LLMChunkSearch._EMPTY_DATABASE_MESSAGE

        if isinstance(answer_chunks, SummaryChunks):
            doc_chunks = answer_chunks.doc_chunks
            # The None check must come before len(): len(None) raises.
            if doc_chunks is not None:
                if len(doc_chunks) == 0:
                    # TODO: test how this behaves and replace it with a proper
                    # notification about the database.
                    return empty_message
                return LLMChunkSearch._format_document(
                    doc_chunks[0], filename_fallback=True
                )
            people = answer_chunks.people_search
            if not people:
                return empty_message
            return LLMChunkSearch._format_person(people[0], from_summary=True)

        if not answer_chunks:
            return empty_message
        first = answer_chunks[0]
        if isinstance(first, FilterChunks):
            return LLMChunkSearch._format_document(first, filename_fallback=False)
        return LLMChunkSearch._format_person(first, from_summary=False)