Spaces:
Sleeping
Sleeping
| # app/services/context_builder.py | |
| import logging | |
| from typing import List, Dict, Tuple # Added Tuple | |
| from app.core import state | |
logger = logging.getLogger(__name__)

# Default upper bound, in characters, on the assembled context string
# (separators between chunks count towards this budget).
DEFAULT_MAX_CONTEXT_CHARS = 512000


def build_context_from_ids(
    top_chunk_ids: List[str],
    max_context_chars: int = DEFAULT_MAX_CONTEXT_CHARS
) -> Tuple[str, List[str]]:
    """Build a context string from chunk IDs using ``state.chunk_content_map``.

    Chunks are taken in the order given. Each ID is converted to ``str`` and
    looked up in the map; IDs with missing or empty content are skipped with
    a warning. Chunk texts are joined with a ``"\\n\\n---\\n\\n"`` separator,
    and the separator length counts towards the character budget.

    Building stops at the first chunk that would push the total length past
    ``max_context_chars``; later chunks are not tried, even if they are
    smaller.

    Args:
        top_chunk_ids: Chunk IDs, in the order they should be considered.
        max_context_chars: Maximum length of the returned context string.

    Returns:
        A tuple ``(context, used_ids)``: the joined context string and the
        IDs (as strings) whose content was actually included, in order.
        Returns ``("", [])`` when no IDs are given, when the content map is
        empty or unset, or when no chunk content could be added.
    """
    if not top_chunk_ids:
        logger.warning("build_context_from_ids called with empty chunk ID list.")
        return "", []

    # Read the shared map once so the emptiness check and every lookup below
    # operate on the same object, and the attribute is not re-fetched per chunk.
    content_map = state.chunk_content_map
    if not content_map:
        logger.error("Chunk content map is not loaded in state. Cannot build context.")
        return "", []

    context_parts: List[str] = []
    used_ids: List[str] = []  # IDs whose content made it into the context
    current_length = 0
    separator = "\n\n---\n\n"
    separator_len = len(separator)

    # Lazy %-style arguments: the message is only rendered if the record is emitted.
    logger.info(
        "Building context from %s top chunk IDs (max chars: %s)...",
        len(top_chunk_ids),
        max_context_chars,
    )

    for position, raw_id in enumerate(top_chunk_ids, start=1):
        # Map keys are looked up as strings, so coerce the incoming ID.
        chunk_id = str(raw_id)
        content = content_map.get(chunk_id)

        if content is None:
            logger.warning("Content not found in state map for chunk ID: %s", chunk_id)
            continue
        if not content:
            # Present but empty: nothing to add, and not the same as "missing".
            logger.warning("Content is empty in state map for chunk ID: %s", chunk_id)
            continue

        content_len = len(content)
        # A separator is only needed once there is a preceding chunk.
        potential_added_len = content_len + (separator_len if context_parts else 0)

        if current_length + potential_added_len > max_context_chars:
            logger.warning(
                "Stopping context building: Chunk %s (ID: %s...) with length %s "
                "would exceed max chars (%s). Current length: %s.",
                position,
                chunk_id[:10],
                content_len,
                max_context_chars,
                current_length,
            )
            break

        context_parts.append(content)
        used_ids.append(chunk_id)
        current_length += potential_added_len
        logger.debug(
            " Added chunk %s (ID: %s...): Length=%s, Total Context Chars=%s",
            position,
            chunk_id[:20],
            content_len,
            current_length,
        )

    if not context_parts:
        logger.warning("No content could be added to the context.")
        return "", []

    final_context = separator.join(context_parts)
    logger.info(
        "Final context built. Length: %s chars, Chunks used: %s/%s",
        len(final_context),
        len(used_ids),
        len(top_chunk_ids),
    )
    return final_context, used_ids