# app/services/context_builder.py
import logging
from typing import List, Dict, Tuple  # Added Tuple

from app.core import state

logger = logging.getLogger(__name__)

DEFAULT_MAX_CONTEXT_CHARS = 512000


def build_context_from_ids(
    top_chunk_ids: List[str],
    max_context_chars: int = DEFAULT_MAX_CONTEXT_CHARS
) -> Tuple[str, List[str]]:
    """
    Assemble a context string from chunk texts held in ``state.chunk_content_map``.

    Chunk IDs are visited in the order given. Each ID is converted with
    ``str()`` and looked up in the map; IDs whose lookup yields nothing (or an
    empty value) are skipped with a warning. Chunk texts are joined with a
    ``"\\n\\n---\\n\\n"`` separator, and the separator length counts towards
    the character budget. Assembly stops at the first chunk that would push
    the total past ``max_context_chars``; later chunks are not considered.

    Args:
        top_chunk_ids: Candidate chunk IDs, in the order they should appear.
        max_context_chars: Upper bound on the length of the returned context.

    Returns:
        A ``(context, used_ids)`` tuple: the joined context string and the
        IDs (as strings) of the chunks that were actually included. Returns
        ``("", [])`` when the ID list is empty, the state map is empty or not
        loaded, or no chunk could be included.
    """
    if not top_chunk_ids:
        logger.warning("build_context_from_ids called with empty chunk ID list.")
        return "", []

    if not state.chunk_content_map:
        logger.error("Chunk content map is not loaded in state. Cannot build context.")
        return "", []

    separator = "\n\n---\n\n"
    pieces: List[str] = []      # chunk texts accepted so far, in order
    used_ids: List[str] = []    # IDs matching `pieces`, one-to-one
    total_chars = 0             # length the joined context would have right now

    logger.info(
        f"Building context from {len(top_chunk_ids)} top chunk IDs "
        f"(max chars: {max_context_chars})..."
    )

    for position, raw_id in enumerate(top_chunk_ids, start=1):
        # Map keys are strings, so normalise the ID before the lookup.
        chunk_id = str(raw_id)
        text = state.chunk_content_map.get(chunk_id)

        if not text:
            logger.warning(f"Content not found in state map for chunk ID: {chunk_id}")
            continue

        text_len = len(text)
        # A separator is only paid for when something already precedes this chunk.
        cost = text_len + (len(separator) if pieces else 0)

        if total_chars + cost > max_context_chars:
            logger.warning(
                f"Stopping context building: Chunk {position} (ID: {chunk_id[:10]}...) "
                f"with length {text_len} would exceed max chars ({max_context_chars}). "
                f"Current length: {total_chars}."
            )
            break

        pieces.append(text)
        used_ids.append(chunk_id)
        total_chars += cost
        logger.debug(
            f" Added chunk {position} (ID: {chunk_id[:20]}...): "
            f"Length={text_len}, Total Context Chars={total_chars}"
        )

    if not pieces:
        logger.warning("No content could be added to the context.")
        return "", []

    final_context = separator.join(pieces)
    logger.info(
        f"Final context built. Length: {len(final_context)} chars, "
        f"Chunks used: {len(used_ids)}/{len(top_chunk_ids)}"
    )

    # Hand back the text together with the IDs that contributed to it.
    return final_context, used_ids