Spaces:

helal94hb1
/

backend_chatbot

Sleeping

File size: 2,727 Bytes

# app/services/context_builder.py

import logging
from typing import List, Dict, Tuple # Added Tuple

from app.core import state

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default character budget for an assembled context; used as the default value
# of the max_context_chars parameter of build_context_from_ids.
DEFAULT_MAX_CONTEXT_CHARS = 512_000

def build_context_from_ids(
    top_chunk_ids: List[str],
    max_context_chars: int = DEFAULT_MAX_CONTEXT_CHARS
) -> Tuple[str, List[str]]:
    """Assemble a context string from chunk IDs within a character budget.

    Chunk contents are looked up in ``state.chunk_content_map`` and joined,
    in the order given, with a ``"\\n\\n---\\n\\n"`` separator. The separator
    characters count toward the budget.

    Args:
        top_chunk_ids: Chunk IDs in the order they should appear in the
            context. Each ID is converted with ``str()`` before lookup.
        max_context_chars: Maximum length, in characters, of the returned
            context string.

    Returns:
        A ``(context, used_ids)`` tuple. ``context`` is the joined content
        and ``used_ids`` lists the (stringified) IDs whose content was
        included, in order. ``("", [])`` is returned when ``top_chunk_ids``
        is empty, when the chunk content map is empty or not loaded, or when
        no chunk could be added.

    Notes:
        - IDs that are missing from the map, or whose stored content is
          empty, are skipped with a warning.
        - Building stops at the first chunk that would push the context over
          ``max_context_chars``; later chunks are not considered even if
          they would fit.
    """
    if not top_chunk_ids:
        logger.warning("build_context_from_ids called with empty chunk ID list.")
        return "", []

    # Read the map once so every lookup in this call sees the same object.
    chunk_map = state.chunk_content_map
    if not chunk_map:
        logger.error("Chunk content map is not loaded in state. Cannot build context.")
        return "", []

    separator = "\n\n---\n\n"
    separator_len = len(separator)
    context_parts: List[str] = []
    used_ids: List[str] = []
    current_length = 0  # Equals len(separator.join(context_parts)) at all times.

    logger.info(
        "Building context from %d top chunk IDs (max chars: %s)...",
        len(top_chunk_ids),
        max_context_chars,
    )

    for position, raw_id in enumerate(top_chunk_ids, start=1):
        # The map is keyed by string, so normalise the ID before the lookup.
        chunk_id = str(raw_id)
        content = chunk_map.get(chunk_id)

        if content is None:
            logger.warning("Content not found in state map for chunk ID: %s", chunk_id)
            continue
        if not content:
            # The ID exists but has nothing to contribute; report it as empty
            # rather than as missing so the log reflects the real cause.
            logger.warning("Skipping chunk ID %s: stored content is empty.", chunk_id)
            continue

        content_len = len(content)
        # A separator is only needed once there is a preceding part.
        added_len = content_len + (separator_len if context_parts else 0)

        if current_length + added_len > max_context_chars:
            logger.warning(
                "Stopping context building: Chunk %d (ID: %s...) with length %d "
                "would exceed max chars (%s). Current length: %d.",
                position,
                chunk_id[:10],
                content_len,
                max_context_chars,
                current_length,
            )
            break

        context_parts.append(content)
        used_ids.append(chunk_id)
        current_length += added_len
        logger.debug(
            "  Added chunk %d (ID: %s...): Length=%d, Total Context Chars=%d",
            position,
            chunk_id[:20],
            content_len,
            current_length,
        )

    if not context_parts:
        logger.warning("No content could be added to the context.")
        return "", []

    final_context = separator.join(context_parts)
    logger.info(
        "Final context built. Length: %d chars, Chunks used: %d/%d",
        len(final_context),
        len(used_ids),
        len(top_chunk_ids),
    )

    return final_context, used_ids