mtyrrell committed
Commit 8f0a9cd · 1 Parent(s): f2a3674

ts citation filtering

Files changed (1)
  1. utils/generator.py +36 -6
utils/generator.py CHANGED

@@ -16,6 +16,9 @@ from langchain_core.messages import SystemMessage, HumanMessage
 # Local imports
 from .utils import getconfig, get_auth
 
+# Set up logger
+logger = logging.getLogger(__name__)
+
 # ---------------------------------------------------------------------
 # Configuration and Model Initialization
 # ---------------------------------------------------------------------
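Note that the logger.info calls introduced below are only visible if the embedding application configures logging at INFO level or lower. A minimal sketch of such a configuration; the format string and level here are assumptions, not part of this commit:

import logging

# Route INFO-and-above records from every module to stderr.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

# Because generator.py uses getLogger(__name__), its output can later
# be quieted without editing the module itself:
logging.getLogger("utils.generator").setLevel(logging.WARNING)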
@@ -57,18 +60,45 @@ def _parse_citations(response: str) -> List[int]:
     """Parse citation numbers from response text"""
     citation_pattern = r'\[(\d+)\]'
     matches = re.findall(citation_pattern, response)
-    return sorted(list(set(int(match) for match in matches)))
+    citation_numbers = sorted(list(set(int(match) for match in matches)))
+
+    # Debug logging
+    logger.info(f"=== CITATION PARSING DEBUG ===")
+    logger.info(f"Response text length: {len(response)}")
+    logger.info(f"Found citation matches: {matches}")
+    logger.info(f"Parsed citation numbers: {citation_numbers}")
+
+    return citation_numbers
 
 def _extract_sources(processed_results: List[Dict[str, Any]], cited_numbers: List[int]) -> List[Dict[str, Any]]:
     """Extract sources that were cited in the response"""
+    # Debug logging - show all available sources before filtering
+    logger.info(f"=== SOURCE FILTERING DEBUG ===")
+    logger.info(f"Total available sources: {len(processed_results)}")
+    for i, source in enumerate(processed_results):
+        logger.info(f"Source {i+1}: filename='{source.get('filename', 'Unknown')}', page='{source.get('page', 'Unknown')}', year='{source.get('year', 'Unknown')}'")
+
+    logger.info(f"Cited numbers from response: {cited_numbers}")
+
     if not cited_numbers:
+        logger.info("No citations found - returning empty sources list")
         return []
 
     cited_sources = []
     for citation_num in cited_numbers:
         source_index = citation_num - 1
+        logger.info(f"Processing citation [{citation_num}] -> source_index: {source_index}")
+
         if 0 <= source_index < len(processed_results):
-            cited_sources.append(processed_results[source_index])
+            source = processed_results[source_index]
+            cited_sources.append(source)
+            logger.info(f"✓ Added source {citation_num}: filename='{source.get('filename', 'Unknown')}', page='{source.get('page', 'Unknown')}'")
+        else:
+            logger.warning(f"✗ Citation [{citation_num}] is out of range! source_index {source_index} not in range [0, {len(processed_results)-1}]")
+
+    logger.info(f"Final filtered sources count: {len(cited_sources)}")
+    for i, source in enumerate(cited_sources):
+        logger.info(f"Filtered source {i+1}: filename='{source.get('filename', 'Unknown')}', page='{source.get('page', 'Unknown')}', year='{source.get('year', 'Unknown')}'")
 
     return cited_sources
 
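For reference, a quick sketch of how the two helpers interact after this change; the sample response text and source dicts are invented, and the import assumes the module resolves as utils.generator:

from utils.generator import _parse_citations, _extract_sources  # assumed import path

response = "Targets are set in [2] and reaffirmed in [5], see also [2]."
sources = [
    {"filename": f"doc_{i}.pdf", "page": i, "year": 2020 + i}
    for i in range(1, 5)  # four sources -> valid citation range is [1]..[4]
]

cited = _parse_citations(response)       # [2, 5]: deduplicated and sorted
kept = _extract_sources(sources, cited)  # keeps doc_2.pdf; [5] triggers the out-of-range warning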
 
@@ -203,7 +233,7 @@ async def _call_llm(messages: list) -> str:
         response = await chat_model.ainvoke(messages)
         return response.content.strip()
     except Exception as e:
-        logging.exception(f"LLM generation failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
+        logger.exception(f"LLM generation failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
         raise
 
 async def _call_llm_streaming(messages: list) -> AsyncGenerator[str, None]:
@@ -213,7 +243,7 @@ async def _call_llm_streaming(messages: list) -> AsyncGenerator[str, None]:
         if hasattr(chunk, 'content') and chunk.content:
             yield chunk.content
     except Exception as e:
-        logging.exception(f"LLM streaming failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
+        logger.exception(f"LLM streaming failed with provider '{PROVIDER}' and model '{MODEL}': {e}")
         yield f"Error: {str(e)}"
 
 # ---------------------------------------------------------------------
@@ -246,7 +276,7 @@ async def generate(query: str, context: Union[str, List[Dict[str, Any]]], chatui
         return answer
 
     except Exception as e:
-        logging.exception("Generation failed")
+        logger.exception("Generation failed")
         error_msg = str(e)
         return {"error": error_msg} if chatui_format else f"Error: {error_msg}"
@@ -290,7 +320,7 @@ async def generate_streaming(query: str, context: Union[str, List[Dict[str, Any]
         yield {"event": "end", "data": {}}
 
     except Exception as e:
-        logging.exception("Streaming generation failed")
+        logger.exception("Streaming generation failed")
         error_msg = str(e)
         if chatui_format:
             yield {"event": "error", "data": {"error": error_msg}}
 