Spaces:
Sleeping
Sleeping
| # src/embedding_generator/embedder.py | |
| from langchain_ollama import OllamaEmbeddings # cite: embed_pipeline.py, query_pipeline.py | |
| from config.settings import OLLAMA_URL, EMBED_MODEL | |
| import logging | |
| from typing import List | |
| logger = logging.getLogger(__name__) | |
class EmbeddingGenerator:
    """Own the Ollama embedding client and expose embedding helpers.

    The client is created once in ``__init__`` from the ``OLLAMA_URL`` and
    ``EMBED_MODEL`` settings and reused for every call.
    """

    def __init__(self):
        """Create the ``OllamaEmbeddings`` client.

        Raises:
            Exception: Whatever the client constructor raises; the failure is
                logged at CRITICAL level and then re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # Consider adding error handling for an unreachable Ollama server.
        # For production, evaluate whether Ollama is suitable or whether a
        # more robust/managed embedding service is required, based on load
        # and reliability needs.
        # -------------------------------------
        try:
            # cite: embed_pipeline.py, query_pipeline.py
            self.embedder = OllamaEmbeddings(base_url=OLLAMA_URL, model=EMBED_MODEL)
        except Exception as e:
            # exc_info attaches the traceback to the log record; the bare
            # ``raise`` propagates the original exception untouched.
            logger.critical(
                "Failed to initialize embedding model: %s", e, exc_info=True
            )
            raise
        logger.info(
            "Initialized embedding model: %s at %s", EMBED_MODEL, OLLAMA_URL
        )

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of text inputs.

        Args:
            texts: A list of strings to embed.

        Returns:
            One embedding (list of floats) per input text. An empty ``texts``
            yields an empty list without contacting the embedding service.

        Raises:
            Exception: Whatever ``embed_documents`` raises; the failure is
                logged with its traceback and then re-raised unchanged.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return []

        # --- Financial Ministry Adaptation ---
        # Implement retry logic for API calls to the embedding service.
        # Consider potential rate limits.
        # -------------------------------------
        try:
            # Used internally by add_documents, but good to have an explicit
            # method for batch embedding.
            embeddings = self.embedder.embed_documents(texts)
        except Exception as e:
            logger.exception("Failed to generate embeddings: %s", e)
            raise
        logger.debug("Generated %d embeddings.", len(embeddings))
        return embeddings

    def generate_query_embedding(self, text: str) -> List[float]:
        """Generate an embedding for a single query text.

        Args:
            text: The query string.

        Returns:
            An embedding (list of floats).

        Raises:
            Exception: Whatever ``embed_query`` raises; the failure is logged
                with its traceback and then re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # Implement retry logic for API calls.
        # -------------------------------------
        try:
            # cite: query_pipeline.py (implicitly used by retriever)
            embedding = self.embedder.embed_query(text)
        except Exception as e:
            logger.exception("Failed to generate query embedding: %s", e)
            raise
        logger.debug("Generated query embedding.")
        return embedding