"""Pydantic schemas for papers, document chunks, search/RAG requests and system stats."""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


# NOTE: the class docstrings below are kept verbatim on purpose -- pydantic can
# expose a model's docstring as the `description` of its generated JSON schema,
# so rewording them could change emitted schemas. Extra documentation therefore
# lives in `#` comments.
#
# NOTE: the literal `[]` defaults are safe on pydantic models: field defaults
# are copied for each instance rather than shared between instances.


class PaperModel(BaseModel):
    """Model for ArXiv paper information."""

    id: str  # paper identifier (presumably the ArXiv id -- confirm with producer)
    title: str
    authors: List[str]  # one entry per author
    summary: str
    published: str  # kept as a string; the date format is not fixed here
    updated: Optional[str] = None  # same representation as `published`, may be absent
    pdf_url: Optional[str] = None
    categories: List[str] = []
    links: List[str] = []


class DocumentChunkModel(BaseModel):
    """Model for document chunks."""

    id: str  # identifier of the chunk itself
    paper_id: str  # presumably refers to PaperModel.id -- confirm with the chunker
    title: str
    # NOTE(review): a single string here, whereas PaperModel.authors is a list;
    # presumably the names are pre-joined upstream -- confirm.
    authors: str
    published: str
    chunk_index: int  # position of this chunk; 0- vs 1-based is not fixed here
    total_chunks: int  # number of chunks (presumably for the same paper)
    text: str  # chunk content
    pdf_url: Optional[str] = None
    categories: List[str] = []


class SearchResultModel(BaseModel):
    """Model for search results."""

    document: DocumentChunkModel  # the matched chunk
    score: float  # match score; scale and direction are defined by the caller


class QueryRequestModel(BaseModel):
    """Model for RAG query request."""

    query: str
    top_k: int = Field(default=5, description="Number of documents to retrieve")
    # None presumably means "use the default LLM" -- confirm against the handler.
    model: Optional[str] = None
    # Optional category list; presumably used as a filter -- confirm.
    categories: Optional[List[str]] = None


class SearchRequestModel(BaseModel):
    """Model for search request."""

    query: str
    # Defaults to 10 here, versus 5 on QueryRequestModel.
    top_k: int = Field(default=10, description="Number of documents to retrieve")
    categories: Optional[List[str]] = None


class RAGResponseModel(BaseModel):
    """Model for RAG response."""

    query: str
    answer: str
    search_results_markdown: str  # search results rendered as a markdown string
    raw_results: List[SearchResultModel]  # the same results in structured form
    processing_time: float  # unit is not specified here (presumably seconds)
    timestamp: str


class ModelInfoModel(BaseModel):
    """Model for LLM model information."""

    # NOTE(review): the docstring says "LLM", but the only use visible in this
    # file is SystemStatsModel.embedding_model -- confirm the intended wording.
    # NOTE(review): some pydantic v2 releases warn about field names starting
    # with `model_` (protected namespace); verify against the installed version.
    model_name: str
    dimension: int
    max_seq_length: int
    normalize_embeddings: bool


class SystemStatsModel(BaseModel):
    """Model for system statistics."""

    papers_in_cache: int
    total_chunks: int
    vector_db: Dict[str, Any]  # free-form mapping; its keys are not constrained here
    embedding_model: ModelInfoModel
    default_llm_model: str
    available_models: List[str]