Spaces:
Sleeping
Sleeping
File size: 1,862 Bytes
24177aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
class PaperModel(BaseModel):
    """Metadata for a single ArXiv paper.

    Mirrors the fields of one paper entry; `published`/`updated` are kept
    as plain strings — presumably the timestamp format the ArXiv feed
    returns (TODO confirm against the fetcher).
    """

    id: str                          # ArXiv paper identifier
    title: str
    authors: List[str]               # author display names
    summary: str                     # abstract text
    published: str                   # publication timestamp (string form)
    updated: Optional[str] = None    # last-revision timestamp, if any
    pdf_url: Optional[str] = None    # direct link to the PDF, if available
    # Use default_factory instead of a bare mutable `[]` class default:
    # pydantic copies defaults, but the factory form is the documented
    # idiom and matches the Field(...) usage elsewhere in this module.
    categories: List[str] = Field(default_factory=list)  # ArXiv category tags
    links: List[str] = Field(default_factory=list)       # related URLs
class DocumentChunkModel(BaseModel):
    """One chunk of a paper's text, as stored/retrieved for search.

    A paper is split into `total_chunks` pieces; this model carries the
    chunk text plus enough paper metadata to render a result standalone.
    Note `authors` is a single joined string here, unlike PaperModel's
    list — presumably pre-formatted for display (TODO confirm).
    """

    id: str                          # unique chunk identifier
    paper_id: str                    # id of the parent paper
    title: str
    authors: str                     # authors as one display string
    published: str                   # publication timestamp (string form)
    chunk_index: int                 # 0-based position of this chunk
    total_chunks: int                # number of chunks the paper was split into
    text: str                        # the chunk's text content
    pdf_url: Optional[str] = None
    # default_factory instead of a bare mutable `[]` class default —
    # the documented pydantic idiom, consistent with Field usage in this file.
    categories: List[str] = Field(default_factory=list)
class SearchResultModel(BaseModel):
    """A single retrieval hit: the matched chunk plus its relevance score.

    `score` semantics (similarity vs. distance, range) are set by the
    vector DB — not visible here; confirm before comparing across models.
    """

    document: DocumentChunkModel   # the matched chunk
    score: float                   # relevance score from the retriever
class QueryRequestModel(BaseModel):
    """Request body for a RAG query (retrieve + generate an answer)."""

    query: str                                                    # user's question
    top_k: int = Field(5, description="Number of documents to retrieve")
    model: Optional[str] = None                                   # LLM override; None = server default
    categories: Optional[List[str]] = None                        # optional ArXiv-category filter
class SearchRequestModel(BaseModel):
    """Request body for a plain vector search (no answer generation).

    Same shape as QueryRequestModel minus the LLM `model` field, with a
    larger default `top_k` since no generation step consumes the results.
    """

    query: str
    top_k: int = Field(10, description="Number of documents to retrieve")
    categories: Optional[List[str]] = None   # optional ArXiv-category filter
class RAGResponseModel(BaseModel):
    """Response payload for a RAG query."""

    query: str                               # echo of the original question
    answer: str                              # LLM-generated answer text
    search_results_markdown: str             # retrieved docs pre-rendered as markdown
    raw_results: List[SearchResultModel]     # structured retrieval hits
    processing_time: float                   # end-to-end duration (units not shown; presumably seconds)
    timestamp: str                           # when the response was produced (string form)
class ModelInfoModel(BaseModel):
    """Description of the embedding model in use.

    NOTE(review): under pydantic v2 a field named `model_name` collides
    with the protected `model_` namespace and emits a warning — confirm
    the pydantic version / `protected_namespaces` config in use.
    """

    model_name: str               # embedding model identifier
    dimension: int                # embedding vector dimensionality
    max_seq_length: int           # max input tokens per encode
    normalize_embeddings: bool    # whether vectors are L2-normalized
class SystemStatsModel(BaseModel):
    """Aggregate system/status statistics exposed by the service.

    Fix: the original last line ended with a stray `|` (copy/extraction
    artifact) that left `available_models: List[str] |` as a syntax
    error; the annotation is restored to a plain List[str].
    """

    papers_in_cache: int              # number of papers currently cached
    total_chunks: int                 # chunks indexed across all papers
    vector_db: Dict[str, Any]         # backend-specific vector-DB stats (schema set by the DB layer)
    embedding_model: ModelInfoModel   # active embedding-model info
    default_llm_model: str            # LLM used when a request names none
    available_models: List[str]       # LLM identifiers the service accepts