Spaces:
Running
Running
Add documentation + fix bugs
Browse files- app.py +92 -10
- classes.py +9 -0
- documentation.md +48 -0
- schemas.py +152 -25
app.py
CHANGED
|
@@ -58,7 +58,7 @@ def get_tdoc_url(doc_id):
|
|
| 58 |
for tdoc in tdoc_locations:
|
| 59 |
if tdoc["doc_id"] == doc_id:
|
| 60 |
return tdoc["url"]
|
| 61 |
-
return "Document not indexed (
|
| 62 |
|
| 63 |
def get_spec_url(document):
|
| 64 |
series = document.split(".")[0].zfill(2)
|
|
@@ -74,7 +74,33 @@ def get_document(spec_id: str, spec_title: str, source: str):
|
|
| 74 |
text.extend([section['section'], section['content']])
|
| 75 |
return text
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
app.add_middleware(
|
| 79 |
CORSMiddleware,
|
| 80 |
allow_origins=["*"],
|
|
@@ -92,13 +118,35 @@ valid_3gpp_spec_format = re.compile(r'^\d{2}\.\d{3}(?:-\d+)?')
|
|
| 92 |
valid_etsi_doc_format = re.compile(r'^(?:SET|SCP|SETTEC|SETREQ|SCPTEC|SCPREQ)\(\d+\)\d+(?:r\d+)?', flags=re.IGNORECASE)
|
| 93 |
valid_etsi_spec_format = re.compile(r'^\d{3} \d{3}(?:-\d+)?')
|
| 94 |
|
| 95 |
-
@app.post("/find", response_model=DocResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
def find_document(request: DocRequest):
|
| 97 |
start_time = time.time()
|
| 98 |
document = request.doc_id
|
| 99 |
-
source = request.source
|
| 100 |
-
spec_metadatas = spec_metadatas_3gpp if source == "3GPP" else spec_metadatas_etsi if source == "ETSI" else spec_metadatas_3gpp + spec_metadatas_etsi
|
| 101 |
-
is_3gpp = valid_3gpp_doc_format.match(document) or valid_3gpp_spec_format.match(document)
|
| 102 |
|
| 103 |
url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
|
| 104 |
get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
|
|
@@ -108,9 +156,10 @@ def find_document(request: DocRequest):
|
|
| 108 |
raise HTTPException(status_code=404, detail=url)
|
| 109 |
|
| 110 |
version = None
|
| 111 |
-
if
|
| 112 |
version = url.split("/")[-1].replace(".zip", "").split("-")[-1]
|
| 113 |
scope = None
|
|
|
|
| 114 |
for spec in spec_metadatas:
|
| 115 |
if spec['id'] == document:
|
| 116 |
scope = spec['scope']
|
|
@@ -124,7 +173,23 @@ def find_document(request: DocRequest):
|
|
| 124 |
scope=scope
|
| 125 |
)
|
| 126 |
|
| 127 |
-
@app.post("/batch", response_model=BatchDocResponse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
def find_document_batch(request: BatchDocRequest):
|
| 129 |
start_time = time.time()
|
| 130 |
documents = request.doc_ids
|
|
@@ -148,7 +213,17 @@ def find_document_batch(request: BatchDocRequest):
|
|
| 148 |
search_time=time.time()-start_time
|
| 149 |
)
|
| 150 |
|
| 151 |
-
@app.post('/search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
def search_specifications(request: KeywordRequest):
|
| 153 |
start_time = time.time()
|
| 154 |
boolSensitiveCase = request.case_sensitive
|
|
@@ -215,7 +290,14 @@ def search_specifications(request: KeywordRequest):
|
|
| 215 |
else:
|
| 216 |
raise HTTPException(status_code=404, detail="Specifications not found")
|
| 217 |
|
| 218 |
-
@app.post("/search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
def bm25_search_specification(request: BM25KeywordRequest):
|
| 220 |
start_time = time.time()
|
| 221 |
source = request.source
|
|
|
|
| 58 |
for tdoc in tdoc_locations:
|
| 59 |
if tdoc["doc_id"] == doc_id:
|
| 60 |
return tdoc["url"]
|
| 61 |
+
return "Document not indexed (re-indexing documents ?)"
|
| 62 |
|
| 63 |
def get_spec_url(document):
|
| 64 |
series = document.split(".")[0].zfill(2)
|
|
|
|
| 74 |
text.extend([section['section'], section['content']])
|
| 75 |
return text
|
| 76 |
|
| 77 |
+
tags_metadata = [
|
| 78 |
+
{
|
| 79 |
+
"name": "Document Retrieval",
|
| 80 |
+
"description": """
|
| 81 |
+
Direct document lookup operations for retrieving specific documents by their unique identifiers.
|
| 82 |
+
|
| 83 |
+
These endpoints provide fast access to document URLs, versions, and metadata without requiring keyword searches.
|
| 84 |
+
Perfect for when you know the exact document ID you're looking for.
|
| 85 |
+
""",
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"name": "Content Search",
|
| 89 |
+
"description": """
|
| 90 |
+
Advanced search operations for finding documents based on keywords and content matching.
|
| 91 |
+
|
| 92 |
+
Includes both quick metadata-based searches and deep content analysis with flexible filtering options.
|
| 93 |
+
Supports different search modes and logical operators for precise results.
|
| 94 |
+
""",
|
| 95 |
+
},
|
| 96 |
+
]
|
| 97 |
+
|
| 98 |
+
app = FastAPI(
|
| 99 |
+
title="3GPP & ETSI Document Finder API",
|
| 100 |
+
description=open('documentation.md').read(),
|
| 101 |
+
openapi_tags=tags_metadata
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
app.add_middleware(
|
| 105 |
CORSMiddleware,
|
| 106 |
allow_origins=["*"],
|
|
|
|
| 118 |
valid_etsi_doc_format = re.compile(r'^(?:SET|SCP|SETTEC|SETREQ|SCPTEC|SCPREQ)\(\d+\)\d+(?:r\d+)?', flags=re.IGNORECASE)
|
| 119 |
valid_etsi_spec_format = re.compile(r'^\d{3} \d{3}(?:-\d+)?')
|
| 120 |
|
| 121 |
+
@app.post("/find/single", response_model=DocResponse, tags=["Document Retrieval"], summary="Retrieve a single document by ID", responses={
|
| 122 |
+
200: {
|
| 123 |
+
"description": "Document found successfully",
|
| 124 |
+
"content": {
|
| 125 |
+
"application/json": {
|
| 126 |
+
"example": {
|
| 127 |
+
"doc_id": "23.401",
|
| 128 |
+
"url": "https://www.3gpp.org/ftp/Specs/archive/23_series/23.401/23401-h20.zip",
|
| 129 |
+
"version": "h20",
|
| 130 |
+
"scope": "General Packet Radio Service (GPRS) enhancements for Evolved Universal Terrestrial Radio Access Network (E-UTRAN) access",
|
| 131 |
+
"search_time": 0.0234
|
| 132 |
+
}
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
404: {
|
| 137 |
+
"description": "Document not found or not indexed",
|
| 138 |
+
"content": {
|
| 139 |
+
"application/json": {
|
| 140 |
+
"example": {
|
| 141 |
+
"detail": "Specification 99.999 not found"
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
}
|
| 146 |
+
})
|
| 147 |
def find_document(request: DocRequest):
|
| 148 |
start_time = time.time()
|
| 149 |
document = request.doc_id
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
url = get_tdoc_url(document) if valid_3gpp_doc_format.match(document) else \
|
| 152 |
get_spec_url(document) if valid_3gpp_spec_format.match(document) else \
|
|
|
|
| 156 |
raise HTTPException(status_code=404, detail=url)
|
| 157 |
|
| 158 |
version = None
|
| 159 |
+
if valid_3gpp_spec_format.match(document):
|
| 160 |
version = url.split("/")[-1].replace(".zip", "").split("-")[-1]
|
| 161 |
scope = None
|
| 162 |
+
spec_metadatas = spec_metadatas_3gpp if valid_3gpp_spec_format.match(document) else spec_metadatas_etsi
|
| 163 |
for spec in spec_metadatas:
|
| 164 |
if spec['id'] == document:
|
| 165 |
scope = spec['scope']
|
|
|
|
| 173 |
scope=scope
|
| 174 |
)
|
| 175 |
|
| 176 |
+
@app.post("/find/batch", response_model=BatchDocResponse, summary="Retrieve multiple documents by IDs", tags=["Document Retrieval"], responses={
|
| 177 |
+
200: {
|
| 178 |
+
"description": "Batch processing completed",
|
| 179 |
+
"content": {
|
| 180 |
+
"application/json": {
|
| 181 |
+
"example": {
|
| 182 |
+
"results": {
|
| 183 |
+
"23.401": "https://www.3gpp.org/ftp/Specs/archive/23_series/23.401/23401-h20.zip",
|
| 184 |
+
"S1-123456": "https://www.3gpp.org/ftp/tsg_sa/WG1_Serv/TSGSI_123/Docs/S1-123456.zip"
|
| 185 |
+
},
|
| 186 |
+
"missing": ["99.999", "INVALID-DOC"],
|
| 187 |
+
"search_time": 0.156
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
}
|
| 192 |
+
})
|
| 193 |
def find_document_batch(request: BatchDocRequest):
|
| 194 |
start_time = time.time()
|
| 195 |
documents = request.doc_ids
|
|
|
|
| 213 |
search_time=time.time()-start_time
|
| 214 |
)
|
| 215 |
|
| 216 |
+
@app.post('/search', response_model=KeywordResponse, tags=["Content Search"], summary="Search specifications by keywords", responses={
|
| 217 |
+
200: {
|
| 218 |
+
"description": "Search completed successfully"
|
| 219 |
+
},
|
| 220 |
+
400: {
|
| 221 |
+
"description": "You must enter keywords in deep search mode"
|
| 222 |
+
},
|
| 223 |
+
404: {
|
| 224 |
+
"description": "No specifications found matching the criteria"
|
| 225 |
+
}
|
| 226 |
+
})
|
| 227 |
def search_specifications(request: KeywordRequest):
|
| 228 |
start_time = time.time()
|
| 229 |
boolSensitiveCase = request.case_sensitive
|
|
|
|
| 290 |
else:
|
| 291 |
raise HTTPException(status_code=404, detail="Specifications not found")
|
| 292 |
|
| 293 |
+
@app.post("/search/bm25", response_model=KeywordResponse, tags=["Content Search"], summary="Advanced BM25 search with relevance scoring", responses={
|
| 294 |
+
200: {
|
| 295 |
+
"description": "BM25 search completed successfully"
|
| 296 |
+
},
|
| 297 |
+
404: {
|
| 298 |
+
"description": "No specifications found above the relevance threshold"
|
| 299 |
+
}
|
| 300 |
+
})
|
| 301 |
def bm25_search_specification(request: BM25KeywordRequest):
|
| 302 |
start_time = time.time()
|
| 303 |
source = request.source
|
classes.py
CHANGED
|
@@ -59,6 +59,7 @@ class ETSIDocFinder:
|
|
| 59 |
class ETSISpecFinder:
|
| 60 |
def __init__(self):
|
| 61 |
self.main_url = "https://www.etsi.org/deliver/etsi_ts"
|
|
|
|
| 62 |
self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
|
| 63 |
|
| 64 |
def get_spec_path(self, doc_id: str):
|
|
@@ -89,12 +90,20 @@ class ETSISpecFinder:
|
|
| 89 |
original = doc_id
|
| 90 |
|
| 91 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
|
|
|
| 92 |
print(url)
|
|
|
|
| 93 |
|
| 94 |
releases = self.get_docs_from_url(url)
|
| 95 |
files = self.get_docs_from_url(url + releases[-1])
|
| 96 |
for f in files:
|
| 97 |
if f.endswith(".pdf"):
|
| 98 |
return url + releases[-1] + "/" + f
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
return f"Specification {doc_id} not found"
|
|
|
|
| 59 |
class ETSISpecFinder:
|
| 60 |
def __init__(self):
|
| 61 |
self.main_url = "https://www.etsi.org/deliver/etsi_ts"
|
| 62 |
+
self.second_url = "https://www.etsi.org/deliver/etsi_tr"
|
| 63 |
self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
|
| 64 |
|
| 65 |
def get_spec_path(self, doc_id: str):
|
|
|
|
| 90 |
original = doc_id
|
| 91 |
|
| 92 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
| 93 |
+
url2 = f"{self.second_url}/{self.get_spec_path(original)}/"
|
| 94 |
print(url)
|
| 95 |
+
print(url2)
|
| 96 |
|
| 97 |
releases = self.get_docs_from_url(url)
|
| 98 |
files = self.get_docs_from_url(url + releases[-1])
|
| 99 |
for f in files:
|
| 100 |
if f.endswith(".pdf"):
|
| 101 |
return url + releases[-1] + "/" + f
|
| 102 |
+
|
| 103 |
+
releases = self.get_docs_from_url(url2)
|
| 104 |
+
files = self.get_docs_from_url(url + releases[-1])
|
| 105 |
+
for f in files:
|
| 106 |
+
if f.endswith('.pdf'):
|
| 107 |
+
return url + releases[-1] + "/" + f
|
| 108 |
|
| 109 |
return f"Specification {doc_id} not found"
|
documentation.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📋 Document Finder Backend API
|
| 2 |
+
|
| 3 |
+
A comprehensive REST API for searching and retrieving technical documents and specifications from the **3GPP** and **ETSI** organizations.
|
| 4 |
+
|
| 5 |
+
### 🚀 Key Features
|
| 6 |
+
|
| 7 |
+
* **Document Retrieval**: Get direct download URLs and metadata for specific documents
|
| 8 |
+
* **Batch Processing**: Handle multiple document requests simultaneously
|
| 9 |
+
* **Advanced Search**: Multiple search modes with keyword matching
|
| 10 |
+
* **BM25 Scoring**: State-of-the-art relevance ranking using the BM25 algorithm
|
| 11 |
+
* **Cross-Organization**: Search across both 3GPP and ETSI document repositories
|
| 12 |
+
|
| 13 |
+
### 📚 Supported Document Types
|
| 14 |
+
|
| 15 |
+
#### 3GPP Documents
|
| 16 |
+
* **TDocs (Technical Documents)**:
|
| 17 |
+
- Format: `S1-123456`, `C4-234567`, `R2-345678`
|
| 18 |
+
- Working group documents from the SA, CT, and RAN groups
|
| 19 |
+
* **Technical Specifications**:
|
| 20 |
+
- Format: `23.401`, `38.331-16`
|
| 21 |
+
- Official published specifications
|
| 22 |
+
|
| 23 |
+
#### ETSI Documents
|
| 24 |
+
* **TDocs (Technical Documents)**:
|
| 25 |
+
- Format: `SET(25)000001`, `SCPTEC(19)000011`
|
| 26 |
+
- Committee working documents
|
| 27 |
+
* **Technical Specifications**:
|
| 28 |
+
- Format: `131 102`, `188 008-2`
|
| 29 |
+
- Published ETSI standards
|
| 30 |
+
|
| 31 |
+
### 🔍 Search Capabilities
|
| 32 |
+
|
| 33 |
+
* **Quick Search**: Lightning-fast metadata-only search
|
| 34 |
+
* **Deep Search**: Comprehensive content-based search within document sections
|
| 35 |
+
* **BM25 Search**: Advanced relevance scoring with normalization
|
| 36 |
+
* **Flexible Filtering**: By source organization, document type, and specification category
|
| 37 |
+
|
| 38 |
+
### 🛡️ Data Sources
|
| 39 |
+
|
| 40 |
+
This API indexes and searches through:
|
| 41 |
+
- 3GPP specification metadata and content
|
| 42 |
+
- ETSI specification metadata and content
|
| 43 |
+
- 3GPP TDoc location mappings
|
| 44 |
+
- Pre-built BM25 search indices
|
| 45 |
+
|
| 46 |
+
### 🔧 Technical Stack
|
| 47 |
+
|
| 48 |
+
Built with FastAPI, featuring automatic OpenAPI documentation, request validation, and comprehensive error handling.
|
schemas.py
CHANGED
|
@@ -1,38 +1,165 @@
|
|
| 1 |
-
from pydantic import BaseModel
|
| 2 |
from typing import *
|
| 3 |
|
| 4 |
class DocRequest(BaseModel):
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
class DocResponse(BaseModel):
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
class BatchDocRequest(BaseModel):
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class BatchDocResponse(BaseModel):
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
class KeywordRequest(BaseModel):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
class KeywordResponse(BaseModel):
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
from typing import *
|
| 3 |
|
| 4 |
class DocRequest(BaseModel):
|
| 5 |
+
"""
|
| 6 |
+
Request model for single document retrieval.
|
| 7 |
+
|
| 8 |
+
Used to specify which document or specification to retrieve by its unique identifier.
|
| 9 |
+
"""
|
| 10 |
+
doc_id: str = Field(
|
| 11 |
+
...,
|
| 12 |
+
title="Document Identifier",
|
| 13 |
+
description="Unique identifier for the document or specification.",
|
| 14 |
+
)
|
| 15 |
|
| 16 |
class DocResponse(BaseModel):
|
| 17 |
+
"""
|
| 18 |
+
Response model for single document retrieval.
|
| 19 |
+
|
| 20 |
+
Contains all available metadata and access information for the requested document.
|
| 21 |
+
"""
|
| 22 |
+
doc_id: str = Field(
|
| 23 |
+
...,
|
| 24 |
+
title="Document Identifier",
|
| 25 |
+
description="Echoed document identifier from the request"
|
| 26 |
+
)
|
| 27 |
+
url: str = Field(
|
| 28 |
+
...,
|
| 29 |
+
title="Document URL",
|
| 30 |
+
description="Direct download URL"
|
| 31 |
+
)
|
| 32 |
+
version: Optional[str] = Field(
|
| 33 |
+
None,
|
| 34 |
+
title="Document Version",
|
| 35 |
+
description="Extracted version information (e.g., 'h20', 'v17.9.0') when available"
|
| 36 |
+
)
|
| 37 |
+
scope: Optional[str] = Field(
|
| 38 |
+
None,
|
| 39 |
+
title="Document Scope",
|
| 40 |
+
description="Brief description of the document's scope and purpose from metadata"
|
| 41 |
+
)
|
| 42 |
+
search_time: float = Field(
|
| 43 |
+
...,
|
| 44 |
+
title="Search Duration",
|
| 45 |
+
description="Time spent processing the request in seconds"
|
| 46 |
+
)
|
| 47 |
|
| 48 |
class BatchDocRequest(BaseModel):
|
| 49 |
+
"""
|
| 50 |
+
Request model for batch document retrieval.
|
| 51 |
+
|
| 52 |
+
Allows retrieval of multiple documents in a single API call for efficiency.
|
| 53 |
+
"""
|
| 54 |
+
doc_ids: List[str] = Field(
|
| 55 |
+
...,
|
| 56 |
+
title="Document Identifier List",
|
| 57 |
+
description="List of document identifiers to retrieve."
|
| 58 |
+
)
|
| 59 |
|
| 60 |
class BatchDocResponse(BaseModel):
|
| 61 |
+
"""
|
| 62 |
+
Response model for batch document retrieval.
|
| 63 |
+
|
| 64 |
+
Provides organized results separating found documents from missing ones.
|
| 65 |
+
"""
|
| 66 |
+
results: Dict[str, str] = Field(
|
| 67 |
+
...,
|
| 68 |
+
title="Found Documents",
|
| 69 |
+
description="Dictionary mapping document IDs to their corresponding URLs"
|
| 70 |
+
)
|
| 71 |
+
missing: List[str] = Field(
|
| 72 |
+
...,
|
| 73 |
+
title="Missing Documents",
|
| 74 |
+
description="List of document IDs that could not be found or are not indexed"
|
| 75 |
+
)
|
| 76 |
+
search_time: float = Field(
|
| 77 |
+
...,
|
| 78 |
+
title="Total Search Duration",
|
| 79 |
+
description="Total time spent processing the batch request in seconds"
|
| 80 |
+
)
|
| 81 |
|
| 82 |
class KeywordRequest(BaseModel):
|
| 83 |
+
"""
|
| 84 |
+
Request model for keyword-based specification search.
|
| 85 |
+
|
| 86 |
+
Provides flexible search options with multiple modes and filtering capabilities.
|
| 87 |
+
"""
|
| 88 |
+
keywords: Optional[str] = Field(
|
| 89 |
+
"",
|
| 90 |
+
title="Search Keywords",
|
| 91 |
+
description="Comma-separated keywords for searching specifications.",
|
| 92 |
+
examples=["5G NR,authentication", "handover,mobility", "security,encryption"]
|
| 93 |
+
)
|
| 94 |
+
search_mode: Literal["quick", "deep"] = Field(
|
| 95 |
+
...,
|
| 96 |
+
title="Search Mode",
|
| 97 |
+
description="Search mode: 'quick' searches metadata only, 'deep' searches metadata and document content"
|
| 98 |
+
)
|
| 99 |
+
case_sensitive: Optional[bool] = Field(
|
| 100 |
+
False,
|
| 101 |
+
title="Case Sensitive Search",
|
| 102 |
+
description="Enable case-sensitive keyword matching"
|
| 103 |
+
)
|
| 104 |
+
source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
|
| 105 |
+
"all",
|
| 106 |
+
title="Document Source",
|
| 107 |
+
description="Limit search to specific organization or search all repositories"
|
| 108 |
+
)
|
| 109 |
+
spec_type: Optional[Literal["TS", "TR"]] = Field(
|
| 110 |
+
None,
|
| 111 |
+
title="Specification Type",
|
| 112 |
+
description="Filter by specification type: 'TS' (Technical Specification) or 'TR' (Technical Report)"
|
| 113 |
+
)
|
| 114 |
+
mode: Optional[Literal["and", "or"]] = Field(
|
| 115 |
+
"and",
|
| 116 |
+
title="Search Logic",
|
| 117 |
+
description="Logical operator: 'and' requires all keywords to match, 'or' matches any keyword"
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
class BM25KeywordRequest(BaseModel):
|
| 121 |
+
"""
|
| 122 |
+
Request model for BM25 advanced search.
|
| 123 |
+
|
| 124 |
+
Provides parameters for relevance-based search using BM25 scoring algorithm.
|
| 125 |
+
"""
|
| 126 |
+
keywords: Optional[str] = Field(
|
| 127 |
+
"",
|
| 128 |
+
title="Search Query",
|
| 129 |
+
description="Natural language search query for BM25 processing",
|
| 130 |
+
examples=["5G authentication procedures", "handover mobility management", "security key derivation"]
|
| 131 |
+
)
|
| 132 |
+
source: Optional[Literal["3GPP", "ETSI", "all"]] = Field(
|
| 133 |
+
"all",
|
| 134 |
+
title="Document Source",
|
| 135 |
+
description="Limit search to specific organization repositories"
|
| 136 |
+
)
|
| 137 |
+
threshold: Optional[int] = Field(
|
| 138 |
+
60,
|
| 139 |
+
title="Relevance Threshold",
|
| 140 |
+
description="Minimum normalized BM25 relevance score (0-100) for results inclusion",
|
| 141 |
+
ge=0,
|
| 142 |
+
le=100
|
| 143 |
+
)
|
| 144 |
+
spec_type: Optional[Literal["TS", "TR"]] = Field(
|
| 145 |
+
None,
|
| 146 |
+
title="Specification Type",
|
| 147 |
+
description="Filter results by specification type"
|
| 148 |
+
)
|
| 149 |
|
| 150 |
class KeywordResponse(BaseModel):
|
| 151 |
+
"""
|
| 152 |
+
Response model for keyword and BM25 search results.
|
| 153 |
+
|
| 154 |
+
Contains ranked search results with metadata and timing information.
|
| 155 |
+
"""
|
| 156 |
+
results: List[Dict[str, Any]] = Field(
|
| 157 |
+
...,
|
| 158 |
+
title="Search Results",
|
| 159 |
+
description="List of matching specifications with complete metadata. In deep search mode, includes 'contains' field with matching content sections."
|
| 160 |
+
)
|
| 161 |
+
search_time: float = Field(
|
| 162 |
+
...,
|
| 163 |
+
title="Search Duration",
|
| 164 |
+
description="Time spent processing the search request in seconds"
|
| 165 |
+
)
|