Spaces:

davanstrien
/

huggingface-datasets-search-v2

Running on CPU Upgrade

App Files Community

davanstrien HF Staff committed on Aug 15, 2024

Commit

9ed5b2c

1 Parent(s): abbed11

chore: Refactor error handling in api_query_dataset

Browse files

Files changed (1) hide show

main.py +32 -13

main.py CHANGED Viewed

@@ -9,6 +9,7 @@ from httpx import AsyncClient
 from huggingface_hub import DatasetCard
 from pydantic import BaseModel
 from starlette.responses import RedirectResponse
 from load_data import get_embedding_function, get_save_path, refresh_data
@@ -31,15 +32,6 @@ async_client = AsyncClient(
 )
-class QueryResult(BaseModel):
-    dataset_id: str
-    similarity: float
-class QueryResponse(BaseModel):
-    results: List[QueryResult]
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global collection
@@ -88,6 +80,23 @@ async def try_get_card(hub_id: str) -> Optional[str]:
         return None
 @app.get("/similar", response_model=QueryResponse)
 @cache(ttl="1h")
 async def api_query_dataset(dataset_id: str, n: int = Query(default=10, ge=1, le=100)):
@@ -101,16 +110,18 @@ async def api_query_dataset(dataset_id: str, n: int = Query(default=10, ge=1, le
                 embedding_function = get_embedding_function()
                 card = await try_get_card(dataset_id)
                 if card is None:
-                    return QueryResponse(message="No dataset card available for recommendations.")
                 embeddings = embedding_function(card)
                 collection.upsert(ids=[dataset_id], embeddings=embeddings[0])
                 logger.info(f"Dataset {dataset_id} added to collection")
                 result = collection.get(ids=[dataset_id], include=["embeddings"])
             except Exception as e:
                 logger.error(
                     f"Error adding dataset {dataset_id} to collection: {str(e)}"
                 )
-                return QueryResponse(message="No dataset card available for recommendations.")
         embedding = result["embeddings"][0]
@@ -121,7 +132,9 @@ async def api_query_dataset(dataset_id: str, n: int = Query(default=10, ge=1, le
         if not query_result["ids"]:
             logger.info(f"No similar datasets found for: {dataset_id}")
-            return QueryResponse(message="No similar datasets found.")
         # Prepare the response
         results = [
@@ -134,9 +147,15 @@ async def api_query_dataset(dataset_id: str, n: int = Query(default=10, ge=1, le
         logger.info(f"Found {len(results)} similar datasets for: {dataset_id}")
         return QueryResponse(results=results)
     except Exception as e:
         logger.error(f"Error querying dataset {dataset_id}: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn

 from huggingface_hub import DatasetCard
 from pydantic import BaseModel
 from starlette.responses import RedirectResponse
+from starlette.status import HTTP_404_NOT_FOUND, HTTP_500_INTERNAL_SERVER_ERROR
 from load_data import get_embedding_function, get_save_path, refresh_data
 )
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global collection
         return None
+class QueryResult(BaseModel):
+    dataset_id: str
+    similarity: float
+class QueryResponse(BaseModel):
+    results: List[QueryResult]
+class DatasetCardNotFoundError(HTTPException):
+    def __init__(self, dataset_id: str):
+        super().__init__(
+            status_code=HTTP_404_NOT_FOUND,
+            detail=f"No dataset card available for dataset: {dataset_id}",
+        )
 @app.get("/similar", response_model=QueryResponse)
 @cache(ttl="1h")
 async def api_query_dataset(dataset_id: str, n: int = Query(default=10, ge=1, le=100)):
                 embedding_function = get_embedding_function()
                 card = await try_get_card(dataset_id)
                 if card is None:
+                    raise DatasetCardNotFoundError(dataset_id)
                 embeddings = embedding_function(card)
                 collection.upsert(ids=[dataset_id], embeddings=embeddings[0])
                 logger.info(f"Dataset {dataset_id} added to collection")
                 result = collection.get(ids=[dataset_id], include=["embeddings"])
+            except DatasetCardNotFoundError:
+                raise
             except Exception as e:
                 logger.error(
                     f"Error adding dataset {dataset_id} to collection: {str(e)}"
                 )
+                raise DatasetCardNotFoundError(dataset_id) from e
         embedding = result["embeddings"][0]
         if not query_result["ids"]:
             logger.info(f"No similar datasets found for: {dataset_id}")
+            raise HTTPException(
+                status_code=HTTP_404_NOT_FOUND, detail="No similar datasets found."
+            )
         # Prepare the response
         results = [
         logger.info(f"Found {len(results)} similar datasets for: {dataset_id}")
         return QueryResponse(results=results)
+    except (HTTPException, DatasetCardNotFoundError):
+        raise
     except Exception as e:
         logger.error(f"Error querying dataset {dataset_id}: {str(e)}")
+        raise HTTPException(
+            status_code=HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="An unexpected error occurred.",
+        ) from e
 if __name__ == "__main__":
     import uvicorn