Spaces:

davanstrien
/

huggingface-datasets-search-v2

Running on CPU Upgrade

App Files Files Community

davanstrien HF Staff committed on Feb 26

Commit

d849643

1 Parent(s): a0c28a9

add trending models and datasets fetching endpoints with summaries

Browse files

Files changed (1) hide show

main.py +153 -0

main.py CHANGED Viewed

@@ -14,12 +14,15 @@ from huggingface_hub import HfApi
 from transformers import AutoTokenizer
 import torch
 import dateutil.parser
 # Configuration constants
 # Repository-style identifiers; presumably the summarisation model and the
 # embedding model respectively - their load sites are not visible here, confirm.
 MODEL_NAME = "davanstrien/SmolLM2-360M-tldr-sft-2025-02-12_15-13"
 EMBEDDING_MODEL = "nomic-ai/modernbert-embed-base"
 BATCH_SIZE = 2000
 # NOTE(review): the TTL is stored as a string, not an int; the unit is
 # presumably seconds - confirm against the cache decorator that consumes it.
 CACHE_TTL = "60"
 if torch.cuda.is_available():
     DEVICE = "cuda"
@@ -463,6 +466,156 @@ def process_search_results(results, id_field, k, sort_by, exclude_id=None):
     return query_results
 if __name__ == "__main__":
     import uvicorn

 from transformers import AutoTokenizer
 import torch
 import dateutil.parser
+import httpx
+from datetime import datetime
 # Configuration constants
 # Repository-style identifiers; presumably the summarisation model and the
 # embedding model respectively - their load sites are not visible here, confirm.
 MODEL_NAME = "davanstrien/SmolLM2-360M-tldr-sft-2025-02-12_15-13"
 EMBEDDING_MODEL = "nomic-ai/modernbert-embed-base"
 BATCH_SIZE = 2000
 # NOTE(review): the TTL is stored as a string, not an int; the unit is
 # presumably seconds - confirm against the cache decorator that consumes it.
 CACHE_TTL = "60"
+TRENDING_CACHE_TTL = "900"  # 15 minutes; passed as ttl= to @cache on the trending helpers
 if torch.cuda.is_available():
     DEVICE = "cuda"
     return query_results
+async def fetch_trending_models():
+    """Fetch trending models from HuggingFace API"""
+    async with httpx.AsyncClient() as client:
+        response = await client.get("https://huggingface.co/api/models")
+        response.raise_for_status()
+        return response.json()
+@cache(ttl=TRENDING_CACHE_TTL)
+async def get_trending_models_with_summaries(
+    limit: int = 10,
+    min_likes: int = 0,
+    min_downloads: int = 0,
+) -> List[ModelQueryResult]:
+    """Fetch trending models and combine with summaries from database"""
+    try:
+        # Fetch trending models
+        trending_models = await fetch_trending_models()
+        # Filter by minimum likes/downloads
+        trending_models = [
+            model
+            for model in trending_models
+            if model.get("likes", 0) >= min_likes
+            and model.get("downloads", 0) >= min_downloads
+        ]
+        # Sort by trending score and limit
+        trending_models = sorted(
+            trending_models, key=lambda x: x.get("trendingScore", 0), reverse=True
+        )[:limit]
+        # Get model IDs
+        model_ids = [model["modelId"] for model in trending_models]
+        # Fetch summaries from ChromaDB
+        collection = client.get_collection("model_cards")
+        summaries = collection.get(ids=model_ids, include=["documents"])
+        # Create mapping of model_id to summary
+        id_to_summary = dict(zip(summaries["ids"], summaries["documents"]))
+        # Combine data
+        results = []
+        for model in trending_models:
+            if model["modelId"] in id_to_summary:
+                result = ModelQueryResult(
+                    model_id=model["modelId"],
+                    similarity=1.0,  # Not applicable for trending
+                    summary=id_to_summary[model["modelId"]],
+                    likes=model.get("likes", 0),
+                    downloads=model.get("downloads", 0),
+                )
+                results.append(result)
+        return results
+    except Exception as e:
+        logger.error(f"Error fetching trending models: {str(e)}")
+        raise HTTPException(status_code=500, detail="Failed to fetch trending models")
+@app.get("/trending/models", response_model=ModelQueryResponse)
+async def get_trending_models(
+    limit: int = Query(default=10, ge=1, le=100),
+    min_likes: int = Query(default=0, ge=0),
+    min_downloads: int = Query(default=0, ge=0),
+):
+    """Get trending models with their summaries"""
+    results = await get_trending_models_with_summaries(
+        limit=limit, min_likes=min_likes, min_downloads=min_downloads
+    )
+    return ModelQueryResponse(results=results)
+async def fetch_trending_datasets():
+    """Fetch trending datasets from HuggingFace API"""
+    async with httpx.AsyncClient() as client:
+        response = await client.get("https://huggingface.co/api/datasets")
+        response.raise_for_status()
+        return response.json()
+@cache(ttl=TRENDING_CACHE_TTL)
+async def get_trending_datasets_with_summaries(
+    limit: int = 10,
+    min_likes: int = 0,
+    min_downloads: int = 0,
+) -> List[QueryResult]:
+    """Fetch trending datasets and combine with summaries from database"""
+    try:
+        # Fetch trending datasets
+        trending_datasets = await fetch_trending_datasets()
+        # Filter by minimum likes/downloads
+        trending_datasets = [
+            dataset
+            for dataset in trending_datasets
+            if dataset.get("likes", 0) >= min_likes
+            and dataset.get("downloads", 0) >= min_downloads
+        ]
+        # Sort by trending score and limit
+        trending_datasets = sorted(
+            trending_datasets, key=lambda x: x.get("trendingScore", 0), reverse=True
+        )[:limit]
+        # Get dataset IDs
+        dataset_ids = [dataset["id"] for dataset in trending_datasets]
+        # Fetch summaries from ChromaDB
+        collection = client.get_collection("dataset_cards")
+        summaries = collection.get(ids=dataset_ids, include=["documents"])
+        # Create mapping of dataset_id to summary
+        id_to_summary = dict(zip(summaries["ids"], summaries["documents"]))
+        # Combine data
+        results = []
+        for dataset in trending_datasets:
+            if dataset["id"] in id_to_summary:
+                result = QueryResult(
+                    dataset_id=dataset["id"],
+                    similarity=1.0,  # Not applicable for trending
+                    summary=id_to_summary[dataset["id"]],
+                    likes=dataset.get("likes", 0),
+                    downloads=dataset.get("downloads", 0),
+                )
+                results.append(result)
+        return results
+    except Exception as e:
+        logger.error(f"Error fetching trending datasets: {str(e)}")
+        raise HTTPException(status_code=500, detail="Failed to fetch trending datasets")
+@app.get("/trending/datasets", response_model=QueryResponse)
+async def get_trending_datasets(
+    limit: int = Query(default=10, ge=1, le=100),
+    min_likes: int = Query(default=0, ge=0),
+    min_downloads: int = Query(default=0, ge=0),
+):
+    """Get trending datasets with their summaries"""
+    results = await get_trending_datasets_with_summaries(
+        limit=limit, min_likes=min_likes, min_downloads=min_downloads
+    )
+    return QueryResponse(results=results)
 if __name__ == "__main__":
     import uvicorn