Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
6a4b44c
1
Parent(s):
cb13c5d
update embedding model
Browse files - load_viewer_data.py +4 -4
load_viewer_data.py
CHANGED
|
@@ -17,10 +17,10 @@ from utils import get_chroma_client
|
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
logger.setLevel(logging.INFO)
|
| 19 |
|
| 20 |
-
EMBEDDING_MODEL_NAME = "davanstrien/dataset-viewer-descriptions
|
| 21 |
-
EMBEDDING_MODEL_REVISION = "
|
| 22 |
INFERENCE_MODEL_URL = (
|
| 23 |
-
"https://
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
@@ -61,7 +61,7 @@ def embed_and_upsert_datasets(
|
|
| 61 |
dataset_rows_and_ids: list[dict[str, str]],
|
| 62 |
collection: chromadb.Collection,
|
| 63 |
inference_client: InferenceClient,
|
| 64 |
-
batch_size: int =
|
| 65 |
):
|
| 66 |
logger.info(
|
| 67 |
f"Embedding and upserting {len(dataset_rows_and_ids)} datasets for viewer data"
|
|
|
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
logger.setLevel(logging.INFO)
|
| 19 |
|
| 20 |
+
EMBEDDING_MODEL_NAME = "davanstrien/query-to-dataset-viewer-descriptions"
|
| 21 |
+
EMBEDDING_MODEL_REVISION = "07c71d97861a73695f0c53cd6b4b32980007d908"
|
| 22 |
INFERENCE_MODEL_URL = (
|
| 23 |
+
"https://ecg0by60w2vo9j8h.us-east-1.aws.endpoints.huggingface.cloud"
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
|
|
| 61 |
dataset_rows_and_ids: list[dict[str, str]],
|
| 62 |
collection: chromadb.Collection,
|
| 63 |
inference_client: InferenceClient,
|
| 64 |
+
batch_size: int = 100,
|
| 65 |
):
|
| 66 |
logger.info(
|
| 67 |
f"Embedding and upserting {len(dataset_rows_and_ids)} datasets for viewer data"
|