Spaces:
Runtime error
Runtime error
Create seed_supabase.py
Browse files- seed_supabase.py +35 -0
seed_supabase.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
from langchain.schema import Document
|
| 4 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
+
from langchain.vectorstores import SupabaseVectorStore
|
| 6 |
+
from supabase import create_client
|
| 7 |
+
|
| 8 |
+
# 1. Load GAIA train split
# NOTE(review): the GAIA dataset card appears to require a config name
# (e.g. "2023_all") and to expose "validation"/"test" splits rather than
# "train" -- confirm this call against the dataset card before running.
dataset = load_dataset("gaia-benchmark/GAIA", split="train")

# 2. Build Documents: "Q: …\nA: …"
# Each example becomes one Document whose text pairs the question with its
# answer; the task id (None when the row has no such key) and the split name
# are carried along as metadata.
# NOTE(review): the GAIA columns may be named "Question" / "Final answer"
# rather than "question" / "answer" -- verify, otherwise this raises KeyError.
docs = [
    Document(
        page_content=f"Q: {ex['question']}\nA: {ex['answer']}",
        metadata={"task_id": ex.get("task_id"), "split": "train"},
    )
    for ex in dataset
]
|
| 19 |
+
|
| 20 |
+
# 3. Initialize embedding & Supabase client
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Credentials are read from the environment; indexing os.environ directly
# means a missing variable fails fast with KeyError instead of producing a
# half-configured client.
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_SERVICE_KEY"]
supabase = create_client(supabase_url, supabase_key)
|
| 25 |
+
|
| 26 |
+
# 4. Upload to Supabase
# Hands every Document to the vector store along with the embedding model and
# the Supabase client created above.
# NOTE(review): presumably "documents" and "match_documents_langchain" must
# already exist as a table and a query function in the target Supabase
# project -- confirm against the project's SQL setup.
vectorstore = SupabaseVectorStore.from_documents(
    docs,
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)

print(f"Seeded {len(docs)} GAIA examples into Supabase.")
|