Spaces:
Runtime error
Runtime error
"""Seed a Supabase vector table with question/answer pairs from GAIA.

The script loads GAIA examples, turns each one into a LangChain
``Document`` of the form ``"Q: <question>\\nA: <answer>"``, embeds the
documents with a sentence-transformers model and uploads them to Supabase.

Environment variables read:
    SUPABASE_URL: URL passed to ``create_client``.
    SUPABASE_SERVICE_KEY: key passed to ``create_client``.

Raises:
    KeyError: if either environment variable is not set.
"""
import os

from datasets import load_dataset
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import SupabaseVectorStore
from supabase import create_client

# GAIA is published with named configs and only "validation" / "test"
# splits; there is no "train" split, and the question/answer columns are
# "Question" and "Final answer". The validation split is used here because
# it is the one expected to carry the reference answers.
# NOTE(review): confirm config, split and column names against the GAIA
# dataset card for the dataset revision you are pulling.
GAIA_CONFIG = "2023_all"
GAIA_SPLIT = "validation"

# 1. Read Supabase credentials first so a missing variable fails
#    immediately, before the dataset download and the model load.
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_SERVICE_KEY"]

# 2. Load the GAIA examples.
dataset = load_dataset("gaia-benchmark/GAIA", GAIA_CONFIG, split=GAIA_SPLIT)

# 3. Build Documents: "Q: …\nA: …"
docs = [
    Document(
        page_content=f"Q: {ex['Question']}\nA: {ex['Final answer']}",
        # task_id is read with .get() so a row without it yields None
        # instead of aborting the whole seeding run.
        metadata={"task_id": ex.get("task_id"), "split": GAIA_SPLIT},
    )
    for ex in dataset
]

# 4. Initialize embedding & Supabase client
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
supabase = create_client(supabase_url, supabase_key)

# 5. Upload to Supabase
# NOTE(review): presumably the "documents" table and the
# "match_documents_langchain" SQL function must already exist in the
# Supabase project, with a vector column sized for this embedding model —
# verify against the database schema.
vectorstore = SupabaseVectorStore.from_documents(
    docs,
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents_langchain",
)
print(f"Seeded {len(docs)} GAIA examples into Supabase.")