axionx-demo / retailgpt_evaluator /dataset_loader.py
deepsodha's picture
Upload 25 files
beb5479 verified
raw
history blame contribute delete
705 Bytes
from datasets import load_dataset
import pandas as pd, os
def load_retail_dataset(
    sample_size: int = 200,
    output_path: str = "datasets/retail_sample.jsonl",
) -> "pd.DataFrame":
    """
    Load a small retail/e-commerce QA sample from the Hugging Face Hub.

    Takes the first ``sample_size`` rows of the ``amazon_polarity`` train
    split, derives a ``question`` column from each row's ``title`` and an
    ``answer`` column from its ``content``, writes those two columns to
    ``output_path`` as JSON Lines, and returns them.

    Args:
        sample_size: Number of leading rows of the train split to load.
        output_path: Destination of the JSON Lines file. Missing parent
            directories are created.

    Returns:
        A DataFrame with exactly the columns ``question`` and ``answer``.

    Raises:
        ValueError: If ``sample_size`` is negative.
    """
    # A negative bound would be spliced into the split expression as
    # "train[:-N]" instead of selecting a small leading sample.
    if sample_size < 0:
        raise ValueError(f"sample_size must be non-negative, got {sample_size}")

    dataset = load_dataset("amazon_polarity", split=f"train[:{sample_size}]")
    df = pd.DataFrame(dataset)

    # Turn each review into a pseudo question/answer pair.
    df["question"] = "Customer asks about this review: " + df["title"]
    df["answer"] = df["content"]
    sample = df[["question", "answer"]]

    # A bare filename has no directory component; os.makedirs("") would raise.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    sample.to_json(output_path, orient="records", lines=True)
    print(f"✅ Saved {output_path}")
    return sample
# Script entry point: build and save the sample only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    load_retail_dataset()