Spaces:
Running
Running
| from datasets import load_dataset | |
| import pandas as pd, os | |
def load_retail_dataset(
    dataset_name: str = "amazon_polarity",
    split: str = "train[:200]",
    output_path: str = "datasets/retail_sample.jsonl",
) -> pd.DataFrame:
    """Build a small question/answer sample from a Hugging Face dataset.

    The dataset is fetched with ``load_dataset``, converted to a DataFrame,
    and reshaped into two columns:

    * ``question`` -- a fixed prefix followed by the record's ``title``
    * ``answer``   -- the record's ``content``

    The result is written to ``output_path`` as JSON Lines (one record per
    line) and also returned. No synthetic data is generated; if the dataset
    cannot be loaded, the error from ``load_dataset`` propagates.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        split: Split expression passed to ``load_dataset``; the default
            takes the first 200 training rows.
        output_path: Destination JSONL file. Its parent directory is
            created if it does not exist.

    Returns:
        A DataFrame with exactly the columns ``question`` and ``answer``.

    Raises:
        KeyError: If the loaded data has no ``title`` or ``content`` column.
    """
    dataset = load_dataset(dataset_name, split=split)
    df = pd.DataFrame(dataset)

    # Fail with a readable message instead of a bare pandas KeyError when a
    # caller-selected dataset does not have the expected schema.
    missing = {"title", "content"} - set(df.columns)
    if missing:
        raise KeyError(
            f"dataset {dataset_name!r} is missing required column(s): "
            f"{sorted(missing)}"
        )

    # Build the sample as a new frame so the source frame is not mutated.
    sample = pd.DataFrame(
        {
            "question": "Customer asks about this review: " + df["title"],
            "answer": df["content"],
        }
    )

    # Derive the directory from the output path so the two cannot drift apart;
    # a bare filename has an empty dirname, which means the current directory.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sample.to_json(output_path, orient="records", lines=True)
    print(f"β Saved {output_path}")
    return sample
# Script entry point: build and save the sample only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    load_retail_dataset()