Spaces:
Running
Running
| from datasets import load_dataset | |
| import pandas as pd | |
| import os | |
def load_finance_dataset(
    dataset_id="Abirate/financial_phrasebank",
    split="train[:100]",
    output_path="datasets/financegpt_sample.jsonl",
):
    """Build a small demo question/answer dataset and save it as JSON Lines.

    Loads ``split`` of the Hugging Face dataset ``dataset_id``, turns each
    row's ``sentence`` into a "Summarize this financial statement: ..."
    question, and uses the stringified ``label`` as the answer. The pairs are
    synthetic and intended only for demos.

    Args:
        dataset_id: Hugging Face dataset ID to load. The dataset must expose
            ``sentence`` and ``label`` columns.
        split: Split expression passed to ``load_dataset``.
        output_path: Destination JSONL file; its parent directory is created
            if it does not exist.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts, one per row.
    """
    dataset = load_dataset(dataset_id, split=split)
    df = pd.DataFrame(dataset)

    # Create synthetic QA pairs for demo
    df["question"] = "Summarize this financial statement: " + df["sentence"]
    df["answer"] = df["label"].astype(str)
    qa_frame = df[["question", "answer"]]

    output_dir = os.path.dirname(output_path)
    if output_dir:  # a bare filename has no directory to create
        os.makedirs(output_dir, exist_ok=True)

    # Write straight from the frame instead of round-tripping through a list
    # of dicts and a second DataFrame.
    qa_frame.to_json(output_path, orient="records", lines=True)
    print(f"β Saved dataset to {output_path}")
    return qa_frame.to_dict(orient="records")
if __name__ == "__main__":
    # When run as a script, build the sample dataset and write the JSONL file.
    load_finance_dataset()