axionx-demo / financegpt /dataset_loader.py
deepsodha's picture
Upload 25 files
beb5479 verified
raw
history blame contribute delete
857 Bytes
from datasets import load_dataset
import pandas as pd
import os
def load_finance_dataset(
    dataset_id="Abirate/financial_phrasebank",
    split="train[:100]",
    output_path="datasets/financegpt_sample.jsonl",
):
    """
    Build a small synthetic question/answer demo set and save it as JSONL.

    The source dataset is fetched with ``datasets.load_dataset`` and must
    expose a ``sentence`` column (used to build the question text) and a
    ``label`` column (stringified and used as the answer). Replace
    ``dataset_id`` with your own dataset or HF dataset ID.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        split: Split expression passed to ``load_dataset``; the default
            keeps only the first 100 training rows.
        output_path: Destination of the JSON Lines file. Missing parent
            directories are created.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts, one per row.
    """
    dataset = load_dataset(dataset_id, split=split)
    df = pd.DataFrame(dataset)

    # Create synthetic QA pairs for demo
    qa_frame = pd.DataFrame(
        {
            "question": "Summarize this financial statement: " + df["sentence"],
            "answer": df["label"].astype(str),
        }
    )

    # os.makedirs("") raises, so only create a directory when the path has one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Serialize straight from the frame instead of rebuilding it from records.
    qa_frame.to_json(output_path, orient="records", lines=True)
    print(f"✅ Saved dataset to {output_path}")

    return qa_frame.to_dict(orient="records")
# Script entry point: build and save the demo dataset only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    load_finance_dataset()