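"""Fine-tune the configured seq2seq base model into FinanceGPT with LoRA.

Hyperparameters are read from config.yaml, training data from
datasets/financegpt_sample.jsonl, and the resulting adapter weights and
tokenizer are written to models/financegpt.
"""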
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

from shared.utils import load_yaml_config, ensure_dir, print_banner


def main():
    cfg = load_yaml_config("config.yaml")
    print_banner("Training FinanceGPT")
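
    # Load the base model and its tokenizer named in config.yaml.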
    tokenizer = AutoTokenizer.from_pretrained(cfg["base_model"])
    model = AutoModelForSeq2SeqLM.from_pretrained(cfg["base_model"])

    # LoRA configuration: only the low-rank adapter weights are trained,
    # while the base model parameters stay frozen.
    peft_config = LoraConfig(
        r=cfg["train"]["lora_r"],
        lora_alpha=cfg["train"]["lora_alpha"],
        lora_dropout=cfg["train"]["lora_dropout"],
        bias="none",
        task_type="SEQ_2_SEQ_LM",
    )
    model = get_peft_model(model, peft_config)
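
    # Each record in the JSONL file is expected to have "question" and "answer" fields.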
    dataset = load_dataset("json", data_files="datasets/financegpt_sample.jsonl", split="train")

    def preprocess(batch):
        inputs = tokenizer(batch["question"], truncation=True, padding="max_length", max_length=256)
        labels = tokenizer(batch["answer"], truncation=True, padding="max_length", max_length=256)
        # Replace label padding ids with -100 so the loss ignores padded positions.
        inputs["labels"] = [
            [(t if t != tokenizer.pad_token_id else -100) for t in seq] for seq in labels["input_ids"]
        ]
        return inputs

    tokenized = dataset.map(preprocess, batched=True)
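
    # Training hyperparameters come from the "train" section of config.yaml.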
    args = TrainingArguments(
        output_dir="models/financegpt",
        per_device_train_batch_size=cfg["train"]["batch_size"],
        learning_rate=cfg["train"]["lr"],
        num_train_epochs=cfg["train"]["epochs"],
        fp16=torch.cuda.is_available(),
        save_strategy="epoch",
    )
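
    # The default data collator is enough here because every example was
    # padded to a fixed max_length above.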
    trainer = Trainer(model=model, args=args, train_dataset=tokenized)
    trainer.train()
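
    # Persist the LoRA adapter weights and the tokenizer.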
    ensure_dir("models/financegpt")
    model.save_pretrained("models/financegpt")
    tokenizer.save_pretrained("models/financegpt")
    print("✅ Model saved at models/financegpt")


if __name__ == "__main__":
    main()