Spaces:
Running
Running
File size: 1,204 Bytes
beb5479 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import json
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from shared.metrics import compute_rouge, compute_bleu, factuality_score
from shared.utils import print_banner
def evaluate_model(model_path="models/financegpt"):
    """Evaluate a seq2seq checkpoint on the FinanceGPT sample set and save the metrics.

    Loads the tokenizer and model from ``model_path``, generates an answer for
    each of the first 50 rows of ``datasets/financegpt_sample.jsonl`` and
    scores the generations against the reference answers with
    ``compute_rouge``, ``compute_bleu`` and ``factuality_score``. The merged
    metrics are written to ``<model_path>/eval_results.json`` and printed.

    Args:
        model_path: Location handed to ``from_pretrained`` for both the
            tokenizer and the model. The results file is written into this
            same directory, so evaluating a different checkpoint no longer
            overwrites the results stored under ``models/financegpt``.
    """
    # Function-scope import so the block does not depend on the file header.
    from pathlib import Path

    print_banner("Evaluating FinanceGPT")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
    dataset = load_dataset(
        "json",
        data_files="datasets/financegpt_sample.jsonl",
        split="train[:50]",
    )

    preds, refs = [], []
    for row in dataset:
        # One question at a time; generation is capped at 64 new tokens.
        inputs = tokenizer(row["question"], return_tensors="pt", truncation=True)
        output = model.generate(**inputs, max_new_tokens=64)
        preds.append(tokenizer.decode(output[0], skip_special_tokens=True))
        refs.append(row["answer"])

    # NOTE(review): each metric helper is assumed to return a mapping of
    # metric name -> value; on duplicate keys the later helper wins.
    results = {}
    results.update(compute_rouge(preds, refs))
    results.update(compute_bleu(preds, refs))
    results.update(factuality_score(preds, refs))

    # Store the metrics next to the evaluated checkpoint rather than at a
    # hard-coded location (identical path for the default model_path).
    out_dir = Path(model_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / "eval_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print("✅ Evaluation complete:", results)
# Script entry point: when the file is executed directly (not imported),
# run the evaluation with the default model path.
if __name__ == "__main__":
    evaluate_model()
|