from pprint import pprint
import argparse
import json
import re

import pandas as pd
from tqdm import tqdm

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer

# Uncomment on first run to fetch the required NLTK data:
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('stopwords')

import language_evaluation

p_stemmer = PorterStemmer()
evaluator = language_evaluation.CocoEvaluator()
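# Note on the evaluator (per the `language_evaluation` package): CocoEvaluator
# scores a list of hypothesis captions against their reference captions with
# the standard COCO caption metrics (e.g., BLEU, ROUGE-L, CIDEr).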
def nltk_process(text):
    # Tokenize, stem each token with the Porter stemmer, and strip punctuation.
    tokens = word_tokenize(text)
    stems = [p_stemmer.stem(word) for word in tokens]
    # Remove non-alphanumeric characters and drop tokens that become empty,
    # so punctuation-only tokens don't leave stray spaces in the output.
    cleaned = [re.sub(r'[^a-zA-Z0-9]', '', tok) for tok in stems]
    cleaned = [tok for tok in cleaned if tok]
    return " ".join(cleaned)
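# Quick sanity check (assuming the 'punkt' tokenizer data is installed):
#   nltk_process("Two dogs are running!")  ->  "two dog are run"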
def phrase_recall(pred_id2sent, id2phrases):
    # Word-level recall: for each GT phrase, the fraction of its words that
    # appear in the predicted caption (substring match), averaged over the
    # phrases of an image and then over all images.
    n_total = 0
    total_score = 0
    for id, gt_phrases in id2phrases.items():
        pred_sent = pred_id2sent[id]
        score = 0
        for gt_phrase in gt_phrases:
            gt_words = gt_phrase.split()
            if len(gt_words) > 0:
                n_hits = sum(1 for gt_word in gt_words if gt_word in pred_sent)
                score += n_hits / len(gt_words)
        if len(gt_phrases) > 0:
            score /= len(gt_phrases)
        total_score += score
        n_total += 1
    print(f'Acc: {total_score / n_total * 100:.2f}')


def calculate_finegrained_scores(pred_id2sent, id2caption,
                                 id2background, id2object, id2relation,
                                 use_coco_eval=False):
    if use_coco_eval:
        refs = []
        hyps = []
        for id, gt_captions in id2caption.items():
            refs.append(gt_captions)
            hyps.append(pred_id2sent[id])
        print('caption')
        results = evaluator.run_evaluation(hyps, refs)
        pprint(results)
    # Word-level recall of the annotated background / object / relation phrases.
    print('background')
    phrase_recall(pred_id2sent, id2background)

    print('object')
    phrase_recall(pred_id2sent, id2object)

    print('relation')
    phrase_recall(pred_id2sent, id2relation)
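# A worked example of phrase_recall: if a GT phrase is "red fire hydrant" and
# the prediction contains "red" and "hydrant" but not "fire", that phrase
# scores 2/3. `gt_word in pred_sent` is a substring check, so a stemmed GT
# word like "run" also matches a prediction containing "running"; the flip
# side is that "cat" would also match "catch".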
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--finecapeval_path', type=str,
                        default="data/FineCapEval.csv")
    parser.add_argument('--generated_id2caption', type=str,
                        default="FineCapEval_results/mle.json")
    args = parser.parse_args()

    df = pd.read_csv(args.finecapeval_path)
    assert df.shape == (5000, 5), f"unexpected FineCapEval shape: {df.shape}"

    # Generated captions: a JSON object mapping image id -> caption string.
    with open(args.generated_id2caption) as f:
        generated_id2caption = json.load(f)
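    # FineCapEval rows: image / caption / background / object / relation
    # (five columns, matching the shape assertion above). Rows with missing
    # annotations are skipped below.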
| print("Preprocessing GT FineCapEval data...") | |
| id2caption = {} | |
| id2background = {} | |
| id2object = {} | |
| id2relation = {} | |
| for row in tqdm(df.itertuples(), total=len(df)): | |
| id = row.image.split('.')[0] | |
| caption = row.caption | |
| background = row.background | |
| object = row.object | |
| relation = row.relation | |
| if not isinstance(caption, str): | |
| continue | |
| if not isinstance(background, str): | |
| continue | |
| if not isinstance(object, str): | |
| continue | |
| if not isinstance(relation, str): | |
| continue | |
| if id not in id2caption: | |
| id2caption[id] = [] | |
| id2background[id] = [] | |
| id2object[id] = [] | |
| id2relation[id] = [] | |
| id2caption[id].append(caption) | |
| phrases = [] | |
| for phrase in background.lower().split('\;'): | |
| if len(phrase) > 1: | |
| phrase = nltk_process(phrase) | |
| phrases.append(phrase) | |
| id2background[id].extend(phrases) | |
| phrases = [] | |
| for phrase in object.lower().split('\;'): | |
| if len(phrase) > 1: | |
| phrase = nltk_process(phrase) | |
| phrases.append(phrase) | |
| id2object[id].extend(phrases) | |
| phrases = [] | |
| for phrase in relation.lower().split('\;'): | |
| if len(phrase) > 1: | |
| phrase = nltk_process(phrase) | |
| phrases.append(phrase) | |
| id2relation[id].extend(phrases) | |
| print("Calculating scores...") | |
| calculate_finegrained_scores( | |
| generated_id2caption, | |
| id2caption, | |
| use_coco_eval=True) | |
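# Example invocation (the script name is a placeholder; paths are the
# argparse defaults above):
#   python finecapeval_evaluation.py \
#       --finecapeval_path data/FineCapEval.csv \
#       --generated_id2caption FineCapEval_results/mle.json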