Spaces:

NAACL2022
/

CLIP-Caption-Reward

Runtime error

App Files Files Community

CLIP-Caption-Reward / tools /eval_finecapeval.py

akhaliq HF Staff

add files

c80917c over 3 years ago

raw

history blame

5.44 kB


	from tqdm import tqdm
	from pprint import pprint
	import pandas as pd
	import argparse
	import re
	import json
	import nltk
	from nltk.tokenize import word_tokenize
	from nltk.stem.porter import PorterStemmer
	p_stemmer = PorterStemmer()

	# nltk.download('punkt')
	# nltk.download('wordnet')
	# nltk.download('stopwords')

	import language_evaluation
	evaluator = language_evaluation.CocoEvaluator()


	def nltk_process(text):
	# Tokenization
	nltk_tokenList = word_tokenize(text)

	# Stemming
	nltk_stemedList = []
	for word in nltk_tokenList:
	nltk_stemedList.append(p_stemmer.stem(word))

	filtered_sentence = nltk_stemedList

	# Removing Punctuation

	tokens = [re.sub(r'[^a-zA-Z0-9]', '', tok) for tok in filtered_sentence]

	text = " ".join(tokens)

	return text


	def calculate_finegrained_scores(pred_id2sent, id2caption, use_coco_eval=False):
	if use_coco_eval:
	n_total = 0
	refs = []
	hyps = []
	for id, gt_captions in id2caption.items():
	pred_sent = pred_id2sent[id]

	refs.append(gt_captions)
	hyps.append(pred_sent)

	n_total += 1

	print('caption')
	results = evaluator.run_evaluation(hyps, refs)
	pprint(results)

	n_total = 0
	total_score = 0
	for id, gt_phrases in id2background.items():
	pred_sent = pred_id2sent[id]

	score = 0
	n_phrases = len(gt_phrases)

	for gt_phrase in gt_phrases:
	word_score = 0
	for gt_word in gt_phrase.split():
	if gt_word in pred_sent:
	word_score += 1
	if len(gt_phrase.split()) > 0:
	score += word_score / len(gt_phrase.split())

	if n_phrases > 0:
	score /= n_phrases

	total_score += score
	n_total += 1
	print('background')
	# print('# retrieved words:', n_retrieved)
	print(f'Acc: {total_score / n_total * 100:.2f}')

	n_total = 0
	total_score = 0
	for id, gt_phrases in id2object.items():
	pred_sent = pred_id2sent[id]

	score = 0
	n_phrases = len(gt_phrases)

	for gt_phrase in gt_phrases:
	word_score = 0
	for gt_word in gt_phrase.split():
	if gt_word in pred_sent:
	word_score += 1
	if len(gt_phrase.split()) > 0:
	score += word_score / len(gt_phrase.split())

	if n_phrases > 0:
	score /= n_phrases

	total_score += score
	n_total += 1
	print('object')
	# print('# retrieved words:', n_retrieved)
	print(f'Acc: {total_score / n_total * 100:.2f}')

	n_total = 0
	total_score = 0
	for id, gt_phrases in id2relation.items():
	pred_sent = pred_id2sent[id]

	score = 0
	n_phrases = len(gt_phrases)

	for gt_phrase in gt_phrases:
	word_score = 0
	for gt_word in gt_phrase.split():
	if gt_word in pred_sent:
	word_score += 1
	if len(gt_phrase.split()) > 0:
	score += word_score / len(gt_phrase.split())

	if n_phrases > 0:
	score /= n_phrases

	total_score += score
	n_total += 1
	print('relation')
	# print('# retrieved words:', n_retrieved)
	print(f'Acc: {total_score / n_total * 100:.2f}')


	if __name__ == '__main__':
	parser = argparse.ArgumentParser()
	parser.add_argument('--finecapeval_path', type=str, default="data/FineCapEval.csv")
	parser.add_argument('--generated_id2caption', type=str, default="FineCapEval_results/mle.json")
	args = parser.parse_args()

	df = pd.read_csv(args.finecapeval_path)
	assert df.shape == (5000, 5)

	generated_id2caption = json.load(open(args.generated_id2caption, 'r'))

	print("Preprocessing GT FineCapEval data...")
	id2caption = {}
	id2background = {}
	id2object = {}
	id2relation = {}

	for row in tqdm(df.itertuples(), total=len(df)):

	id = row.image.split('.')[0]
	caption = row.caption
	background = row.background
	object = row.object
	relation = row.relation

	if not isinstance(caption, str):
	continue
	if not isinstance(background, str):
	continue
	if not isinstance(object, str):
	continue
	if not isinstance(relation, str):
	continue

	if id not in id2caption:
	id2caption[id] = []
	id2background[id] = []
	id2object[id] = []
	id2relation[id] = []

	id2caption[id].append(caption)

	phrases = []
	for phrase in background.lower().split('\;'):
	if len(phrase) > 1:
	phrase = nltk_process(phrase)
	phrases.append(phrase)
	id2background[id].extend(phrases)

	phrases = []
	for phrase in object.lower().split('\;'):
	if len(phrase) > 1:
	phrase = nltk_process(phrase)
	phrases.append(phrase)
	id2object[id].extend(phrases)

	phrases = []
	for phrase in relation.lower().split('\;'):
	if len(phrase) > 1:
	phrase = nltk_process(phrase)
	phrases.append(phrase)
	id2relation[id].extend(phrases)

	print("Calculating scores...")
	calculate_finegrained_scores(
	generated_id2caption,
	id2caption,
	use_coco_eval=True)