| """Generate json file for webpage.""" | |
| import json | |
| import os | |
| import re | |
| # models = ['llama', 'alpaca', 'gpt35', 'bard'] | |
| models = ['vicuna'] | |
| def read_jsonl(path: str, key: str=None): | |
| data = [] | |
| with open(os.path.expanduser(path)) as f: | |
| for line in f: | |
| if not line: | |
| continue | |
| data.append(json.loads(line)) | |
| if key is not None: | |
| data.sort(key=lambda x: x[key]) | |
| data = {item[key]: item for item in data} | |
| return data | |
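# Usage mirrors the calls in the __main__ block below, e.g.
#   read_jsonl('table/question.jsonl', key='question_id')
# which returns a dict keyed (and pre-sorted) by 'question_id'; without `key` the
# parsed records are returned as a plain list in file order.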


def trim_hanging_lines(s: str, n: int) -> str:
    s = s.strip()
    for _ in range(n):
        s = s.split('\n', 1)[1].strip()
    return s
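# Note: trim_hanging_lines is not called in the __main__ block below; it looks like a
# leftover helper for stripping a fixed number of leading lines from a string.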


if __name__ == '__main__':
    questions = read_jsonl('table/question.jsonl', key='question_id')

    # alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id')
    # bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id')
    # gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id')
    # llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id')
    vicuna_answers = read_jsonl('table/answer/answer_vicuna-13b.jsonl', key='question_id')
    ours_answers = read_jsonl('table/results/llama-13b-hf-alpaca.jsonl', key='question_id')

    review_vicuna = read_jsonl('table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl', key='question_id')
    # review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id')
    # review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id')
    # review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id')
    # review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id')
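    # Assumed record shapes, inferred from how fields are accessed below rather than
    # from any schema file:
    #   question rows: {'question_id', 'category', 'text', ...}
    #   answer rows:   {'question_id', 'text', ...}
    #   review rows:   {'question_id', 'content', 'tuple', ...}  # 'tuple' holds the score pair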

    records = []
    for qid in questions.keys():
        r = {
            'id': qid,
            'category': questions[qid]['category'],
            'question': questions[qid]['text'],
            'answers': {
                # 'alpaca': alpaca_answers[qid]['text'],
                # 'llama': llama_answers[qid]['text'],
                # 'bard': bard_answers[qid]['text'],
                # 'gpt35': gpt35_answers[qid]['text'],
                'vicuna': vicuna_answers[qid]['text'],
                'ours': ours_answers[qid]['text'],
            },
            'evaluations': {
                # 'alpaca': review_alpaca[qid]['text'],
                # 'llama': review_llama[qid]['text'],
                # 'bard': review_bard[qid]['text'],
                'vicuna': review_vicuna[qid]['content'],
                # 'gpt35': review_gpt35[qid]['text'],
            },
            'scores': {
                'vicuna': review_vicuna[qid]['tuple'],
                # 'alpaca': review_alpaca[qid]['score'],
                # 'llama': review_llama[qid]['score'],
                # 'bard': review_bard[qid]['score'],
                # 'gpt35': review_gpt35[qid]['score'],
            },
        }

        # cleanup data
        cleaned_evals = {}
        for k, v in r['evaluations'].items():
            v = v.strip()
            lines = v.split('\n')
            # trim the first line if it's a pair of numbers
            if re.match(r'\d+[, ]+\d+', lines[0]):
                lines = lines[1:]
                v = '\n'.join(lines)
            cleaned_evals[k] = v.replace('Assistant 1', '**Assistant 1**').replace('Assistant 2', '**Assistant 2**')

        r['evaluations'] = cleaned_evals
        records.append(r)

    # Reorder the records, this is optional
    for r in records:
        if r['id'] <= 20:
            r['id'] += 60
        else:
            r['id'] -= 20
    for r in records:
        if r['id'] <= 50:
            r['id'] += 10
        elif 50 < r['id'] <= 60:
            r['id'] -= 50
    for r in records:
        if r['id'] == 7:
            r['id'] = 1
        elif r['id'] < 7:
            r['id'] += 1
    records.sort(key=lambda x: x['id'])

    # Write to file
    with open('webpage/data.json', 'w') as f:
        json.dump({'questions': records, 'models': models}, f, indent=2)
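# To run (assumptions: the script sits next to the table/ and webpage/ directories,
# since every path above is relative to the working directory; the filename below is
# only illustrative):
#   python generate_webpage_data_from_table.py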