| import os | |
| import json | |
| import pandas as pd | |
| import openpyxl | |
| from tqdm import tqdm | |
| from win32com.client import Dispatch | |
def just_open(filename):
    """Open *filename* in Excel through COM, save it in place, and close it.

    The workbook is opened in a hidden Excel instance and immediately saved
    back to the same path. Presumably this round-trip exists so that Excel
    writes cached formula results and sheet dimensions for a later
    ``data_only`` / ``read_only`` load -- confirm with the author.

    Args:
        filename: Path to the workbook; it is made absolute before being
            handed to Excel.

    Raises:
        Whatever COM error Excel reports while opening or saving. The
        workbook is still closed before a save error propagates.
    """
    xlApp = Dispatch("Excel.Application")
    xlApp.Visible = False
    xlBook = xlApp.Workbooks.Open(os.path.abspath(filename))
    try:
        xlBook.Save()
    finally:
        # Always release the workbook, even when Save() fails; otherwise it
        # stays open (and locked) inside the invisible Excel instance.
        # The positional False is SaveChanges=False: everything worth keeping
        # was written by Save(), and it stops Excel from raising a
        # "save changes?" prompt that nobody could see or answer.
        xlBook.Close(False)
    # NOTE(review): the Excel application itself is intentionally not Quit()
    # here -- Dispatch may have attached to an instance the user already has
    # open. Confirm whether an explicit Quit() is wanted.
# Prompt template (in Chinese) for a pairwise "win rate" judgement.
# It has three str.format placeholders -- {prompt}, {A} and {B} -- which
# make_query() fills with the question and the two candidate replies.
# The text asks the judge to compare the two replies in one sentence and then,
# on a new line, name the winner as "A" or "B" in the fixed answer format at
# the end. The trailing .strip() drops the newline right after the opening
# quotes and the one before the closing quotes.
win_rate_prompt = """
考虑以下问题:
“{prompt}”
我们认定一个好的回复需要形式简约、内容详尽、回答正确,请判断以下哪一个回复更好地回答了这个问题?
回复A:
“{A}”
回复B:
“{B}”
请首先用一句话具体比较以上两个回复,阐述哪一个回复更好以及为什么。然后,在新的一行,写明(且仅写出)“A”或“B”以明确指示哪个回复在你的比较中胜出。按以下格式给出你的答复:
具体比较:
胜出的回复:<"A"或"B">
""".strip()
def make_query(prompt, A, B):
    """Build one pairwise-comparison query record.

    Fills the module-level ``win_rate_prompt`` template with the question and
    the two candidate replies, each stripped of surrounding whitespace.

    Args:
        prompt: The question shown to the judge.
        A: The reply presented as "A".
        B: The reply presented as "B".

    Returns:
        A dict ``{"q": <formatted prompt text>, "a": None}``; ``"a"`` is left
        as ``None`` (presumably filled in later with the judge's answer).
    """
    # Values arrive straight from spreadsheet cells, which can be numbers or
    # dates rather than text; coerce to str so .strip() cannot raise
    # AttributeError on a non-string cell.
    q = win_rate_prompt.format(
        prompt=str(prompt).strip(),
        A=str(A).strip(),
        B=str(B).strip(),
    )
    return {"q": q, "a": None}
def read_excel(file):
    """Read one workbook and expand every data row into six pairwise queries.

    The file is first round-tripped through Excel with ``just_open`` and then
    loaded read-only with openpyxl. Rows are read from the active sheet,
    starting at row 2 (row 1 is skipped, presumably a header), columns 1-5:
    the prompt followed by four candidate replies A, B, C and D. Each row
    yields the six unordered reply pairs in a fixed presentation order:
    (A,B), (C,A), (A,D), (B,C), (D,B), (C,D).

    Args:
        file: Path to the ``.xlsx`` workbook.

    Returns:
        A list of query dicts as produced by ``make_query``.

    Raises:
        ValueError: If any of the five cells in a data row is empty.
    """
    just_open(filename=file)
    workbook: openpyxl.Workbook = openpyxl.load_workbook(
        filename=file,
        read_only=True,
        data_only=True,
        keep_links=False,
        keep_vba=False,
    )
    qas = []
    try:
        sheet = workbook.active
        last_row = sheet.max_row
        rows = sheet.iter_rows(
            min_row=2, max_row=last_row, min_col=1, max_col=5, values_only=True
        )
        # Row numbers start at 2 to match what the user sees in Excel.
        for row_number, row in enumerate(tqdm(rows, total=last_row - 1), start=2):
            # Explicit raise instead of assert: asserts vanish under -O and
            # give no hint about which row of which file is incomplete.
            if any(cell is None for cell in row):
                raise ValueError(
                    f"{file}: row {row_number} has an empty cell in columns 1-5: {row!r}"
                )
            prompt, A, B, C, D = row
            # Order and left/right placement are kept exactly as designed so
            # each reply appears on both sides across the six comparisons.
            pairings = ((A, B), (C, A), (A, D), (B, C), (D, B), (C, D))
            for first, second in pairings:
                qas.append(make_query(prompt, first, second))
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        workbook.close()
    print(f"include {len(qas)} qas from {file}")
    return qas
# Driver: collect the queries from every workbook in the input directory and
# dump them all into a single JSON file next to the workbooks.
input_dir = "./"
qas = []
for file in os.listdir(input_dir):
    # Keep only .xlsx files, ignoring names that start with "~$" (the prefix
    # Excel gives the temporary lock files it creates for open workbooks).
    is_workbook = file.endswith(".xlsx") and not file.startswith("~$")
    if is_workbook:
        qas.extend(read_excel(f"{input_dir}/{file}"))

# ensure_ascii=False keeps the Chinese prompt text readable in the output.
with open(f"{input_dir}/qas.json", "w", encoding="utf-8") as f:
    json.dump(qas, f, ensure_ascii=False)