| import os | |
| import json | |
| import collections | |
def read_qs(directory="./questions"):
    """Read every question line from the fixed set of question text files.

    Args:
        directory: Folder containing the question files. Defaults to
            ``"./questions"``, the location originally hard-coded here.

    Returns:
        A list with one string per line, in file order and then line order.
        Line terminators are kept, so callers that compare questions by
        exact string equality see the trailing newline as part of the text.

    Raises:
        OSError: If any of the expected files cannot be opened.
    """
    filenames = [
        'math_question.txt',
        'qa_question.txt',
        'summarization_question.txt',
    ]
    qs = []
    for filename in filenames:
        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
            # Each line has one fixed phrase substituted by a longer variant;
            # lines that do not contain the phrase pass through unchanged.
            qs.extend(line.replace("ηζζθ¦", "ηζδΈζζθ¦") for line in f)
    print(f"read {len(qs)} queries from files")
    return qs
def read_qas(directory="./questions"):
    """Load question/answer records from every ``*qas*.json`` file in a folder.

    A file is picked up when its name ends with ``.json`` and contains the
    substring ``"qas"``. Each such file is parsed as JSON and iterated; every
    item yielded is appended to the result.

    Args:
        directory: Folder to scan. Defaults to ``"./questions"``, the
            location originally hard-coded here.

    Returns:
        A flat list of all items from all matching files. Files are visited
        in sorted filename order so the result is reproducible across runs
        and platforms (``os.listdir`` itself gives no ordering guarantee).

    Raises:
        OSError: If the directory or a matching file cannot be read.
        json.JSONDecodeError: If a matching file is not valid JSON.
    """
    qas = []
    # NOTE(review): a previously written "qas.json" in this folder also
    # matches the filter and will be read back in - confirm that is intended.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".json") and "qas" in filename:
            with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
                qas.extend(json.load(f))
    print(f"read {len(qas)} query-answers from files")
    return qas
def merge(qs, qas):
    """Pair each question in ``qs`` with one answer taken from ``qas``.

    Answers are grouped by their exact ``"q"`` string. Walking ``qs`` in
    order, each question consumes the most recently listed remaining answer
    for that string; questions with no remaining answer are skipped. The
    input lists are not modified.

    Args:
        qs: Iterable of question strings.
        qas: Iterable of mappings with ``"q"`` and ``"a"`` keys.

    Returns:
        A new list of ``{"q": ..., "a": ...}`` dicts, in ``qs`` order.
    """
    answers_by_question = collections.defaultdict(list)
    for record in qas:
        answers_by_question[record["q"]].append(record["a"])

    merged = []
    for question in qs:
        remaining = answers_by_question[question]
        if remaining:
            # pop() takes from the end, i.e. the last answer seen in `qas`.
            merged.append({"q": question, "a": remaining.pop()})
    print(f"merge {len(merged)} query-answers from files")
    return merged
if __name__ == "__main__":
    # Script entry point: load the questions and the existing answer
    # records, pair them up, and write the paired list out as JSON.
    questions = read_qs()
    answer_records = read_qas()
    merged = merge(questions, answer_records)
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open("./questions/qas.json", "w", encoding="utf-8") as out:
        out.write(json.dumps(merged, ensure_ascii=False, indent=2))