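"""Mine prompt-attack result markdown files stored under ./db.

Each file is named <model>_<zeroshot|fewshot>.md and holds one "# <dataset>"
section per dataset, with "## <attack>" subsections; retrieve() returns the
prompts that degrade the most under a given attack.
"""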
import os
import re

import numpy as np

def split_markdown_by_title(markdown_file):
    """Split a results markdown file into {dataset: {section keyword: text}}."""
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()
    # Top-level "# <dataset>" headings mark one section per dataset.
    re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli|# mmlu|# squad_v2|# iwslt|# un_multi|# math"
    datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli",
                "# mmlu", "# squad_v2", "# iwslt", "# un_multi", "# math"]
    # re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli"
    # datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli"]
    primary_sections = re.split(re_str, content)[1:]
    assert len(primary_sections) == len(datasets)

    all_sections_dict = {}
    for dataset, primary_section in zip(datasets, primary_sections):
        # Second-level "## " headings mark one subsection per attack.
        results = re.split("## ", primary_section)
        keywords = ["10 prompts", "bertattack", "checklist", "deepwordbug", "stresstest",
                    "textfooler", "textbugger", "translation"]
        secondary_sections_dict = {}
        for res in results:
            for keyword in keywords:
                if keyword in res.lower():
                    secondary_sections_dict[keyword] = res
                    break
        all_sections_dict[dataset] = secondary_sections_dict
    return all_sections_dict
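
# Sketch of the return shape, assuming e.g. a ./db/chatgpt_zeroshot.md file
# whose "# sst2" section has "## 10 prompts", "## textfooler", ... subsections:
#   split_markdown_by_title("./db/chatgpt_zeroshot.md")
#   # -> {"# sst2": {"10 prompts": "...", "textfooler": "...", ...}, ...}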

# def prompts_understanding(sections_dict):
#     for dataset in sections_dict.keys():
#         # print(dataset)
#         for title in sections_dict[dataset].keys():
#             if title == "10 prompts":
#                 prompts = sections_dict[dataset][title].split("\n")
#                 num = 0
#                 task_prompts_acc = []
#                 role_prompts_acc = []
#                 for prompt in prompts:
#                     if "Acc: " not in prompt:
#                         continue
#                     num += 1
#                     match = re.search(r'Acc: (\d+\.\d+)%', prompt)
#                     if match:
#                         number = float(match.group(1))
#                         # The first ten prompts are task prompts, the rest role prompts.
#                         if num <= 10:
#                             task_prompts_acc.append(number)
#                         else:
#                             role_prompts_acc.append(number)
#                 print(task_prompts_acc)
#                 print(role_prompts_acc)

def list_files(directory):
    """Return the paths of the plain files (not subdirectories) in `directory`."""
    return [os.path.join(directory, d) for d in os.listdir(directory)
            if not os.path.isdir(os.path.join(directory, d))]

def convert_model_name(model):
    # Map a display model name to the file-name prefix used under ./db.
    model_names = {
        "T5": "t5",
        "UL2": "ul2",
        "Vicuna": "vicuna",
        "ChatGPT": "chatgpt",
    }
    return model_names[model]

def convert_attack_name(attack):
    # Map a display attack name to its markdown subsection keyword.
    attack_names = {
        "BertAttack": "bertattack",
        "CheckList": "checklist",
        "DeepWordBug": "deepwordbug",
        "StressTest": "stresstest",
        "TextFooler": "textfooler",
        "TextBugger": "textbugger",
        "Semantic": "translation",
    }
    return attack_names[attack]

def convert_dataset_name(dataset):
    # Map a display dataset name to its "# <dataset>" section heading.
    dataset_names = {
        "CoLA": "# cola",
        "MNLI": "# mnli",
        "MRPC": "# mrpc",
        "QNLI": "# qnli",
        "QQP": "# qqp",
        "RTE": "# rte",
        "SST-2": "# sst2",
        "WNLI": "# wnli",
        "MMLU": "# mmlu",
        "SQuAD V2": "# squad_v2",
        "IWSLT": "# iwslt",
        "UN Multi": "# un_multi",
        "Math": "# math",
        "Avg": "Avg",
    }
    return dataset_names[dataset]
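
# Converter examples (values taken from the mapping tables above):
#   convert_model_name("ChatGPT")   -> "chatgpt"
#   convert_attack_name("Semantic") -> "translation"
#   convert_dataset_name("SST-2")   -> "# sst2"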

def retrieve(model_name, dataset_name, attack_name, prompt_type):
    """Return the six (prompt, attacked accuracy) pairs that score lowest."""
    model_name = convert_model_name(model_name)
    dataset_name = convert_dataset_name(dataset_name)
    attack_name = convert_attack_name(attack_name)
    shot = "zeroshot" if "zero" in prompt_type else "fewshot"
    # prompt_type is normalized here but not used further down.
    prompt_type = "task" if "task" in prompt_type else "role"
    directory_path = "./db"
    md_dir = os.path.join(directory_path, model_name + "_" + shot + ".md")
    sections_dict = split_markdown_by_title(md_dir)
    results = {}
    for cur_dataset in sections_dict.keys():
        if cur_dataset != dataset_name:
            continue
        dataset_dict = sections_dict[cur_dataset]
        for cur_attack in dataset_dict.keys():
            if cur_attack != attack_name:
                continue
            if attack_name == "translation":
                # Semantic attacks log one "prompt: ... acc: ...%" line per prompt.
                for prompt_summary in dataset_dict[attack_name].split("\n"):
                    if "acc: " not in prompt_summary:
                        continue
                    prompt = prompt_summary.split("prompt: ")[1]
                    # Bug fix: the original searched an undefined variable `result`.
                    match_atk = re.search(r'acc: (\d+\.\d+)%', prompt_summary)
                    number_atk = float(match_atk.group(1))
                    results[prompt] = number_atk
                # Keep the six prompts with the lowest accuracy (ascending sort).
                sorted_results = sorted(results.items(), key=lambda item: item[1])[:6]
                return sorted_results
            elif attack_name in ["bertattack", "checklist", "deepwordbug",
                                 "stresstest", "textfooler", "textbugger"]:
                # Adversarial attacks log "Attacked prompt: ..." lines carrying
                # both the original and the attacked accuracy.
                for prompt_summary in dataset_dict[attack_name].split("\n"):
                    if "Attacked prompt: " not in prompt_summary:
                        continue
                    match_origin = re.search(r'Original acc: (\d+\.\d+)%', prompt_summary)
                    match_atk = re.search(r'attacked acc: (\d+\.\d+)%', prompt_summary)
                    if match_origin and match_atk:
                        number_origin = float(match_origin.group(1))  # parsed but unused here
                        number_atk = float(match_atk.group(1))
                        # The original snippet parsed these values without storing
                        # them; as an assumed completion, mirror the translation
                        # branch and rank the summary lines by attacked accuracy.
                        results[prompt_summary] = number_atk
                sorted_results = sorted(results.items(), key=lambda item: item[1])[:6]
                return sorted_results

# Leftover exploratory snippets from the original script, kept commented out:
# print(model_shot, dataset, title, len(summary[attack][dataset]), num)
# for atk in summary.keys():
#     for dataset in summary[atk].keys():
#         # if atk == "translation":
#         print(atk, dataset, len(summary[atk][dataset]))
#         # print(summary[atk][dataset][:10])

def build_latex_table(summary):
    """Format mean/std accuracy drop rates as LaTeX table cells.

    `summary` is not built in this file; judging from how it is consumed here,
    it is assumed to map internal attack names (e.g. "textfooler") to dicts of
    internal dataset names (e.g. "# sst2", plus "Avg") to lists of drop rates.
    """
    output_dict = {}
    sorted_atk_name = ["TextBugger", "DeepWordBug", "TextFooler", "BertAttack",
                       "CheckList", "StressTest", "Semantic"]
    sorted_dataset_name = ["SST-2", "CoLA", "QQP", "MRPC", "MNLI", "QNLI", "RTE",
                           "WNLI", "MMLU", "SQuAD V2", "IWSLT", "UN Multi", "Math"]
    for atk in sorted_atk_name:
        output_dict[atk] = {dataset: "" for dataset in sorted_dataset_name}
    for sorted_atk in sorted_atk_name:
        for attack, dataset_drop_rates in summary.items():
            # Bug fix: the original compared convert_attack_name(attack) with a
            # display name, which never matches; convert the display name instead.
            if attack != convert_attack_name(sorted_atk):
                continue
            for sorted_dataset in sorted_dataset_name:
                for dataset, drop_rates in dataset_drop_rates.items():
                    if dataset != convert_dataset_name(sorted_dataset):
                        continue
                    if len(drop_rates) > 0:
                        # Raw strings keep the LaTeX backslashes intact.
                        output_dict[sorted_atk][sorted_dataset] = (
                            "{:.2f}".format(sum(drop_rates) / len(drop_rates))
                            + r"\scriptsize{$\pm$" + "{:.2f}".format(np.std(drop_rates)) + "}"
                        )
                    else:
                        output_dict[sorted_atk][sorted_dataset] = "-"
            total_drop_rate = summary[attack]["Avg"]
            output_dict[sorted_atk]["Avg"] = (
                "{:.2f}".format(np.mean(total_drop_rate))
                + r"\scriptsize{$\pm$" + "{:.2f}".format(np.std(total_drop_rate)) + "}"
            )
    return output_dict
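
if __name__ == "__main__":
    # Minimal usage sketch, assuming ./db/chatgpt_zeroshot.md exists and its
    # "# sst2" section contains a "## textfooler" subsection.
    worst = retrieve("ChatGPT", "SST-2", "TextFooler", "zeroshot task") or []
    for prompt, acc in worst:
        print("{:.2f}%  {}".format(acc, prompt))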