Spaces:

DeepLearning101
/

IE101TW

Sleeping

App Files Files Community

DeepLearning101 committed on Oct 15, 2023

Commit

45311fe

1 Parent(s): 2fcdf98

Upload 21 files

Browse files

Files changed (21) hide show

tools/__init__.py +4 -0
tools/analysis_toolkits/__init__.py +0 -0
tools/computations/softmax.py +8 -0
tools/data_structures/__init__.py +0 -0
tools/data_structures/trie.py +152 -0
tools/model_utils/__init__.py +0 -0
tools/model_utils/calibrate.py +202 -0
tools/model_utils/gpt_response.py +138 -0
tools/model_utils/parameter_freeze.py +126 -0
tools/model_utils/uncertainty.py +137 -0
tools/processing_utils/common.py +38 -0
tools/processing_utils/sampler.py +26 -0
tools/processing_utils/tokenizer/JiebaTokenizer.py +24 -0
tools/processing_utils/tokenizer/__init__.py +4 -0
tools/processing_utils/tokenizer/tokenizer_utils.py +19 -0
tools/runner_utils/__init__.py +0 -0
tools/runner_utils/conifg_extensive.py +15 -0
tools/runner_utils/log_util.py +30 -0
tools/runner_utils/retrying.py +288 -0
tools/runner_utils/set_seed.py +21 -0
tools/runner_utils/timecost.py +20 -0

tools/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/2 5:41 p.m.
+# @Author  : JianingWang
+# @File    : __init__.py

tools/analysis_toolkits/__init__.py ADDED Viewed

File without changes

tools/computations/softmax.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import torch
+"""
+Transform the torch logits into probabilities.
+"""
+def softmax(logits):
+    probs = torch.softmax(torch.from_numpy(logits).float(), -1).numpy()
+    return probs

tools/data_structures/__init__.py ADDED Viewed

File without changes

tools/data_structures/trie.py ADDED Viewed

	@@ -0,0 +1,152 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2022/2/15 7:57 p.m.
+# @Author  : JianingWang
+# @File    : trie
+import logging
+from typing import List
+from collections import OrderedDict
+logger = logging.getLogger(__name__)
+class Trie:
+    def __init__(self):
+        self.data = {}
+    def add(self, word: str):
+        """
+        Passes over every char (utf-8 char) on word and recursively adds it to the internal `data` trie representation.
+        The special key `""` is used to represent termination.
+        This function is idempotent, adding twice the same word will leave the trie unchanged
+        Example:
+        ```python
+        >>> trie = Trie()
+        >>> trie.add("Hello 友達")
+        >>> trie.data
+        {"H": {"e": {"l": {"l": {"o": {" ": {"友": {"達": {"": 1}}}}}}}}}
+        >>> trie.add("Hello")
+        >>> trie.data
+        {"H": {"e": {"l": {"l": {"o": {"": 1, " ": {"友": {"達": {"": 1}}}}}}}}}
+        ```
+        """
+        if not word:
+            # Prevent empty string
+            return
+        ref = self.data
+        for char in word:
+            ref[char] = char in ref and ref[char] or {}
+            ref = ref[char]
+        ref[""] = 1
+    def find(self, text: str):
+        states = OrderedDict()
+        offsets = []
+        skip = 0
+        for current, current_char in enumerate(text):
+            if skip and current < skip:
+                continue
+            to_remove = set()
+            reset = False
+            for start, trie_pointer in states.items():
+                if "" in trie_pointer:
+                    for lookstart, looktrie_pointer in states.items():
+                        if lookstart > start:
+                            break
+                        elif lookstart < start:
+                            lookahead_index = current + 1
+                            end = current + 1
+                        else:
+                            lookahead_index = current
+                            end = current
+                        next_char = text[lookahead_index] if lookahead_index < len(text) else None
+                        if "" in looktrie_pointer:
+                            start = lookstart
+                            end = lookahead_index
+                            skip = lookahead_index
+                        while next_char in looktrie_pointer:
+                            looktrie_pointer = looktrie_pointer[next_char]
+                            lookahead_index += 1
+                            if "" in looktrie_pointer:
+                                start = lookstart
+                                end = lookahead_index
+                                skip = lookahead_index
+                            if lookahead_index == len(text):
+                                break
+                            next_char = text[lookahead_index]
+                    offsets.append([start, end])
+                    reset = True
+                    break
+                elif current_char in trie_pointer:
+                    trie_pointer = trie_pointer[current_char]
+                    states[start] = trie_pointer
+                else:
+                    to_remove.add(start)
+            if reset:
+                states = {}
+            else:
+                for start in to_remove:
+                    del states[start]
+            if current >= skip and current_char in self.data:
+                states[current] = self.data[current_char]
+        for start, trie_pointer in states.items():
+            if "" in trie_pointer:
+                end = len(text)
+                offsets.append([start, end])
+                break
+        return offsets
+    def split(self, text: str) -> List[str]:
+        """
+        Example:
+        ```python
+        >>> trie = Trie()
+        >>> trie.split("[CLS] This is a extra_id_100")
+        ["[CLS] This is a extra_id_100"]
+        >>> trie.add("[CLS]")
+        >>> trie.add("extra_id_1")
+        >>> trie.add("extra_id_100")
+        >>> trie.split("[CLS] This is a extra_id_100")
+        ["[CLS]", " This is a ", "extra_id_100"]
+        ```
+        """
+        word_sets = self.find(text)
+        offsets = [0]
+        for w in word_sets:
+            offsets.extend(w)
+        return self.cut_text(text, offsets)
+    def cut_text(self, text, offsets):
+        offsets.append(len(text))
+        tokens = []
+        start = 0
+        for end in offsets:
+            if start > end:
+                logger.error(
+                    "There was a bug in Trie algorithm in tokenization. Attempting to recover. Please report it anyway."
+                )
+                continue
+            elif start == end:
+                continue
+            tokens.append(text[start:end])
+            start = end
+        return tokens
+    def __reduce__(self):
+        return None
+if __name__ == "__main__":
+    trie = Trie()
+    for word in ["A", "AB", "BD", "BWA"]:
+        trie.add(word)
+    print(trie.__reduce__())

tools/model_utils/__init__.py ADDED Viewed

File without changes

tools/model_utils/calibrate.py ADDED Viewed

	@@ -0,0 +1,202 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2023/3/20 8:02 p.m.
+# @Author  : Jianing Wang
+# @File    : calibrate.py
+import os
+import numpy as np
+import torch
+"""
+Use LM to classify label words for calibrating CLS
+"""
+class CLSCalibrator:
+    # Placeholder: no behaviour is implemented yet. The module text directly
+    # above states the intent: use an LM to classify label words for calibrating CLS.
+    pass
+"""
+Use Causal LM to generate label words for calibrating CLS
+e.g., use gpt2 to generate a label word with in-context prompts, and calibrate for the prediction.
+Paper: http://proceedings.mlr.press/v139/zhao21c.html
+"""
+class CausalCLSCalibrator:
+    def __init__(self, model, tokenizer) -> None:
+        self.model = model
+        self.tokenizer = tokenizer
+    def calibrate(self, all_label_probs, content_free_examples, label2id, mode="diagonal_W"):
+        """Perform calibration for de-biasing and obtain calibrated probability"""
+        p_cf = self.get_content_free_prediction(content_free_examples, label2id)
+        num_classes = all_label_probs.shape[1]
+        if p_cf is None:
+            # do not calibrate
+            W = np.identity(num_classes)
+            b = np.zeros([num_classes, 1])
+        else:
+            # calibrate
+            if mode == "diagonal_W":
+                W = np.linalg.inv(np.identity(num_classes) * p_cf)
+                b = np.zeros([num_classes, 1])
+            elif mode == "identity_W":
+                W = np.identity(num_classes)
+                b = -1 * np.expand_dims(p_cf, axis=-1)
+            else:
+                assert False
+        all_calibrate_label_probs = list()
+        for label_probs in all_label_probs:
+            label_probs = label_probs / np.sum(label_probs) # normalize to 1
+            calibrate_label_probs = np.matmul(W, np.expand_dims(label_probs, axis=-1)) + b
+            all_calibrate_label_probs.append(calibrate_label_probs.squeeze().tolist())
+        return np.array(all_calibrate_label_probs)
+    def get_content_free_prediction(self, content_free_examples, label2id: dict):
+        """Query model with content free input, return its prediction probability for each label"""
+        all_p_y = []
+        for content_free_example in content_free_examples:
+            content_free_prompt = content_free_example["content_free_prompt"]
+            p_y = [0] * len(label2id)
+            for answers, i in label2id.items():
+                prob = 0
+                for a in answers:
+                    prob += np.exp(self.get_causal_cls_prediction(content_free_prompt + " " + a, 0, echo=True, num_log_probs=1)['choices'][0]['logprobs']['token_logprobs'][-1])
+                p_y[i] = prob
+            all_p_y.append(p_y)
+        p_y = np.mean(np.array(all_p_y), axis=0)
+        p_y = p_y / np.sum(p_y) # normalize
+        return p_y
+    def get_causal_cls_prediction(self, prompt, l=10, num_log_probs=None, echo=False):
+        ''' This function runs GPT-2 locally but places the outputs into an json that looks just like the one
+        provided by the OpenAI API. '''
+        if isinstance(prompt, str):
+            prompt = [prompt] # the code below assumes a list
+        input_ids = self.tokenizer.batch_encode_plus(prompt, return_tensors="pt", padding=True)
+        if l + len(input_ids['input_ids'][0]) > 1020:
+            m = l + len(input_ids['input_ids'][0]) - 1024
+            input_ids['input_ids'] = torch.Tensor([input_ids['input_ids'][0][m:].numpy()]).long()
+            input_ids['attention_mask'] = torch.Tensor([input_ids['attention_mask'][0][m:].numpy()]).long()
+        # greedily generate l tokens
+        # print("l=", l)
+        if l > 0:
+            # the generate function can handle left padded inputs automatically in HF
+            # total_sequences is now the input + possible generated output
+            # print("l + len(input_ids[input_ids][0]=", l + len(input_ids['input_ids'][0]))
+            total_sequences = self.model.generate(
+                input_ids=input_ids['input_ids'].to(self.model.device),
+                attention_mask=input_ids['attention_mask'].to(self.model.device),
+                max_length=l + len(input_ids['input_ids'][0]),
+                do_sample=False
+                )
+        else:
+            assert echo == True and l == 0
+            total_sequences = input_ids['input_ids'].to(self.model.device)
+        # print("="*50)
+        # print("total_sequences=", total_sequences) [batch, len+l]
+        # print("total_sequences.shape=", total_sequences.shape)
+        # they want the probs of the top tokens
+        if num_log_probs is not None:
+            # we are left padding, so we need to adjust the position IDs
+            attention_mask = (total_sequences != 50256).float()
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            # get the logits for the context and the next l tokens
+            logits = self.model.forward(input_ids=total_sequences, attention_mask=attention_mask, position_ids=position_ids, return_dict=True).logits.detach().cpu()
+            if not echo:
+                # get the top tokens and probs for the generated l tokens
+                probs = torch.softmax(logits[:,-l-1:], dim=2).cpu()
+            else:
+                # get the top tokens and probs for the context and the generated l tokens
+                probs = torch.softmax(logits, dim=2).cpu()
+            top_probs, top_tokens = torch.topk(probs, k=num_log_probs)
+            logprobs = torch.log(probs)
+            top_log_probs = torch.log(top_probs)
+            # print("top_log_probs=", top_log_probs)
+            # print("top_log_probs.shape=", top_log_probs.shape) # [1, 2, 100] [batch, 2, api_num_log_prob]
+        # create the return value to resemble OpenAI
+        return_json = {}
+        choices = []
+        # print("="*50)
+        for batch_id in range(len(prompt)):
+            curr_json = {}
+            # text is just the optional context and next l tokens
+            if not echo:
+                curr_json['text'] = self.tokenizer.decode(total_sequences[batch_id][-l:], skip_special_tokens=True)
+            else:
+                curr_json['text'] = self.tokenizer.decode(total_sequences[batch_id], skip_special_tokens=True)
+            # fill the return json with the top tokens and probs to match the OpenAI return value.
+            if num_log_probs is not None:
+                curr_json['logprobs'] = {}
+                curr_json['logprobs']['top_logprobs'] = []
+                curr_json['logprobs']['token_logprobs'] = []
+                curr_json['logprobs']['tokens'] = []
+                if not echo:
+                    # cutoff the -1 here because the probs are shifted one over for LMs
+                    for current_element_top_log_probs, current_element_top_tokens in zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1]):
+                        # tokens is a list of the top token at each position
+                        curr_json['logprobs']['tokens'].append(self.tokenizer.decode([current_element_top_tokens[0]]))
+                        # token_logprobs is a list of the logprob of the top token at each position
+                        curr_json['logprobs']['token_logprobs'].append(current_element_top_log_probs[0].item())
+                        # top_logprobs is a list of dicts for the top K tokens. with each entry being {'token_name': log_prob}
+                        temp = {}
+                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
+                            temp[self.tokenizer.decode(token.item())] = log_prob.item()
+                        curr_json['logprobs']['top_logprobs'].append(temp)
+                else:
+                    # same as not above but small tweaks
+                    # we add null to the front because for the GPT models, they have null probability for the first token
+                    # (for some reason they don't have an beginning of sentence token)
+                    curr_json['logprobs']['top_logprobs'].append('null')
+                    # cutoff the -1 here because the probs are shifted one over for LMs
+                    for index, (current_element_top_log_probs, current_element_top_tokens) in enumerate(zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1])):
+                        # skip padding tokens
+                        if total_sequences[batch_id][index].item() == 50256:
+                            continue
+                        temp = {}
+                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
+                            temp[self.tokenizer.decode(token.item())] = log_prob.item()
+                        curr_json['logprobs']['top_logprobs'].append(temp)
+                    for index in range(len(probs[batch_id])):
+                        curr_json['logprobs']['tokens'].append(self.tokenizer.decode([total_sequences[batch_id][index]]))
+                    curr_json['logprobs']['token_logprobs'].append('null')
+                    for index, log_probs_token_position_j in enumerate(logprobs[batch_id][:-1]):
+                        # probs are left shifted for LMs
+                        curr_json['logprobs']['token_logprobs'].append(log_probs_token_position_j[total_sequences[batch_id][index+1]])
+            choices.append(curr_json)
+            # print("curr_json=", curr_json)
+            '''
+            e.g.,
+            num_tokens_to_predict=1
+            curr_json= {
+                'text': ' I', # 当前生成的top词
+                'logprobs': {'top_logprobs': [{' I': -3.4267239570617676, '\n': -3.5073862075805664, ...], # top100词及其socre
+                'token_logprobs': [-3.4267239570617676], # 当前top词的score
+                'tokens': [' I']}
+            }
+            num_tokens_to_predict=2
+            curr_json= {
+                'text': '\nThe', # 如果指定生成两个词，则为两个词
+                'logprobs': {'top_logprobs': [ # 两个位置对应的预测的score
+                    {'\n': -3.186706304550171, '\xa0': -3.222092390060425, ' We': -6.781067848205566, ...},
+                    {'The': -2.5251243114471436, '"': -2.857935667037964, ...],
+                'token_logprobs': [-3.186706304550171, -2.5251243114471436], # 生成的词的score
+                'tokens': ['\n', 'The']}
+            }
+            '''
+        return_json['choices'] = choices
+        # print("="*50)
+        # print("return_json=", return_json)
+        return return_json

tools/model_utils/gpt_response.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2023/3/23 1:02 p.m.
+# @Author  : Jianing Wang
+# @File    : gpt_response.py
+import os
+import sys
+import torch
+import openai
+import time
+"""
+Call for GPT-style LLM.
+The output format is the same as OpenAI (e.g., GPT-3.5 text-davinci-003)
+"""
+class GPTResponse:
+    def __init__(self, model_type: str, data_path: str) -> None:
+        assert model_type in ["gpt2", "gpt3"]
+        self.model_type = model_type
+        if self.model_type == "gpt3":
+            with open(os.path.join(data_path, 'openai_key.txt'), 'r') as f:
+                key = f.readline().strip()
+                openai.api_key = key
+    def call_for_gpt3_response(self, prompt, l, model_name, temp=0, num_log_probs=None, echo=False, n=None):
+        """
+        call GPT-3 API until result is provided and then return it
+        """
+        response = None
+        received = False
+        while not received:
+            try:
+                response = openai.Completion.create(engine=model_name, prompt=prompt, max_tokens=l, temperature=temp,
+                                                    logprobs=num_log_probs, echo=echo, stop='\n', n=n)
+                received = True
+            except:
+                error = sys.exc_info()[0]
+                if error == openai.error.InvalidRequestError: # something is wrong: e.g. prompt too long
+                    print(f"InvalidRequestError\nPrompt passed in:\n\n{prompt}\n\n")
+                    assert False
+                print("API error:", error)
+                time.sleep(1)
+        return response
+    def call_for_gpt2_response(self, gpt2_tokenizer, logits, total_sequences, l=10, num_log_probs=None, echo=False, n=None):
+        """
+        Obtain the prediction logits from gpt2 in local, and convert it to the value that can match the response from OpenAI
+        """
+        if not echo:
+            # get the top tokens and probs for the generated l tokens
+            probs = torch.softmax(logits[:,-l-1:], dim=2).cpu()
+        else:
+            # get the top tokens and probs for the context and the generated l tokens
+            probs = torch.softmax(logits, dim=2).cpu()
+        # print("probs=", probs)
+        top_probs, top_tokens = torch.topk(probs, k=num_log_probs)
+        logprobs = torch.log(probs)
+        top_log_probs = torch.log(top_probs)
+        # create the return value to resemble OpenAI
+        return_json = {}
+        choices = []
+        # print("="*50)
+        for batch_id in range(len(logits)):
+            curr_json = {}
+            # text is just the optional context and next l tokens
+            if not echo:
+                curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id][-l:], skip_special_tokens=True)
+            else:
+                curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id], skip_special_tokens=True)
+            # fill the return json with the top tokens and probs to match the OpenAI return value.
+            if num_log_probs is not None:
+                curr_json['logprobs'] = {}
+                curr_json['logprobs']['top_logprobs'] = []
+                curr_json['logprobs']['token_logprobs'] = []
+                curr_json['logprobs']['tokens'] = []
+                if not echo:
+                    # cutoff the -1 here because the probs are shifted one over for LMs
+                    for current_element_top_log_probs, current_element_top_tokens in zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1]):
+                        # tokens is a list of the top token at each position
+                        curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([current_element_top_tokens[0]]))
+                        # token_logprobs is a list of the logprob of the top token at each position
+                        curr_json['logprobs']['token_logprobs'].append(current_element_top_log_probs[0].item())
+                        # top_logprobs is a list of dicts for the top K tokens. with each entry being {'token_name': log_prob}
+                        temp = {}
+                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
+                            temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
+                        curr_json['logprobs']['top_logprobs'].append(temp)
+                else:
+                    # same as not above but small tweaks
+                    # we add null to the front because for the GPT models, they have null probability for the first token
+                    # (for some reason they don't have an beginning of sentence token)
+                    curr_json['logprobs']['top_logprobs'].append('null')
+                    # cutoff the -1 here because the probs are shifted one over for LMs
+                    for index, (current_element_top_log_probs, current_element_top_tokens) in enumerate(zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1])):
+                        # skip padding tokens
+                        if total_sequences[batch_id][index].item() == 50256:
+                            continue
+                        temp = {}
+                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
+                            temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
+                        curr_json['logprobs']['top_logprobs'].append(temp)
+                    for index in range(len(probs[batch_id])):
+                        curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([total_sequences[batch_id][index]]))
+                    curr_json['logprobs']['token_logprobs'].append('null')
+                    for index, log_probs_token_position_j in enumerate(logprobs[batch_id][:-1]):
+                        # probs are left shifted for LMs
+                        curr_json['logprobs']['token_logprobs'].append(log_probs_token_position_j[total_sequences[batch_id][index+1]])
+            choices.append(curr_json)
+            # print("curr_json=", curr_json)
+            '''
+            e.g.,
+            num_tokens_to_predict=1
+            curr_json= {
+                'text': ' I', # 当前生成的top词
+                'logprobs': {'top_logprobs': [{' I': -3.4267239570617676, '\n': -3.5073862075805664, ...], # top100词及其socre
+                'token_logprobs': [-3.4267239570617676], # 当前top词的score
+                'tokens': [' I']}
+            }
+            num_tokens_to_predict=2
+            curr_json= {
+                'text': '\nThe', # 如果指定生成两个词，则为两个词
+                'logprobs': {'top_logprobs': [ # 两个位置对应的预测的score
+                    {'\n': -3.186706304550171, '\xa0': -3.222092390060425, ' We': -6.781067848205566, ...},
+                    {'The': -2.5251243114471436, '"': -2.857935667037964, ...],
+                'token_logprobs': [-3.186706304550171, -2.5251243114471436], # 生成的词的score
+                'tokens': ['\n', 'The']}
+            }
+            '''
+        return_json['choices'] = choices
+        # print("="*50)
+        # print("return_json=", return_json)
+        return return_json

tools/model_utils/parameter_freeze.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2023/02/18 02:07 p.m.
+# @Author  : JianingWang
+# @File    : parameter_freeze.py
+import torch
+"""
+This is used for parameter freezing and unfreezing, which can be viewed as a parameter-efficient setting.
+"""
+class ParameterFreeze():
+    # freeze all parameters
+    def freeze_lm(self, model: torch.nn.Module):
+        for name, param in model.named_parameters():
+            param.requires_grad = False
+        return model
+    # freeze all parameters without cls / mlm head
+    def freeze_lm_encoder(self, model: torch.nn.Module):
+        for name, param in model.named_parameters():
+            if "lm_head" in name or ("cls" in name):
+                print(name)
+                continue
+            param.requires_grad = False
+        return model
+    # freeze all parameters without bias
+    def freeze_lm_finetune_bias(self, model: torch.nn.Module):
+        for name, param in model.named_parameters():
+            if "bias" in name:
+                print(name)
+                continue
+            param.requires_grad = False
+        return model
+    # freeze the component that user defined
+    def freeze_lm_component(self, model: torch.nn.Module, component: str):
+        if "attention" in component:
+            for name, param in model.named_parameters():
+                if "attention" in name:
+                    if "output" in component:
+                        if "output" in name:
+                            continue
+                    else:
+                        continue
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif "feedforward" in component:
+            for name, param in model.named_parameters():
+                if "dense" in name and "attention" not in name:
+                    if "output" in component:
+                        if "output" in name:
+                            continue
+                    else:
+                        if "intermediate" in component:
+                            if "intermediate" in name:
+                                continue
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif component == "adapter":
+            for name, param in model.named_parameters():
+                if "adapter" in name:
+                    continue
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif "embedding" in component:
+            for name, param in model.named_parameters():
+                if "embedding" in name:
+                    continue
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif "bias" in component:
+            for name, param in model.named_parameters():
+                if "bias" in name:
+                    continue
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif "head" in component:
+            for name, param in model.named_parameters():
+                param.requires_grad = False
+            model = self.unfreeze_classification_head(model)
+        elif "prompt_emb" in component:
+            for name, param in model.named_parameters():
+                if "prompt_emb" in name:
+                    continue
+                param.requires_grad = False
+        return model
+    # unfreeze cls head
+    def unfreeze_classification_head(self, model: torch.nn.Module):
+        for name, param in model.named_parameters():
+            if "lm_head" in name or ("cls" in name) or ("classifier" in name):
+                param.requires_grad = True
+        return model
+    # freeze k layers
+    def freeze_lm_k_layers(self, model: torch.nn.Module, k):
+        keep_layers = []
+        update_parameters = []
+        for i in range(k):
+            keep_layers.append("layer."+str(23-i))
+        for name, param in model.named_parameters():
+            update = False
+            for layer_num in keep_layers:
+                if layer_num in name:
+                    if "dense" in name and "attention" not in name:
+                        if "output" in name:
+                            print(name)
+                            update_parameters.append(name)
+                            update = True
+            if not update:
+                param.requires_grad = False
+        model = self.unfreeze_classification_head(model)
+        return model
+    def unfreeze_lm(self, model: torch.nn.Module):
+        for param in model.parameters():
+            param.requires_grad = True
+        return model

tools/model_utils/uncertainty.py ADDED Viewed

	@@ -0,0 +1,137 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2023/04/18 08:11 p.m.
+# @Author  : JianingWang
+# @File    : uncertainty.py
+from sklearn.utils import shuffle
+import logging
+import numpy as np
+import os
+import random
+logger = logging.getLogger(__name__)
+def get_BALD_acquisition(y_T):
+	expected_entropy = - np.mean(np.sum(y_T * np.log(y_T + 1e-10), axis=-1), axis=0)
+	expected_p = np.mean(y_T, axis=0)
+	entropy_expected_p = - np.sum(expected_p * np.log(expected_p + 1e-10), axis=-1)
+	return (entropy_expected_p - expected_entropy)
+def sample_by_bald_difficulty(tokenizer, X, y_mean, y_var, y, num_samples, num_classes, y_T):
+	logger.info ("Sampling by difficulty BALD acquisition function")
+	BALD_acq = get_BALD_acquisition(y_T)
+	p_norm = np.maximum(np.zeros(len(BALD_acq)), BALD_acq)
+	p_norm = p_norm / np.sum(p_norm)
+	indices = np.random.choice(len(X['input_ids']), num_samples, p=p_norm, replace=False)
+	X_s = {"input_ids": X["input_ids"][indices], "token_type_ids": X["token_type_ids"][indices], "attention_mask": X["attention_mask"][indices]}
+	y_s = y[indices]
+	w_s = y_var[indices][:,0]
+	return X_s, y_s, w_s
+def sample_by_bald_easiness(tokenizer, X, y_mean, y_var, y, num_samples, num_classes, y_T):
+	logger.info ("Sampling by easy BALD acquisition function")
+	BALD_acq = get_BALD_acquisition(y_T)
+	p_norm = np.maximum(np.zeros(len(BALD_acq)), (1. - BALD_acq)/np.sum(1. - BALD_acq))
+	p_norm = p_norm / np.sum(p_norm)
+	logger.info (p_norm[:10])
+	indices = np.random.choice(len(X['input_ids']), num_samples, p=p_norm, replace=False)
+	X_s = {"input_ids": X["input_ids"][indices], "token_type_ids": X["token_type_ids"][indices], "attention_mask": X["attention_mask"][indices]}
+	y_s = y[indices]
+	w_s = y_var[indices][:,0]
+	return X_s, y_s, w_s
+def sample_by_bald_class_easiness(tokenizer, X, y_mean, y_var, y, num_samples, num_classes, y_T):
+	"""
+	Sample the same number of examples from every class, favouring low BALD scores ("easy" examples).
+	Args:
+		tokenizer, y_mean: accepted but not used by this function.
+		X: dataset-like object supporting X[name] and `name in X.features`. "input_ids" and
+			"attention_mask" are always read; "token_type_ids" and "mask_pos" only when present.
+		y_var: array indexed like y; column 0 of the selected rows is returned as the weight.
+		y: array of class labels, compared against each value in range(num_classes).
+		num_samples: total budget; each class receives num_samples // num_classes draws.
+		num_classes: number of classes to iterate over.
+		y_T: passed to get_BALD_acquisition to obtain one score per example.
+	Returns:
+		(pseudo_labeled_input, y_s, w_s): a dict of numpy arrays keyed by column name, the
+		sampled labels and the sampled weights, after being passed together through shuffle().
+	"""
+	logger.info ("Sampling by easy BALD acquisition function per class")
+	BALD_acq = get_BALD_acquisition(y_T)
+	BALD_acq = (1. - BALD_acq)/np.sum(1. - BALD_acq)
+	logger.info (BALD_acq)
+	samples_per_class = num_samples // num_classes
+	# This order is also the positional order handed to shuffle() below.
+	columns = ["input_ids"]
+	columns.extend(name for name in ("token_type_ids", "mask_pos") if name in X.features)
+	columns.append("attention_mask")
+	# Convert every column once up front rather than once per class.
+	features = {name: np.array(X[name]) for name in columns}
+	sampled = {name: [] for name in columns}
+	y_s, w_s = [], []
+	for label in range(num_classes):
+		in_class = y == label
+		num_in_class = len(features["input_ids"][in_class])
+		if num_in_class == 0:
+			# Nothing to draw from for this label.
+			continue
+		class_scores = BALD_acq[in_class]
+		p_norm = np.maximum(np.zeros(len(class_scores)), class_scores)
+		p_norm = p_norm/np.sum(p_norm)
+		# Too few examples for a draw without repeats: fall back to sampling with replacement.
+		replace = num_in_class < samples_per_class
+		if replace:
+			logger.info ("Sampling with replacement.")
+		indices = np.random.choice(num_in_class, samples_per_class, p=p_norm, replace=replace)
+		for name in columns:
+			sampled[name].extend(features[name][in_class][indices])
+		y_s.extend(y[in_class][indices])
+		w_s.extend(y_var[in_class][indices][:,0])
+	*shuffled_columns, y_s, w_s = shuffle(*(sampled[name] for name in columns), y_s, w_s)
+	shuffled = dict(zip(columns, shuffled_columns))
+	pseudo_labeled_input = {
+		'input_ids': np.array(shuffled['input_ids']),
+		'attention_mask': np.array(shuffled['attention_mask'])
+	}
+	for name in ("token_type_ids", "mask_pos"):
+		if name in shuffled:
+			pseudo_labeled_input[name] = np.array(shuffled[name])
+	return pseudo_labeled_input, np.array(y_s), np.array(w_s)
+def sample_by_bald_class_difficulty(tokenizer, X, y_mean, y_var, y, num_samples, num_classes, y_T):
+	"""
+	Sample the same number of examples from every class, favouring high BALD scores ("hard" examples).
+	Args:
+		tokenizer, y_mean: accepted but not used by this function.
+		X: mapping with "input_ids", "token_type_ids" and "attention_mask" entries that support
+			boolean-mask indexing (assumes numpy arrays - TODO confirm against the caller).
+		y_var: array indexed like y; column 0 of the selected rows is returned as the weight.
+		y: array of class labels, compared against each value in range(num_classes).
+		num_samples: total budget; each class receives num_samples // num_classes draws.
+		num_classes: number of classes to iterate over.
+		y_T: passed to get_BALD_acquisition to obtain one score per example.
+	Returns:
+		(inputs, y_s, w_s): a dict of numpy arrays, the sampled labels and the sampled weights,
+		after being passed together through shuffle().
+	"""
+	logger.info ("Sampling by difficulty BALD acquisition function per class")
+	BALD_acq = get_BALD_acquisition(y_T)
+	samples_per_class = num_samples // num_classes
+	X_s_input_ids, X_s_token_type_ids, X_s_attention_mask, y_s, w_s = [], [], [], [], []
+	for label in range(num_classes):
+		in_class = y == label
+		X_input_ids = X['input_ids'][in_class]
+		if len(X_input_ids) == 0:
+			# A label without examples has nothing to sample; np.random.choice would raise on it.
+			continue
+		X_token_type_ids, X_attention_mask = X['token_type_ids'][in_class], X['attention_mask'][in_class]
+		y_ = y[in_class]
+		y_var_ = y_var[in_class]
+		p_norm = BALD_acq[in_class]
+		p_norm = np.maximum(np.zeros(len(p_norm)), p_norm)
+		p_norm = p_norm/np.sum(p_norm)
+		# Too few examples for a draw without repeats: fall back to sampling with replacement.
+		replace = len(X_input_ids) < samples_per_class
+		if replace:
+			logger.info ("Sampling with replacement.")
+		indices = np.random.choice(len(X_input_ids), samples_per_class, p=p_norm, replace=replace)
+		X_s_input_ids.extend(X_input_ids[indices])
+		X_s_token_type_ids.extend(X_token_type_ids[indices])
+		X_s_attention_mask.extend(X_attention_mask[indices])
+		y_s.extend(y_[indices])
+		w_s.extend(y_var_[indices][:,0])
+	X_s_input_ids, X_s_token_type_ids, X_s_attention_mask, y_s, w_s = shuffle(X_s_input_ids, X_s_token_type_ids, X_s_attention_mask, y_s, w_s)
+	return {'input_ids': np.array(X_s_input_ids), 'token_type_ids': np.array(X_s_token_type_ids), 'attention_mask': np.array(X_s_attention_mask)}, np.array(y_s), np.array(w_s)

tools/processing_utils/common.py ADDED Viewed

	@@ -0,0 +1,38 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/2 5:41 p.m.
+# @Author  : JianingWang
+# @File    : common.py
+def is_chinese_char(cp):
+    """Return True when the integer codepoint `cp` falls in one of the CJK ideograph ranges below."""
+    # "Chinese character" here means anything in the CJK ideograph blocks:
+    #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+    #
+    # Despite the name, these blocks do NOT cover all Japanese and Korean text:
+    # Hangul, Hiragana and Katakana live in other blocks. Those scripts write
+    # space-separated words, so they are handled like every other language.
+    cjk_ranges = (
+        (0x4E00, 0x9FFF),    # CJK Unified Ideographs
+        (0x3400, 0x4DBF),    # Extension A
+        (0x20000, 0x2A6DF),  # Extension B
+        (0x2A700, 0x2B73F),  # Extension C
+        (0x2B740, 0x2B81F),  # Extension D
+        (0x2B820, 0x2CEAF),  # Extension E
+        (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
+        (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
+    )
+    return any(low <= cp <= high for low, high in cjk_ranges)
+def is_chinese(word: str):
+    """
+    Return 1 when every character of `word` passes is_chinese_char, otherwise 0.
+    Examples of inputs: "180" -> 0, "身高" -> 1, "神" -> 1. An empty string yields 1.
+    """
+    return int(all(is_chinese_char(ord(char)) for char in word))

tools/processing_utils/sampler.py ADDED Viewed

	@@ -0,0 +1,26 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/2 5:41 p.m.
+# @Author  : JianingWang
+# @File    : sampler.py
+import numpy as np
+from typing import Optional
+"""
+random sampling for each label
+"""
+def random_sampling(raw_datasets, num_examples_per_label: Optional[int]=16):
+    """
+    Pick up to `num_examples_per_label` example indices for every label, uniformly at random.
+    Args:
+        raw_datasets: object whose "label" entry lists one label per example, e.g. [0, 1, 0, 0, ...].
+        num_examples_per_label: examples to draw per label. A label with fewer examples
+            contributes all of them; `None` keeps every example of every label.
+    Returns:
+        A flat list of example indices, grouped by label in order of first appearance.
+    """
+    # Group example indices by label.
+    label_to_ids = dict()
+    for example_id, label in enumerate(raw_datasets["label"]):
+        label_to_ids.setdefault(label, []).append(example_id)
+    few_example_ids = list()
+    for example_ids in label_to_ids.values():
+        if num_examples_per_label is None:
+            few_example_ids.extend(example_ids)
+            continue
+        # Sampling without replacement cannot return more items than exist, so cap the request.
+        size = min(num_examples_per_label, len(example_ids))
+        chosen = np.random.choice(len(example_ids), size=size, replace=False)
+        few_example_ids.extend(example_ids[i] for i in chosen)
+    return few_example_ids

tools/processing_utils/tokenizer/JiebaTokenizer.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/8 12:07 a.m.
+# @Author  : JianingWang
+# @File    : JiebaTokenizer
+import jieba
+from transformers import BertTokenizer
+class JiebaTokenizer(BertTokenizer):
+    """BertTokenizer that first splits text with a pre-tokenizer (jieba by default)."""
+    def __init__(self, pre_tokenizer=lambda x: jieba.cut(x, HMM=False), *args, **kwargs):
+        # NOTE(review): `pre_tokenizer` is the first positional parameter, so any other
+        # BertTokenizer argument has to be passed after it or by keyword.
+        super().__init__(*args, **kwargs)
+        self.pre_tokenizer = pre_tokenizer
+    def _tokenize(self, text, *arg, **kwargs):
+        """Keep pre-tokenized words found in the vocab whole; split the rest with BertTokenizer."""
+        pieces = []
+        for word in self.pre_tokenizer(text):
+            if word not in self.vocab:
+                pieces.extend(super()._tokenize(word))
+                continue
+            pieces.append(word)
+        return pieces

tools/processing_utils/tokenizer/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/8 12:07 a.m.
+# @Author  : JianingWang
+# @File    : __init__.py

tools/processing_utils/tokenizer/tokenizer_utils.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from transformers import AutoTokenizer
+"""
+obtain special tokens
+"""
+def get_special_token_mapping(tokenizer: AutoTokenizer):
+    """
+    Map the names "cls", "mask", "sep", "sep+" and "pseudo_token" to token ids of `tokenizer`.
+    Tokenizers whose class name contains "t5" get the hard-coded ids 3 ("cls") and 32099 ("mask")
+    and use the EOS id as separator; all others use their own cls / mask / sep ids.
+    "pseudo_token" is always the UNK id.
+    """
+    is_t5 = "t5" in type(tokenizer).__name__.lower()
+    separator_attr = "eos_token_id" if is_t5 else "sep_token_id"
+    # NOTE(review): 3 and 32099 are presumably ids in the standard T5 vocabulary - confirm.
+    return {
+        "cls": 3 if is_t5 else tokenizer.cls_token_id,
+        "mask": 32099 if is_t5 else tokenizer.mask_token_id,
+        "sep": getattr(tokenizer, separator_attr),
+        "sep+": getattr(tokenizer, separator_attr),
+        "pseudo_token": tokenizer.unk_token_id,
+    }

tools/runner_utils/__init__.py ADDED Viewed

File without changes

tools/runner_utils/conifg_extensive.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from transformers import AutoConfig
+from config import ModelArguments
+# add external config.
+def config_extensive(hf_config: AutoConfig, model_config: ModelArguments):
+    """Copy a fixed set of attributes from `model_config` onto `hf_config` and return `hf_config`."""
+    copied_fields = (
+        "use_prompt_for_cls",
+        "use_freezing",
+        "adapter_choice",
+        "adapter_dim",
+        "pre_seq_len",
+        "prefix_projection",
+        "prefix_hidden_size",
+        "hidden_dropout_prob",
+    )
+    for field in copied_fields:
+        setattr(hf_config, field, getattr(model_config, field))
+    return hf_config

tools/runner_utils/log_util.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import sys
+import logging
+import datasets
+import transformers
+def init_logger(log_file, log_level, dist_rank):
+    """
+    Configure logging for `datasets`, `transformers` and the root logger.
+    Args:
+        log_file: path opened in append mode by the file handler.
+        log_level: level applied to both libraries, the root logger and the file handler.
+        dist_rank: the file handler is only created when this is -1 or 0.
+    """
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+    datasets.utils.logging.disable_propagation()
+    formatter = logging.Formatter(fmt="[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s >> %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    root_logger = logging.getLogger("")
+    root_logger.setLevel(log_level)
+    # Every rank logs to stderr.
+    stderr_handler = logging.StreamHandler(sys.stderr)
+    stderr_handler.setFormatter(formatter)
+    root_logger.addHandler(stderr_handler)
+    if dist_rank not in [-1, 0]:
+        return
+    # One shared file handler, attached to the root logger and to the "transformers" logger.
+    file_handler = logging.FileHandler(log_file, mode="a")
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(formatter)
+    for target in (root_logger, logging.getLogger("transformers")):
+        target.addHandler(file_handler)

tools/runner_utils/retrying.py ADDED Viewed

	@@ -0,0 +1,288 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2021/12/24 4:05 p.m.
+# @Author  : JianingWang
+# @File    : retrying.py
+import random
+import six
+import sys
+import time
+import traceback
+MAX_WAIT = 1073741823
+def _retry_if_exception_of_type(retryable_types):
+    """Build a predicate that is true for exceptions that are instances of `retryable_types`."""
+    return lambda exception: isinstance(exception, retryable_types)
+def retry(*dargs, **dkw):
+    """
+    Decorator that runs the wrapped function through a fresh Retrying object on every call.
+    Works both bare (@retry) and with arguments (@retry(...)).
+    @param *dargs: positional arguments passed to the Retrying object
+    @param **dkw: keyword arguments passed to the Retrying object
+    """
+    # A single callable positional argument means the bare @retry form: the callable is the
+    # function to wrap and Retrying is built with its defaults.
+    used_bare = len(dargs) == 1 and callable(dargs[0])
+    retrying_args, retrying_kwargs = ((), {}) if used_bare else (dargs, dkw)
+    def decorate(f):
+        @six.wraps(f)
+        def wrapped_f(*args, **kw):
+            return Retrying(*retrying_args, **retrying_kwargs).call(f, *args, **kw)
+        return wrapped_f
+    return decorate(dargs[0]) if used_bare else decorate
+class Retrying(object):
+    """
+    Retry policy: calls a function repeatedly until its outcome is accepted or a stop condition fires.
+    All waits and delays are in milliseconds (`call` divides the wait by 1000 before sleeping).
+    NOTE(review): when none of `stop`, `stop_func`, `stop_max_attempt_number` and `stop_max_delay`
+    is supplied, no stop check is registered and a function that keeps failing is retried forever.
+    """
+    def __init__(self,
+                 stop=None, wait=None,
+                 stop_max_attempt_number=None,
+                 stop_max_delay=None,
+                 wait_fixed=None,
+                 wait_random_min=None, wait_random_max=None,
+                 wait_incrementing_start=None, wait_incrementing_increment=None,
+                 wait_incrementing_max=None,
+                 wait_exponential_multiplier=None, wait_exponential_max=None,
+                 retry_on_exception=None,
+                 retry_on_result=None,
+                 wrap_exception=False,
+                 stop_func=None,
+                 wait_func=None,
+                 wait_jitter_max=None,
+                 before_attempts=None,
+                 after_attempts=None,
+                 skip_raise=False):
+        """
+        Args:
+            stop, wait: name of a method of this object to use as the stop / wait policy.
+            stop_func, wait_func: callables taking (attempt_number, delay_ms); they take
+                precedence over `stop` / `wait`.
+            stop_max_attempt_number, stop_max_delay: limits for stop_after_attempt /
+                stop_after_delay; each check is only registered when its argument is given.
+            wait_fixed, wait_random_*, wait_incrementing_*, wait_exponential_*: settings of the
+                fixed, random, incrementing and exponential waits; each wait is only registered
+                when one of its arguments is given, and the longest registered wait is used.
+            wait_jitter_max: upper bound of a random extra wait added in `call`.
+            retry_on_exception: predicate on the raised exception, or a tuple of exception
+                types; by default every exception triggers a retry.
+            retry_on_result: predicate on the return value; by default no result triggers a retry.
+            wrap_exception: raise RetryError instead of re-raising the original exception.
+            before_attempts: optional callback run with the attempt number before each attempt.
+            after_attempts: optional callback run with the attempt number after each rejected attempt.
+            skip_raise: when stopping on an exception with wrap_exception false, make `call`
+                return None instead of raising.
+        """
+        self._stop_max_attempt_number = 5 if stop_max_attempt_number is None else stop_max_attempt_number
+        self._stop_max_delay = 100 if stop_max_delay is None else stop_max_delay
+        self._wait_fixed = 1000 if wait_fixed is None else wait_fixed
+        self._wait_random_min = 0 if wait_random_min is None else wait_random_min
+        self._wait_random_max = 1000 if wait_random_max is None else wait_random_max
+        self._wait_incrementing_start = 0 if wait_incrementing_start is None else wait_incrementing_start
+        self._wait_incrementing_increment = 100 if wait_incrementing_increment is None else wait_incrementing_increment
+        self._wait_exponential_multiplier = 1 if wait_exponential_multiplier is None else wait_exponential_multiplier
+        self._wait_exponential_max = MAX_WAIT if wait_exponential_max is None else wait_exponential_max
+        self._wait_incrementing_max = MAX_WAIT if wait_incrementing_max is None else wait_incrementing_max
+        self._wait_jitter_max = 0 if wait_jitter_max is None else wait_jitter_max
+        self._before_attempts = before_attempts
+        self._after_attempts = after_attempts
+        self._skip_raise = skip_raise
+        # stop behavior
+        stop_funcs = []
+        if stop_max_attempt_number is not None:
+            stop_funcs.append(self.stop_after_attempt)
+        if stop_max_delay is not None:
+            stop_funcs.append(self.stop_after_delay)
+        if stop_func is not None:
+            self.stop = stop_func
+        elif stop is None:
+            # Stop as soon as any registered check says so; with no checks this is never true.
+            self.stop = lambda attempts, delay: any(f(attempts, delay) for f in stop_funcs)
+        else:
+            self.stop = getattr(self, stop)
+        # wait behavior
+        # The zero-wait entry keeps max() below well-defined when nothing else is registered.
+        wait_funcs = [lambda *args, **kwargs: 0]
+        if wait_fixed is not None:
+            wait_funcs.append(self.fixed_sleep)
+        if wait_random_min is not None or wait_random_max is not None:
+            wait_funcs.append(self.random_sleep)
+        if wait_incrementing_start is not None or wait_incrementing_increment is not None:
+            wait_funcs.append(self.incrementing_sleep)
+        if wait_exponential_multiplier is not None or wait_exponential_max is not None:
+            wait_funcs.append(self.exponential_sleep)
+        if wait_func is not None:
+            self.wait = wait_func
+        elif wait is None:
+            self.wait = lambda attempts, delay: max(f(attempts, delay) for f in wait_funcs)
+        else:
+            self.wait = getattr(self, wait)
+        # retry on exception filter
+        if retry_on_exception is None:
+            self._retry_on_exception = self.always_reject
+        else:
+            # this allows for providing a tuple of exception types that
+            # should be allowed to retry on, and avoids having to create
+            # a callback that does the same thing
+            if isinstance(retry_on_exception, (tuple)):
+                retry_on_exception = _retry_if_exception_of_type(
+                    retry_on_exception)
+            self._retry_on_exception = retry_on_exception
+        # retry on result filter
+        if retry_on_result is None:
+            self._retry_on_result = self.never_reject
+        else:
+            self._retry_on_result = retry_on_result
+        self._wrap_exception = wrap_exception
+    def stop_after_attempt(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """Stop after the previous attempt >= stop_max_attempt_number."""
+        return previous_attempt_number >= self._stop_max_attempt_number
+    def stop_after_delay(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """Stop after the time from the first attempt >= stop_max_delay."""
+        return delay_since_first_attempt_ms >= self._stop_max_delay
+    @staticmethod
+    def no_sleep(previous_attempt_number, delay_since_first_attempt_ms):
+        """Don't sleep at all before retrying."""
+        return 0
+    def fixed_sleep(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """Sleep a fixed amount of time between each retry."""
+        return self._wait_fixed
+    def random_sleep(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """Sleep a random amount of time between wait_random_min and wait_random_max"""
+        return random.randint(self._wait_random_min, self._wait_random_max)
+    def incrementing_sleep(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """
+        Sleep an incremental amount of time after each attempt, starting at
+        wait_incrementing_start and incrementing by wait_incrementing_increment
+        """
+        result = self._wait_incrementing_start + (self._wait_incrementing_increment * (previous_attempt_number - 1))
+        if result > self._wait_incrementing_max:
+            result = self._wait_incrementing_max
+        if result < 0:
+            result = 0
+        return result
+    def exponential_sleep(self, previous_attempt_number, delay_since_first_attempt_ms):
+        """Sleep wait_exponential_multiplier * 2 ** attempt, capped at wait_exponential_max."""
+        exp = 2 ** previous_attempt_number
+        result = self._wait_exponential_multiplier * exp
+        if result > self._wait_exponential_max:
+            result = self._wait_exponential_max
+        if result < 0:
+            result = 0
+        return result
+    @staticmethod
+    def never_reject(result):
+        """Default result filter: accept every return value."""
+        return False
+    @staticmethod
+    def always_reject(result):
+        """Default exception filter: retry on every exception."""
+        return True
+    def should_reject(self, attempt):
+        """Return True when `attempt` must be retried according to the exception / result filter."""
+        reject = False
+        if attempt.has_exception:
+            # attempt.value is the sys.exc_info() triple; index 1 is the exception instance.
+            reject |= self._retry_on_exception(attempt.value[1])
+        else:
+            reject |= self._retry_on_result(attempt.value)
+        return reject
+    def call(self, fn, *args, **kwargs):
+        """
+        Call fn(*args, **kwargs) until an attempt is accepted or the stop policy fires.
+        Returns:
+            The accepted return value, or None when stopping on an exception with skip_raise set.
+        Raises:
+            The last exception raised by `fn`, or RetryError when wrap_exception is set or the
+            last attempt was a rejected return value.
+        """
+        start_time = int(round(time.time() * 1000))
+        attempt_number = 1
+        while True:
+            if self._before_attempts:
+                self._before_attempts(attempt_number)
+            try:
+                attempt = Attempt(fn(*args, **kwargs), attempt_number, False)
+            except Exception:
+                # Only ordinary errors are retried; KeyboardInterrupt and SystemExit propagate
+                # so that a retry loop can still be interrupted.
+                tb = sys.exc_info()
+                attempt = Attempt(tb, attempt_number, True)
+            if not self.should_reject(attempt):
+                return attempt.get(self._wrap_exception)
+            if self._after_attempts:
+                self._after_attempts(attempt_number)
+            delay_since_first_attempt_ms = int(round(time.time() * 1000)) - start_time
+            if self.stop(attempt_number, delay_since_first_attempt_ms):
+                if not self._wrap_exception and attempt.has_exception:
+                    # get() on an attempt with an exception should cause it to be raised, but raise just in case
+                    if not self._skip_raise:
+                        raise attempt.get()
+                    else:
+                        break
+                else:
+                    raise RetryError(attempt)
+            else:
+                sleep = self.wait(attempt_number, delay_since_first_attempt_ms)
+                if self._wait_jitter_max:
+                    jitter = random.random() * self._wait_jitter_max
+                    sleep = sleep + max(0, jitter)
+                time.sleep(sleep / 1000.0)
+            attempt_number += 1
+class Attempt(object):
+    """
+    Outcome of one call to the target function: either its return value or,
+    when the call raised, the exception information captured for it.
+    """
+    def __init__(self, value, attempt_number, has_exception):
+        self.value, self.attempt_number, self.has_exception = value, attempt_number, has_exception
+    def get(self, wrap_exception=False):
+        """
+        Return the stored value, or raise when this attempt ended in an exception.
+        With wrap_exception true a RetryError wrapping this Attempt is raised;
+        otherwise the stored exception is re-raised.
+        """
+        if not self.has_exception:
+            return self.value
+        if wrap_exception:
+            raise RetryError(self)
+        six.reraise(self.value[0], self.value[1], self.value[2])
+    def __repr__(self):
+        if not self.has_exception:
+            return "Attempts: {0}, Value: {1}".format(self.attempt_number, self.value)
+        trace_text = "".join(traceback.format_tb(self.value[2]))
+        return "Attempts: {0}, Error:\n{1}".format(self.attempt_number, trace_text)
+class RetryError(Exception):
+    """
+    Raised when retrying gives up; carries the last Attempt made before stopping.
+    """
+    def __init__(self, last_attempt):
+        # Kept as an attribute so callers can inspect the final attempt.
+        self.last_attempt = last_attempt
+    def __str__(self):
+        template = "RetryError[{0}]"
+        return template.format(self.last_attempt)

tools/runner_utils/set_seed.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import torch
+import random
+import numpy as np
+from transformers.utils import (
+    is_tf_available,
+    is_torch_available,
+)
+def set_seed(seed_value: int):
+    """
+    Helper for reproducible behavior: seeds `random`, `numpy` and, when available, `torch` (CPU and all CUDA devices).
+    Args:
+        seed_value (`int`): The seed to set.
+    """
+    for seed_fn in (random.seed, np.random.seed):
+        seed_fn(seed_value)
+    if not is_torch_available():
+        return
+    torch.manual_seed(seed_value)
+    torch.cuda.manual_seed_all(seed_value)

tools/runner_utils/timecost.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# -*- coding: utf-8 -*-
+# @Time    : 2022/3/11 3:06 p.m.
+# @Author  : JianingWang
+# @File    : time
+import time
+import logging
+logger = logging.getLogger(__name__)
+def timecost(method):
+    """
+    Decorator that logs how long each call to `method` takes, in milliseconds.
+    The wrapper keeps the name and docstring of `method` and returns its result unchanged.
+    Nothing is logged when `method` raises.
+    """
+    import functools  # local import: this module only imports time and logging at file level
+    @functools.wraps(method)
+    def timed(*args, **kw):
+        ts = time.time()
+        result = method(*args, **kw)
+        te = time.time()
+        logger.info("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
+        return result
+    return timed
+    return timed