Use the flagging threshold to filter out uninteresting tokens
completions.py +1 -1
expand_llm.py +3 -3
completions.py CHANGED
@@ -92,7 +92,7 @@ def check_text(input_text: str, model: PreTrainedModel, tokenizer: Tokenizer, de

 contexts = [word.context for _, word in low_prob_words]

-expander = LLMBatchExpander(model, tokenizer)
+expander = LLMBatchExpander(model, tokenizer, threshold=log_prob_threshold)

 #%%
 series = []
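With this change the expander receives the same cutoff that check_text uses to flag low-probability words, rather than a constant buried in expand_llm.py. For a sense of scale, a minimal sketch assuming the old hardcoded default of -10.0 as the value of log_prob_threshold:

import math

# Assumed value: the constant that was previously hardcoded in expand_llm.py.
log_prob_threshold = -10.0

# On the raw-probability scale, a log-prob cutoff of -10.0 means candidate
# tokens less likely than roughly 4.54e-05 are dropped.
print(math.exp(log_prob_threshold))  # ~4.54e-05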
expand_llm.py CHANGED
@@ -6,7 +6,7 @@ import time

 type Tokenizer = PreTrainedTokenizer | PreTrainedTokenizerFast

-def find_next_tokens(model: PreTrainedModel, inputs: BatchEncoding, tokenizer: Tokenizer) -> list[list[tuple[int, float]]]:
+def find_next_tokens(model: PreTrainedModel, inputs: BatchEncoding, threshold: float) -> list[list[tuple[int, float]]]:
     input_ids = inputs["input_ids"]
     attention_mask = inputs["attention_mask"]
     print("Running inference")
@@ -21,7 +21,6 @@ def find_next_tokens(model: PreTrainedModel, inputs: BatchEncoding, tokenizer: T
     start_time = time.time()
     result = []
     print(f"Resulting tensor: {log_probs.shape}")
-    threshold = -10.0
     for probs in log_probs:
         # Filter out low probability tokens for efficiency
         above_threshold = torch.where(probs > threshold)
@@ -39,10 +38,11 @@ def prepare_inputs(contexts: list[list[int]], tokenizer: Tokenizer, device: torc
 class LLMBatchExpander(BatchExpander):
     model: PreTrainedModel
     tokenizer: Tokenizer
+    threshold: float

     def expand(self, batch: Batch) -> BatchCandidates:
         inputs = prepare_inputs([s.get_all_tokens() for s in batch.items], self.tokenizer, self.model.device)
-        next_tokens = find_next_tokens(self.model, inputs, self.tokenizer)
+        next_tokens = find_next_tokens(self.model, inputs, self.threshold)
         start_time = time.time()
         results = []
         print(f"Batch size: {len(batch.items)}, next tokens size: {len(next_tokens)}")
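For reference, the filtering step that the threshold now parameterizes reduces to a log-softmax followed by a thresholded torch.where. A minimal self-contained sketch, assuming logits of shape (batch, vocab); filter_next_tokens is a hypothetical stand-in for the part of find_next_tokens that runs after the model forward pass:

import torch

def filter_next_tokens(logits: torch.Tensor, threshold: float) -> list[list[tuple[int, float]]]:
    # Turn raw logits of shape (batch, vocab) into log-probabilities.
    log_probs = torch.log_softmax(logits, dim=-1)
    result = []
    for probs in log_probs:
        # Keep only candidates whose log-probability clears the cutoff,
        # mirroring the torch.where(probs > threshold) line in the diff.
        (indices,) = torch.where(probs > threshold)
        result.append([(int(i), float(probs[i])) for i in indices])
    return result

# Tiny smoke test over a fake 32-token vocabulary.
batch_logits = torch.randn(2, 32) * 5
for candidates in filter_next_tokens(batch_logits, threshold=-10.0):
    print(len(candidates), candidates[:3])

Passing the threshold in, instead of redefining it inside the loop, is what lets completions.py reuse its flagging cutoff when expanding candidates.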