Spaces: Running on Zero
da03 committed
Commit · 9a65236
Parent(s): a16dab3
app.py CHANGED
@@ -3,10 +3,21 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-
-
-
-
+# Load models
+implicit_cot_model_name = 'yuntian-deng/gpt2-implicit-cot-multiplication'
+implicit_cot_model = AutoModelForCausalLM.from_pretrained(implicit_cot_model_name)
+tokenizer = AutoTokenizer.from_pretrained(implicit_cot_model_name)
+
+no_cot_model_name = 'yuntian-deng/gpt2-no-cot-multiplication'
+no_cot_model = AutoModelForCausalLM.from_pretrained(no_cot_model_name)
+
+explicit_cot_model_name = 'yuntian-deng/gpt2-explicit-cot-multiplication'
+explicit_cot_model = AutoModelForCausalLM.from_pretrained(explicit_cot_model_name)
+
+models = {'implicit': implicit_cot_model, 'no': no_cot_model, 'explicit': explicit_cot_model}
+
+# Constants
+MAX_PRODUCT_DIGITS_PER_MODEL = {'implicit': 100, 'no': 100, 'explicit': 900}
 
 def preprocess(num):
     num = str(num).strip().replace(' ', '')
@@ -21,97 +32,92 @@ def postprocess(raw_output):
 def predict_product(num1, num2):
     input_text = f'{preprocess(num1)} * {preprocess(num2)} ='
     inputs = tokenizer(input_text, return_tensors='pt').to('cuda' if torch.cuda.is_available() else 'cpu')
-    model.to('cuda' if torch.cuda.is_available() else 'cpu')
+    [model.to('cuda' if torch.cuda.is_available() else 'cpu') for model in models.values()]
 
     input_ids = inputs['input_ids']
     input_len = input_ids.shape[-1]
     prediction = ""
-
+    ground_truth_product = ""
     valid_input = True
 
     try:
         num1_int = int(num1)
         num2_int = int(num2)
-
+        ground_truth_product = str(num1_int * num2_int)
+        ground_truth_digits_reversed = list(ground_truth_product)[::-1]
     except ValueError:
         valid_input = False
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-    print ('c', correct_digits_reversed)
-
-    # Create the diff for HighlightedText
-    diff = []
-    correct_digits = []
-    is_correct_sofar = True
-    for i in range(len(predicted_digits_reversed)):
-        predicted_digit = predicted_digits_reversed[i]
-        correct_digit = correct_digits_reversed[i]
-        correct_digits.append((correct_digit, None))
-        if i >= len(correct_digits_reversed):
-            if predicted_digit == '0' and is_correct_sofar:
-                is_correct_digit = True
-            else:
-
-
-
-
-        else:
-
-
-
-        if is_correct_digit:
-            diff.append((predicted_digit, "-"))
-        else:
-            diff.append((predicted_digit, "+"))
-    diff = diff[::-1]
-    correct_digits = correct_digits[::-1]
-
-    yield correct_digits, diff, ""
-
-    #if valid_input:
-    #    is_correct = prediction == correct_product
-    #    result_message = "Correct!" if is_correct else f"Incorrect! The correct product is {correct_product}."
-    #else:
-    #    result_message = "Invalid input. Could not evaluate correctness."
 
-
-
-
-    # if i < len(prediction) and i < len(correct_product) and prediction[i] == correct_product[i]:
-    #     final_diff.append((prediction[i], None)) # No highlight for correct digits
-    # elif i < len(prediction) and (i >= len(correct_product) or prediction[i] != correct_product[i]):
-    #     final_diff.append((prediction[i], "+")) # Highlight incorrect digits in red
-    # if i < len(correct_product) and (i >= len(prediction) or prediction[i] != correct_product[i]):
-    #     final_diff.append((correct_product[i], "-")) # Highlight missing/incorrect digits in green
-
+    generated_ids_per_model = {model_name: inputs['input_ids'].data.clone() for model_name in models}
+    finished_per_model = {model_name: False for model_name in models}
+    past_key_values_per_model = {model_name: None for model_name in models}
+    predicted_results_per_model = {model_name: [] for model_name in models}
+    for step in range(max(MAX_PRODUCT_DIGITS_PER_MODEL.values())):  # Set a maximum limit to prevent infinite loops
+        # Ground Truth
+        ground_truth_results = []
+        for i in range(min(step + 1, len(ground_truth_digits_reversed))):
+            ground_truth_digit = ground_truth_digits_reversed[i]
+            ground_truth_results.append((ground_truth_digit, None))
+        ground_truth_results = ground_truth_results[::-1]
+        # Predicted
+        for model_name in models:
+            model = models[model_name]
+            if finished_per_model[model_name]:
+                continue
+            if step >= MAX_PRODUCT_DIGITS_PER_MODEL[model_name]:
+                continue
+            generation_kwargs = {
+                'input_ids': generated_ids_per_model[model_name],
+                'max_new_tokens': 1,
+                'do_sample': False,
+                'past_key_values': past_key_values_per_model[model_name],
+                'return_dict_in_generate': True,
+                'use_cache': True
+            }
+            if step == 0:
+                del generation_kwargs['past_key_values']
+            outputs = model.generate(**generation_kwargs)
+            generated_ids = outputs.sequences
+            generated_ids_per_model[model_name] = generated_ids
+            next_token_id = generated_ids[0, -1]
+
+            if next_token_id.item() == tokenizer.eos_token_id:
+                finished_per_model[model_name] = True
+                continue
+            past_key_values_per_model[model_name] = outputs.past_key_values
+
+            output_text = tokenizer.decode(generated_ids[0, input_len:], skip_special_tokens=True)
+            predicted_digits_reversed = output_text.strip().split(' ')
+
+            predicted_results = []
+            is_correct_sofar = True
+            for i in range(len(predicted_digits_reversed)):
+                predicted_digit = predicted_digits_reversed[i]
+                if i >= len(ground_truth_digits_reversed):
+                    if predicted_digit == '0' and is_correct_sofar:
+                        is_correct_digit = True
+                    else:
+                        is_correct_digit = False
+                else:
+                    ground_truth_digit = ground_truth_digits_reversed[i]
+                    if predicted_digit == ground_truth_digit:
+                        is_correct_digit = True
+                    else:
+                        is_correct_digit = False
+                if not is_correct_digit:
+                    is_correct_sofar = False
+                if is_correct_digit:
+                    predicted_results.append((predicted_digit, "correct"))
+                else:
+                    predicted_results.append((predicted_digit, "wrong"))
+            predicted_results = predicted_results[::-1]
+            predicted_results_per_model[model_name] = predicted_results
+
+        predicted_results_implicit_cot = predicted_results_per_model['implicit']
+        predicted_results_nocot = predicted_results_per_model['no']
+        predicted_results_explicit_cot = predicted_results_per_model['explicit']
+
+        yield ground_truth_results, predicted_results_implicit_cot, predicted_results_nocot, predicted_results_explicit_cot
 
+color_map = {"correct": "green", "wrong": "red"}
+
 demo = gr.Interface(
     fn=predict_product,
@@ -119,10 +125,12 @@ demo = gr.Interface(
         gr.Textbox(label='First Number (up to 12 digits)', value='123456789'),
         gr.Textbox(label='Second Number (up to 12 digits)', value='987654321'),
     ],
     outputs=[
-        gr.HighlightedText(label='Ground Truth Product', combine_adjacent=False, show_legend=False, color_map=
-        gr.HighlightedText(label='
-        gr.
+        gr.HighlightedText(label='Ground Truth Product', combine_adjacent=False, show_legend=False, color_map=color_map),
+        gr.HighlightedText(label='Implicit CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
+        gr.HighlightedText(label='No CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
+        gr.HighlightedText(label='Explicit CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
     ],
     title='GPT2 Direct Multiplication Calculator (Without Using Chain-of-Thought)',
     description='This demo uses GPT2 to directly predict the product of two numbers without using any intermediate reasoning steps. The GPT2 model has been fine-tuned to internalize chain-of-thought reasoning within its hidden states, following our stepwise internalization approach detailed in the paper linked at the bottom of this page.',
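
The heart of this change is the decoding loop: each model emits one token per step with `max_new_tokens=1`, and the attention cache (`past_key_values`) returned by `generate` is fed back in so the prefix is not re-encoded on every step. Below is a minimal standalone sketch of that pattern, not part of the commit, assuming the stock `gpt2` checkpoint as a stand-in for the fine-tuned models:

```python
# Minimal sketch (not part of the commit) of the streaming decode pattern,
# assuming stock 'gpt2' as a stand-in for the fine-tuned checkpoints.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

input_ids = tokenizer('1 2 3 * 4 5 6 =', return_tensors='pt')['input_ids'].to(device)
input_len = input_ids.shape[-1]
past_key_values = None

for step in range(20):  # hard cap, like MAX_PRODUCT_DIGITS_PER_MODEL
    generation_kwargs = {
        'input_ids': input_ids,
        'max_new_tokens': 1,           # one token per step
        'do_sample': False,            # greedy decoding
        'return_dict_in_generate': True,
        'use_cache': True,
    }
    if past_key_values is not None:
        generation_kwargs['past_key_values'] = past_key_values
    outputs = model.generate(**generation_kwargs)
    input_ids = outputs.sequences                # full sequence incl. new token
    past_key_values = outputs.past_key_values    # reuse the cache next step
    if input_ids[0, -1].item() == tokenizer.eos_token_id:
        break
    # a Gradio generator function would yield a partial decode here
    print(tokenizer.decode(input_ids[0, input_len:], skip_special_tokens=True))
```

Feeding the cache back into `generate` means each step only runs the model over the newly generated token instead of the whole prefix, which is what keeps per-digit streaming cheap enough to run three models side by side.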
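On the UI side, each yielded output is a list of `(token, category)` tuples, the format `gr.HighlightedText` consumes, and `color_map` maps the `"correct"` and `"wrong"` categories to green and red. A self-contained toy version of that idea (the `check_digits` helper is hypothetical, for illustration only):

```python
# Hypothetical helper illustrating the (token, category) pairs consumed by
# gr.HighlightedText; not part of the commit itself.
import gradio as gr

color_map = {"correct": "green", "wrong": "red"}

def check_digits(predicted: str, truth: str):
    pairs = []
    for i, digit in enumerate(predicted):
        category = "correct" if i < len(truth) and digit == truth[i] else "wrong"
        pairs.append((digit, category))
    return pairs

demo = gr.Interface(
    fn=check_digits,
    inputs=[gr.Textbox(label='Predicted'), gr.Textbox(label='Ground Truth')],
    outputs=gr.HighlightedText(label='Digit Check', combine_adjacent=False,
                               show_legend=False, color_map=color_map),
)

if __name__ == '__main__':
    demo.launch()
```

Tuples with a `None` category, as in the ground-truth output above, render with no highlight at all.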