Spaces: Running on Zero
da03 committed
Commit · 9a65236
Parent(s): a16dab3
app.py CHANGED
@@ -3,10 +3,21 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-
-
-
-
+# Load models
+implicit_cot_model_name = 'yuntian-deng/gpt2-implicit-cot-multiplication'
+implicit_cot_model = AutoModelForCausalLM.from_pretrained(implicit_cot_model_name)
+tokenizer = AutoTokenizer.from_pretrained(implicit_cot_model_name)
+
+no_cot_model_name = 'yuntian-deng/gpt2-no-cot-multiplication'
+no_cot_model = AutoModelForCausalLM.from_pretrained(no_cot_model_name)
+
+explicit_cot_model_name = 'yuntian-deng/gpt2-explicit-cot-multiplication'
+explicit_cot_model = AutoModelForCausalLM.from_pretrained(explicit_cot_model_name)
+
+models = {'implicit': implicit_cot_model, 'no': no_cot_model, 'explicit': explicit_cot_model}
+
+# Constants
+MAX_PRODUCT_DIGITS_PER_MODEL = {'implicit': 100, 'no': 100, 'explicit': 900}
 
 def preprocess(num):
     num = str(num).strip().replace(' ', '')
@@ -21,97 +32,92 @@ def postprocess(raw_output):
 def predict_product(num1, num2):
     input_text = f'{preprocess(num1)} * {preprocess(num2)} ='
     inputs = tokenizer(input_text, return_tensors='pt').to('cuda' if torch.cuda.is_available() else 'cpu')
-    model.to('cuda' if torch.cuda.is_available() else 'cpu')
+    [model.to('cuda' if torch.cuda.is_available() else 'cpu') for model in models.values()]
 
     input_ids = inputs['input_ids']
     input_len = input_ids.shape[-1]
     prediction = ""
-
+    ground_truth_product = ""
     valid_input = True
 
     try:
         num1_int = int(num1)
         num2_int = int(num2)
-
+        ground_truth_product = str(num1_int * num2_int)
+        ground_truth_digits_reversed = list(ground_truth_product)[::-1]
     except ValueError:
         valid_input = False
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-    print ('c', correct_digits_reversed)
-
-    # Create the diff for HighlightedText
-    diff = []
-    correct_digits = []
-    is_correct_sofar = True
-    for i in range(len(predicted_digits_reversed)):
-        predicted_digit = predicted_digits_reversed[i]
-        correct_digit = correct_digits_reversed[i]
-        correct_digits.append((correct_digit, None))
-        if i >= len(correct_digits_reversed):
-            if predicted_digit == '0' and is_correct_sofar:
-                is_correct_digit = True
-            else:
-
-
-
-
-        else:
-
-
-
-        if is_correct_digit:
-            diff.append((predicted_digit, "-"))
-        else:
-            diff.append((predicted_digit, "+"))
-    diff = diff[::-1]
-    correct_digits = correct_digits[::-1]
-
-    yield correct_digits, diff, ""
-
-    #if valid_input:
-    #    is_correct = prediction == correct_product
-    #    result_message = "Correct!" if is_correct else f"Incorrect! The correct product is {correct_product}."
-    #else:
-    #    result_message = "Invalid input. Could not evaluate correctness."
 
-
-
-
-    # if i < len(prediction) and i < len(correct_product) and prediction[i] == correct_product[i]:
-    #     final_diff.append((prediction[i], None)) # No highlight for correct digits
-    # elif i < len(prediction) and (i >= len(correct_product) or prediction[i] != correct_product[i]):
-    #     final_diff.append((prediction[i], "+")) # Highlight incorrect digits in red
-    # if i < len(correct_product) and (i >= len(prediction) or prediction[i] != correct_product[i]):
-    #     final_diff.append((correct_product[i], "-")) # Highlight missing/incorrect digits in green
-
+    generated_ids_per_model = {model_name: inputs['input_ids'].data.clone() for model_name in models}
+    finished_per_model = {model_name: False for model_name in models}
+    past_key_values_per_model = {model_name: None for model_name in models}
+    predicted_results_per_model = {model_name: [] for model_name in models}
+    for step in range(max(MAX_PRODUCT_DIGITS_PER_MODEL.values())):  # Set a maximum limit to prevent infinite loops
+        # Ground Truth
+        ground_truth_results = []
+        for i in range(min(step + 1, len(ground_truth_digits_reversed))):
+            ground_truth_digit = ground_truth_digits_reversed[i]
+            ground_truth_results.append((ground_truth_digit, None))
+        ground_truth_results = ground_truth_results[::-1]
+        # Predicted
+        for model_name in models:
+            model = models[model_name]
+            if finished_per_model[model_name]:
+                continue
+            if step >= MAX_PRODUCT_DIGITS_PER_MODEL[model_name]:
+                continue
+            generation_kwargs = {
+                'input_ids': generated_ids_per_model[model_name],
+                'max_new_tokens': 1,
+                'do_sample': False,
+                'past_key_values': past_key_values_per_model[model_name],
+                'return_dict_in_generate': True,
+                'use_cache': True
+            }
+            if step == 0:
+                del generation_kwargs['past_key_values']
+            outputs = model.generate(**generation_kwargs)
+            generated_ids = outputs.sequences
+            generated_ids_per_model[model_name] = generated_ids
+            next_token_id = generated_ids[0, -1]
+
+            if next_token_id.item() == tokenizer.eos_token_id:
+                finished_per_model[model_name] = True
+                continue
+            past_key_values_per_model[model_name] = outputs.past_key_values
+
+            output_text = tokenizer.decode(generated_ids[0, input_len:], skip_special_tokens=True)
+            predicted_digits_reversed = output_text.strip().split(' ')
+
+            predicted_results = []
+            is_correct_sofar = True
+            for i in range(len(predicted_digits_reversed)):
+                predicted_digit = predicted_digits_reversed[i]
+                if i >= len(ground_truth_digits_reversed):
+                    if predicted_digit == '0' and is_correct_sofar:
+                        is_correct_digit = True
+                    else:
+                        is_correct_digit = False
+                else:
+                    ground_truth_digit = ground_truth_digits_reversed[i]
+                    if predicted_digit == ground_truth_digit:
+                        is_correct_digit = True
+                    else:
+                        is_correct_digit = False
+                if not is_correct_digit:
+                    is_correct_sofar = False
+                if is_correct_digit:
+                    predicted_results.append((predicted_digit, "correct"))
+                else:
+                    predicted_results.append((predicted_digit, "wrong"))
+            predicted_results = predicted_results[::-1]
+            predicted_results_per_model[model_name] = predicted_results
+
+        predicted_results_implicit_cot = predicted_results_per_model['implicit']
+        predicted_results_nocot = predicted_results_per_model['no']
+        predicted_results_explicit_cot = predicted_results_per_model['explicit']
+
+        yield ground_truth_results, predicted_results_implicit_cot, predicted_results_nocot, predicted_results_explicit_cot
 
+color_map = {"correct": "green", "wrong": "red"}
+
 demo = gr.Interface(
     fn=predict_product,
@@ -119,10 +125,12 @@ demo = gr.Interface(
         gr.Textbox(label='First Number (up to 12 digits)', value='123456789'),
         gr.Textbox(label='Second Number (up to 12 digits)', value='987654321'),
     ],
     outputs=[
-        gr.HighlightedText(label='Ground Truth Product', combine_adjacent=False, show_legend=False, color_map=
-        gr.HighlightedText(label='
-        gr.
+        gr.HighlightedText(label='Ground Truth Product', combine_adjacent=False, show_legend=False, color_map=color_map),
+        gr.HighlightedText(label='Implicit CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
+        gr.HighlightedText(label='No CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
+        gr.HighlightedText(label='Explicit CoT Predicted Product', combine_adjacent=False, show_legend=False, color_map=color_map, show_inline_category=False),
     ],
     title='GPT2 Direct Multiplication Calculator (Without Using Chain-of-Thought)',
     description='This demo uses GPT2 to directly predict the product of two numbers without using any intermediate reasoning steps. The GPT2 model has been fine-tuned to internalize chain-of-thought reasoning within its hidden states, following our stepwise internalization approach detailed in the paper linked at the bottom of this page.',
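
The heart of this change is the decoding loop: each model emits one token per step with `max_new_tokens=1`, and the attention cache (`past_key_values`) returned by `generate` is fed back in so the prefix is not re-encoded on every step. Below is a minimal standalone sketch of that pattern, not part of the commit, assuming the stock `gpt2` checkpoint as a stand-in for the fine-tuned models:

```python
# Minimal sketch (not part of the commit) of the streaming decode pattern,
# assuming stock 'gpt2' as a stand-in for the fine-tuned checkpoints.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

input_ids = tokenizer('1 2 3 * 4 5 6 =', return_tensors='pt')['input_ids'].to(device)
input_len = input_ids.shape[-1]
past_key_values = None

for step in range(20):  # hard cap, like MAX_PRODUCT_DIGITS_PER_MODEL
    generation_kwargs = {
        'input_ids': input_ids,
        'max_new_tokens': 1,           # one token per step
        'do_sample': False,            # greedy decoding
        'return_dict_in_generate': True,
        'use_cache': True,
    }
    if past_key_values is not None:
        generation_kwargs['past_key_values'] = past_key_values
    outputs = model.generate(**generation_kwargs)
    input_ids = outputs.sequences                # full sequence incl. new token
    past_key_values = outputs.past_key_values    # reuse the cache next step
    if input_ids[0, -1].item() == tokenizer.eos_token_id:
        break
    # a Gradio generator function would yield a partial decode here
    print(tokenizer.decode(input_ids[0, input_len:], skip_special_tokens=True))
```

Feeding the cache back into `generate` means each step only runs the model over the newly generated token instead of the whole prefix, which is what keeps per-digit streaming cheap enough to run three models side by side.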
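On the UI side, each yielded output is a list of `(token, category)` tuples, the format `gr.HighlightedText` consumes, and `color_map` maps the `"correct"` and `"wrong"` categories to green and red. A self-contained toy version of that idea (the `check_digits` helper is hypothetical, for illustration only):

```python
# Hypothetical helper illustrating the (token, category) pairs consumed by
# gr.HighlightedText; not part of the commit itself.
import gradio as gr

color_map = {"correct": "green", "wrong": "red"}

def check_digits(predicted: str, truth: str):
    pairs = []
    for i, digit in enumerate(predicted):
        category = "correct" if i < len(truth) and digit == truth[i] else "wrong"
        pairs.append((digit, category))
    return pairs

demo = gr.Interface(
    fn=check_digits,
    inputs=[gr.Textbox(label='Predicted'), gr.Textbox(label='Ground Truth')],
    outputs=gr.HighlightedText(label='Digit Check', combine_adjacent=False,
                               show_legend=False, color_map=color_map),
)

if __name__ == '__main__':
    demo.launch()
```

Tuples with a `None` category, as in the ground-truth output above, render with no highlight at all.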