Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import pandas as pd | |
| from datasets import load_dataset | |
| from transformers import T5ForConditionalGeneration, T5Tokenizer | |
# --- Configuration -----------------------------------------------------------

# Device the reward model and its inputs are moved to; switch to 'cuda' if you
# have a GPU.
device = 'cpu'

# Hub identifier of the SteamSHP checkpoint loaded below.
REWARD_MODEL_ID = 'stanfordnlp/SteamSHP-flan-t5-large'

# Model names offered for comparison.
model_list = [
    'google/flan-t5-xxl',
    'bigscience/bloomz-7b1',
    'facebook/opt-iml-max-30b',
    'allenai/tk-instruct-11b-def-pos',
]

# Access token read from the environment and passed to `load_dataset`.
HF_TOKEN = os.getenv("HF_TOKEN")
OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"

# --- Heavy resources, loaded once at import time -----------------------------

tokenizer = T5Tokenizer.from_pretrained(REWARD_MODEL_ID)
model = T5ForConditionalGeneration.from_pretrained(REWARD_MODEL_ID).to(device)

# "train" split of the outputs dataset that `process` samples from.
ds = load_dataset(OUTPUTS_DATASET, split="train", use_auth_token=HF_TOKEN)
def _first_output(outputs_df, model_name):
    """Return the first recorded output of ``model_name``, or None if absent."""
    # An empty frame has no 'model' column at all, so guard before indexing.
    if outputs_df.empty:
        return None
    matches = outputs_df.loc[outputs_df['model'] == model_name, 'output']
    return None if matches.empty else matches.iloc[0]


def process(model_A, model_B):
    """Sample a random prompt and let the reward model pick the better response.

    Args:
        model_A: Name of the first model; matched against the ``model`` field
            of the sampled record's ``filtered_outputs``.
        model_B: Name of the second model; matched the same way.

    Returns:
        A tuple ``(prompt, responses, preferred)``: the sampled prompt text, a
        DataFrame holding the rows of the two selected models, and the name of
        the model whose response the reward model preferred.

    Raises:
        gr.Error: If the sampled record contains no output for one of the
            selected models (instead of an opaque ``IndexError``).
    """
    # Draw a single random record from the outputs dataset.
    sample = ds.shuffle().select(range(1))[0]
    prompt = sample["prompt"]
    df = pd.DataFrame.from_records(sample["filtered_outputs"])

    response_A = _first_output(df, model_A)
    response_B = _first_output(df, model_B)
    for name, response in ((model_A, response_A), (model_B, response_B)):
        if response is None:
            raise gr.Error(
                f"The sampled prompt has no output for model '{name}'. "
                "Please try again."
            )

    # Prompt layout for the reward model; the text ends on "RESPONSE" so that
    # the single generated token is the label of the preferred response.
    input_text = (
        "POST: " + prompt
        + "\n\n RESPONSE A: " + response_A
        + "\n\n RESPONSE B: " + response_B
        + "\n\n Which response is better? RESPONSE"
    )
    x = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
    # One new token is enough: only the label is needed.
    y = model.generate(x, max_new_tokens=1)
    preferred = tokenizer.batch_decode(y, skip_special_tokens=True)[0].strip()

    # Anything other than 'A' is treated as a vote for model B.
    result = model_A if preferred == 'A' else model_B
    return prompt, df[df['model'].isin([model_A, model_B])], result
title = "Compare Instruction Models to see which one is more helpful"

# The middle fragment is an f-string so the dataset link is filled in from
# OUTPUTS_DATASET; as a plain string the URL contained the literal text
# "{OUTPUTS_DATASET}".
description = (
    "This app compares the outputs of various open-source, instruction-trained "
    f"models from a [dataset](https://huggingface.co/datasets/{OUTPUTS_DATASET}) "
    "of human demonstrations using the SteamSHP reward model trained on the "
    "[Stanford Human Preferences Dataset (SHP)]"
    "(https://huggingface.co/datasets/stanfordnlp/SHP). "
    "Hit the button below to view a few random samples from the generated outputs"
)

# Two dropdowns choose the models to compare; `process` returns the prompt,
# the two models' responses and the name of the preferred model.
interface = gr.Interface(
    fn=process,
    inputs=[
        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
    ],
    outputs=[
        gr.Textbox(label="Prompt"),
        gr.DataFrame(label="Model Responses"),
        gr.Textbox(label="Preferred Option"),
    ],
    title=title,
    description=description,
)

interface.launch(debug=True)