Spaces:

RamAnanth1
/

human_preference

Runtime error

App Files Files Community

human_preference / app.py

RamAnanth1

Update app.py

c75952a over 2 years ago

raw

history blame contribute delete

2.75 kB

	import gradio as gr
	import os

	import pandas as pd
	from datasets import load_dataset


	from transformers import T5ForConditionalGeneration, T5Tokenizer
	# Device the reward model runs on (CPU here; change it if a GPU is available).
	device = 'cpu'

	# Checkpoint shared by the tokenizer and the preference (reward) model.
	REWARD_MODEL_CHECKPOINT = 'stanfordnlp/SteamSHP-flan-t5-large'

	tokenizer = T5Tokenizer.from_pretrained(REWARD_MODEL_CHECKPOINT)
	model = T5ForConditionalGeneration.from_pretrained(REWARD_MODEL_CHECKPOINT)
	model = model.to(device)

	# Model names offered in the UI dropdowns; `process` matches them against
	# the 'model' field of each sampled row's "filtered_outputs" records.
	model_list = [
	    'google/flan-t5-xxl',
	    'bigscience/bloomz-7b1',
	    'facebook/opt-iml-max-30b',
	    'allenai/tk-instruct-11b-def-pos',
	]

	# Access token read from the environment (None when the variable is unset).
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Dataset holding prompts together with the per-model generated outputs.
	OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"

	# NOTE(review): newer `datasets` releases use `token` in place of
	# `use_auth_token` -- confirm against the version this app is pinned to.
	ds = load_dataset(
	    OUTPUTS_DATASET,
	    split="train",
	    use_auth_token=HF_TOKEN,
	)

	def process(model_A, model_B):
	    """Sample a random prompt and let the reward model pick the better response.

	    Args:
	        model_A: Name of the first model, matched against the 'model' field
	            of the sampled row's "filtered_outputs" records.
	        model_B: Name of the second model, matched the same way.

	    Returns:
	        A ``(prompt, responses, preferred)`` tuple: the sampled prompt text,
	        a DataFrame with the records belonging to the two selected models,
	        and the name of the model whose response was preferred.

	    Raises:
	        gr.Error: If the sampled prompt has no output recorded for one of
	            the selected models, so the comparison cannot be made.
	    """
	    # Draw one random row from the dataset.
	    sample = ds.shuffle().select(range(1))[0]
	    prompt = sample["prompt"]

	    df = pd.DataFrame.from_records(sample["filtered_outputs"])
	    if df.empty:
	        # Without any records there is no 'model' column to filter on.
	        raise gr.Error("The sampled prompt has no model outputs; please try again.")

	    # Look up each model's response, failing with a readable message rather
	    # than a bare IndexError when a model has no record for this prompt.
	    responses = {}
	    for name in (model_A, model_B):
	        outputs = df.loc[df['model'] == name, "output"]
	        if outputs.empty:
	            raise gr.Error(
	                f"No output from {name} is available for the sampled prompt; "
	                "please try again."
	            )
	        responses[name] = outputs.values[0]
	    response_A = responses[model_A]
	    response_B = responses[model_B]

	    input_text = (
	        f"POST: {prompt}\n\n RESPONSE A: {response_A}\n\n RESPONSE B: {response_B}"
	        "\n\n Which response is better? RESPONSE"
	    )
	    x = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
	    # Only one token is generated; it is compared against 'A' below.
	    y = model.generate(x, max_new_tokens=1)
	    preferred = tokenizer.batch_decode(y, skip_special_tokens=True)[0].strip()

	    # Any answer other than 'A' counts as a vote for model_B.
	    result = model_A if preferred == 'A' else model_B
	    return prompt, df[df['model'].isin([model_A, model_B])], result

	title = "Compare Instruction Models to see which one is more helpful"
	# Must be an f-string: otherwise "{OUTPUTS_DATASET}" is emitted literally
	# and the dataset link in the rendered description is broken.
	description = f"This app compares the outputs of various open-source, instruction-trained models from a [dataset](https://huggingface.co/datasets/{OUTPUTS_DATASET}) of human demonstrations using the SteamSHP reward model trained on the [Stanford Human Preferences Dataset (SHP)](https://huggingface.co/datasets/stanfordnlp/SHP). Hit the button below to view a few random samples from the generated outputs"

	# Two dropdowns choose the models to compare; the three outputs line up with
	# the (prompt, responses DataFrame, preferred model) tuple returned by `process`.
	interface = gr.Interface(
	    fn=process,
	    inputs=[
	        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
	        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
	    ],
	    outputs=[
	        gr.Textbox(label="Prompt"),
	        gr.DataFrame(label="Model Responses"),
	        gr.Textbox(label="Preferred Option"),
	    ],
	    title=title,
	    description=description,
	)

	interface.launch(debug=True)