import gradio as gr
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

from MonsterAPIClient import MClient
from MonsterAPIClient import MODELS_TO_DATAMODEL

client = MClient()

# Available models list
MODELS_TO_SERVE = ['llama2-7b-chat', 'mpt-7b-instruct', 'falcon-7b-instruct']
def generate_model_output(model: str, input_text: str, temp: float = 0.98) -> tuple:
    """
    Generate output from a specific model.

    Parameters:
        model (str): The name of the model.
        input_text (str): The input prompt for the model.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.

    Returns:
        tuple: (model name, generated output text or an error message).
    """
    try:
        response = client.get_response(model, {
            "prompt": input_text,
            "temp": temp,
        })
        # Poll MonsterAPI until the generation job finishes, then read the text.
        output = client.wait_and_get_result(response['process_id'])
        return model, output['text']
    except Exception as e:
        return model, f"Error occurred: {str(e)}"
def generate_output(selected_models: list, input_text: str, temp: float = 0.98,
                    available_models: list = MODELS_TO_SERVE) -> list:
    """
    Generate outputs from selected models using Monster API.

    Parameters:
        selected_models (list): List of selected model names.
        input_text (str): The input prompt for the models.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.
        available_models (list, optional): List of available model names. Defaults to MODELS_TO_SERVE.

    Returns:
        list: One output string per model in available_models, in order; models
        that were not selected get a "Model not selected!" placeholder.
    """
    outputs = {}
    # Query all selected models concurrently; each future resolves to (model, output).
    with ThreadPoolExecutor() as executor:
        future_to_model = {executor.submit(generate_model_output, model, input_text, temp): model
                           for model in selected_models}
        for future in tqdm(as_completed(future_to_model), total=len(selected_models)):
            model, output = future.result()
            outputs[model] = output

    # Keep results aligned with the fixed order of the output textboxes.
    ret_outputs = []
    for model in available_models:
        if model not in outputs:
            ret_outputs.append("Model not selected!")
        else:
            ret_outputs.append(outputs[model])
    return ret_outputs
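
# Example (hypothetical output): with only Falcon selected, the other slots
# receive placeholders so each textbox in the UI still gets a value:
#   generate_output(['falcon-7b-instruct'], 'Hello')
#   -> ['Model not selected!', 'Model not selected!', '<falcon output>']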
# Gradio UI components. The legacy gr.inputs/gr.outputs namespaces used here
# originally were removed in recent Gradio releases, so the current gr.* API
# is used instead (with `value=` replacing the old `default=` on Slider).
output_components = [gr.Textbox(label=model) for model in MODELS_TO_SERVE]
checkboxes = gr.CheckboxGroup(choices=MODELS_TO_SERVE, label="Select models to generate outputs:")
textbox = gr.Textbox(label="Input Prompt")
temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.98, label="Temperature", step=0.01)
# `capture_session` was removed from Gradio and would raise a TypeError, so it
# is dropped; the variable is renamed from `input_text` to `demo` since it
# holds the Interface, not a text input.
demo = gr.Interface(
    fn=generate_output,
    inputs=[
        checkboxes,
        textbox,
        temp,
    ],
    outputs=output_components,
    live=False,
    title="LLM Evaluation powered by MonsterAPI",
    description="""This HuggingFace Space has been designed to help you evaluate the output of LLMs like Llama 2 7B, Falcon-7B, and MPT-7B in parallel. These models are hosted on [MonsterAPI](https://monsterapi.ai/?utm_source=llm-evaluation&utm_medium=referral) - an AI infrastructure platform built for easily accessing AI models via scalable APIs and [finetuning LLMs](https://docs.monsterapi.ai/fine-tune-a-large-language-model-llm) at very low cost with our no-code implementation. MonsterAPI is powered by our low-cost and highly scalable GPU computing platform - [Q Blocks](https://www.qblocks.cloud?utm_source=llm-evaluation&utm_medium=referral). These LLMs are accessible via scalable REST APIs. Check out our [API documentation](https://documenter.getpostman.com/view/13759598/2s8ZDVZ3Yi) to integrate them into your AI-powered applications.""",
    css="body {background-color: black}"
)
# Launch the Gradio app
demo.launch()
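
# Note: when running outside HuggingFace Spaces, you may need to bind the
# server explicitly, e.g. (assumed deployment values, not part of the Space):
#   demo.launch(server_name="0.0.0.0", server_port=7860)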