import gradio as gr
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

from MonsterAPIClient import MClient
from MonsterAPIClient import MODELS_TO_DATAMODEL

client = MClient()

# Available models list
MODELS_TO_SERVE = ['llama2-7b-chat', 'mpt-7b-instruct', 'falcon-7b-instruct']
def generate_model_output(model: str, input_text: str, temp: float = 0.98) -> tuple:
    """
    Generate output from a specific model.

    Parameters:
        model (str): The name of the model.
        input_text (str): The input prompt for the model.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.

    Returns:
        tuple: (model name, generated output text or an error message).
    """
    try:
        response = client.get_response(model, {
            "prompt": input_text,
            "temp": temp,
        })
        # Poll MonsterAPI until the generation job finishes, then read the text.
        output = client.wait_and_get_result(response['process_id'])
        return model, output['text']
    except Exception as e:
        return model, f"Error occurred: {str(e)}"
def generate_output(selected_models: list, input_text: str, temp: float = 0.98,
                    available_models: list = MODELS_TO_SERVE) -> list:
    """
    Generate outputs from selected models using Monster API.

    Parameters:
        selected_models (list): List of selected model names.
        input_text (str): The input prompt for the models.
        temp (float, optional): The temperature value for text generation. Defaults to 0.98.
        available_models (list, optional): List of available model names. Defaults to MODELS_TO_SERVE.

    Returns:
        list: One output string per model in available_models, in order; models
        that were not selected get a "Model not selected!" placeholder.
    """
    outputs = {}
    # Query all selected models concurrently; each future resolves to (model, output).
    with ThreadPoolExecutor() as executor:
        future_to_model = {executor.submit(generate_model_output, model, input_text, temp): model
                           for model in selected_models}
        for future in tqdm(as_completed(future_to_model), total=len(selected_models)):
            model, output = future.result()
            outputs[model] = output

    # Keep results aligned with the fixed order of the output textboxes.
    ret_outputs = []
    for model in available_models:
        if model not in outputs:
            ret_outputs.append("Model not selected!")
        else:
            ret_outputs.append(outputs[model])
    return ret_outputs
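
# Example (hypothetical output): with only Falcon selected, the other slots
# receive placeholders so each textbox in the UI still gets a value:
#   generate_output(['falcon-7b-instruct'], 'Hello')
#   -> ['Model not selected!', 'Model not selected!', '<falcon output>']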
# Gradio UI components. The legacy gr.inputs/gr.outputs namespaces used here
# originally were removed in recent Gradio releases, so the current gr.* API
# is used instead (with `value=` replacing the old `default=` on Slider).
output_components = [gr.Textbox(label=model) for model in MODELS_TO_SERVE]
checkboxes = gr.CheckboxGroup(choices=MODELS_TO_SERVE, label="Select models to generate outputs:")
textbox = gr.Textbox(label="Input Prompt")
temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.98, label="Temperature", step=0.01)
# `capture_session` was removed from Gradio and would raise a TypeError, so it
# is dropped; the variable is renamed from `input_text` to `demo` since it
# holds the Interface, not a text input.
demo = gr.Interface(
    fn=generate_output,
    inputs=[
        checkboxes,
        textbox,
        temp,
    ],
    outputs=output_components,
    live=False,
    title="LLM Evaluation powered by MonsterAPI",
    description="""This HuggingFace Space has been designed to help you evaluate the output of LLMs like Llama 2 7B, Falcon-7B, and MPT-7B in parallel. These models are hosted on [MonsterAPI](https://monsterapi.ai/?utm_source=llm-evaluation&utm_medium=referral) - an AI infrastructure platform built for easily accessing AI models via scalable APIs and [finetuning LLMs](https://docs.monsterapi.ai/fine-tune-a-large-language-model-llm) at very low cost with our no-code implementation. MonsterAPI is powered by our low-cost and highly scalable GPU computing platform - [Q Blocks](https://www.qblocks.cloud?utm_source=llm-evaluation&utm_medium=referral). These LLMs are accessible via scalable REST APIs. Check out our [API documentation](https://documenter.getpostman.com/view/13759598/2s8ZDVZ3Yi) to integrate them into your AI-powered applications.""",
    css="body {background-color: black}"
)
# Launch the Gradio app
demo.launch()
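
# Note: when running outside HuggingFace Spaces, you may need to bind the
# server explicitly, e.g. (assumed deployment values, not part of the Space):
#   demo.launch(server_name="0.0.0.0", server_port=7860)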