#########################################################################################
# Title: Gradio Chatbot Demo
# Author: Andreas Fischer
# Date: June 22nd, 2024
# Last update: June 22nd, 2024
#########################################################################################

import re

import gradio as gr
from huggingface_hub import InferenceClient

myToken = None  # optionally place a personal "hf_..." access token here


# Specify Prompt Formatting
# --------------------------

def format_prompt(message="", history=None, system=None, RAGAddon=None, system2=None,
                  zeichenlimit=None, historylimit=4, removeHTML=True):
    """Build a Mistral-style instruction prompt from message, history and system prompt.

    Parameters:
        message (str): current user message.
        history (list | None): list of (user_message, bot_response) pairs.
        system (str | None): system prompt; prepended inside an [INST] block.
        RAGAddon (str | None): extra context appended to the system prompt.
        system2 (str | None): text appended verbatim at the very end of the prompt.
        zeichenlimit (int | None): per-turn character limit (None = effectively unlimited).
        historylimit (int): number of most recent history turns to include.
        removeHTML (bool): strip HTML tags from past bot responses.

    Returns:
        str: the assembled prompt string.
    """
    if zeichenlimit is None:
        zeichenlimit = 1000000000  # :-)
    startOfString = ""  # ""
    # Mistral/Mixtral instruction templates (alternative Command-R tokens kept for reference):
    template0 = " [INST] {system} [/INST] "  # "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
    template1 = " [INST] {message} [/INST]"  # "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
    template2 = " {response}"                # "{response}<|END_OF_TURN_TOKEN|>"
    prompt = ""  # Prompt is built dynamically from components:
    if RAGAddon is not None:
        system += RAGAddon
    if system is not None:
        prompt += template0.format(system=system)
    # Strip instruction markers and special tokens from the user message so it
    # cannot break out of the prompt template (basic prompt-injection hardening).
    message = message.replace("[INST]", "")
    message = message.replace("[/INST]", "")
    # NOTE(review): the original line was message.replace("","") — a no-op; stripping
    # the "</s>" EOS token restores the apparent intent (likely lost in a paste) — confirm.
    message = message.replace("</s>", "")
    message = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            if user_message is None:
                user_message = ""
            if bot_response is None:
                bot_response = ""
            if removeHTML:
                # remove HTML-components in general (may cause bugs with markdown-rendering)
                bot_response = re.sub("<(.*?)>", "\n", bot_response)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    if message is not None:
        prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt


# Specify Chatbot Response
# -------------------------

def response(message, history, system, hfToken):
    """Stream a model answer for the Gradio ChatInterface.

    Parameters:
        message (str): current user message.
        history (list): complete chat history of (user, bot) pairs.
        system (str): system prompt from the UI ("" selects the German default).
        hfToken (str): optional "hf_..." token; replaces the module-level client.

    Yields:
        str: the answer accumulated so far (token-by-token streaming).
    """
    global client
    if hfToken.startswith("hf_"):  # use HF-hub with custom token if token is provided
        client = InferenceClient(model=myModel, token=hfToken)
    if system == "":  # set default system prompt if no system prompt is provided
        system = "Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."
    prompt = format_prompt(
        message,  # current message of the user (str)
        history,  # complete history (list)
        system    # system prompt (str)
    )
    print(prompt)
    generate_kwargs = dict(
        temperature=float(0.9),
        max_new_tokens=1000,
        top_p=float(0.95),
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    stream = client.text_generation(prompt, **generate_kwargs, stream=True,
                                    details=True, return_full_text=False)
    answer = ""  # renamed from `response` to avoid shadowing this function's own name
    for text in stream:  # stream response token by token
        part = text.token.text
        answer += part
        print(part, end="", flush=True)
        # answer = re.sub("<(.*?)>","\n", answer)  # remove HTML-components in general (may cause bugs with markdown-rendering)
        yield answer


# Connect to Model on the Huggingface Hub
# ----------------------------------------

myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(
    model=myModel,
    token=myToken  # token="hf_..."
)


# Start Gradio-User-Interface
# ----------------------------

gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=None, render_markdown=True),
    title="Gradio Chatbot Demo",
    additional_inputs=[
        gr.Textbox(label="System Prompt",
                   value="Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."),
        gr.Textbox(label="HF_token", value=""),
    ],
).queue().launch(share=True)  # False, server_name="0.0.0.0", server_port=7864
print("Interface up and running!")