Spaces:
Sleeping
Sleeping
| ######################################################################################### | |
| # Title: Gradio Chatbot Demo | |
| # Author: Andreas Fischer | |
| # Date: June 22nd, 2024 | |
| # Last update: June 22nd, 2024 | |
| ########################################################################################## | |
myToken=None  # Hugging Face API token ("hf_..." string) or None for anonymous access — passed to InferenceClient below

# Specify Prompt Formatting
#---------------------------
import re
def format_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=True):
    """Build a Mistral-instruct style prompt string.

    Args:
        message: Current user message (None is treated as "").
        history: Optional list of (user_message, bot_response) pairs; only the
            last `historylimit` turns are included.
        system: Optional system prompt, wrapped in its own [INST] block.
        RAGAddon: Optional text appended to the system prompt (e.g. retrieved context).
        system2: Optional text appended verbatim at the very end of the prompt.
        zeichenlimit: Per-part character limit (None means effectively unlimited).
        historylimit: Number of most recent history turns to keep.
        removeHTML: If truthy, strip HTML-like tags from past bot responses.

    Returns:
        The assembled prompt, starting with the "<s>" BOS marker.
    """
    if zeichenlimit is None:
        zeichenlimit = 1000000000  # effectively no limit :-)
    startOfString = "<s>"
    # Mistral-style chat templates (alternative tokenizations kept for reference):
    template0 = " [INST] {system} [/INST] </s>"   # system turn
    template1 = " [INST] {message} [/INST]"       # user turn
    template2 = " {response}</s>"                 # assistant turn
    prompt = ""  # prompt is built dynamically from components
    if RAGAddon is not None:
        # Fix: original crashed with TypeError when RAGAddon was set but system was None.
        system = (system or "") + RAGAddon
    if system is not None:
        prompt += template0.format(system=system)
    # Fix: original crashed on None message; sanitize template tokens out of user input.
    if message is None:
        message = ""
    message = message.replace("[INST]", "").replace("[/INST]", "").replace("</s>", "")
    message = re.sub(r"<[|](im_start|im_end|end_of_turn)[|]>", "", message)
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            user_message = user_message or ""
            bot_response = bot_response or ""
            if removeHTML:
                # Remove HTML tags in general (may cause bugs with markdown rendering).
                bot_response = re.sub("<(.*?)>", "\n", bot_response)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt
# Specify Chatbot Response
#--------------------------
import gradio as gr

def response(message, history, system, hfToken):
    """Generator used by gr.ChatInterface: streams the model answer token by token.

    Args:
        message: Current user message.
        history: Chat history as a list of (user, bot) pairs.
        system: System prompt from the UI textbox ("" selects the default).
        hfToken: Optional user-supplied Hugging Face token ("hf_..." prefix).

    Yields:
        The partial answer, growing with each streamed token.
    """
    global client
    # A user-supplied "hf_..." token replaces the module-level client.
    if hfToken.startswith("hf_"):
        client = InferenceClient(model=myModel, token=hfToken)
    # Default system prompt (German: "You are a helpful chatbot and prefer to answer in German.")
    if system == "":
        system = "Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."
    prompt = format_prompt(
        message,   # current message of the user (str)
        history,   # complete history (list)
        system,    # system prompt (str)
    )
    print(prompt)
    sampling = dict(
        temperature=float(0.9),
        max_new_tokens=1000,
        top_p=float(0.95),
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    token_stream = client.text_generation(prompt, **sampling, stream=True, details=True, return_full_text=False)
    answer = ""
    for event in token_stream:  # stream response token by token
        piece = event.token.text
        answer += piece
        print(piece, end="", flush=True)
        yield answer
# Connect to a model hosted on the Hugging Face Hub
#---------------------------------------------------
from huggingface_hub import InferenceClient

myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# myToken is None by default (anonymous access); set it to "hf_..." for authenticated use.
client = InferenceClient(model=myModel, token=myToken)
# Start the Gradio user interface
#---------------------------------
demo = gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=None, render_markdown=True),
    title="Gradio Chatbot Demo",
    additional_inputs=[
        gr.Textbox(label="System Prompt", value="Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."),
        gr.Textbox(label="HF_token", value=""),
    ],
)
# For a fixed local deployment use: share=False, server_name="0.0.0.0", server_port=7864
demo.queue().launch(share=True)
print("Interface up and running!")