#########################################################################################
# Title: Gradio Chatbot Demo
# Author: Andreas Fischer
# Date: June 22nd, 2024
# Last update: June 22nd, 2024
##########################################################################################
myToken = None  # Hugging Face API token (e.g. "hf_..."); None uses anonymous, rate-limited access
# Specify Prompt Formatting
#---------------------------
import re
def format_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=True):
    """Build a Mistral-Instruct-style prompt from system prompt, chat history, and current message."""
    if zeichenlimit is None: zeichenlimit = 1000000000  # effectively no character limit
    startOfString = "<s>"                        # alternative format: ""
    template0 = " [INST] {system} [/INST] </s>"  # alternative format: "<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
    template1 = " [INST] {message} [/INST]"      # alternative format: "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
    template2 = " {response}</s>"                # alternative format: "{response}<|END_OF_TURN_TOKEN|>"
    prompt = ""  # the prompt is built dynamically from these components
    if RAGAddon is not None:
        system += RAGAddon
    if system is not None:
        prompt += template0.format(system=system)
    message = message.replace("[INST]", "")  # strip template markers from the user message
    message = message.replace("[/INST]", "")
    message = message.replace("</s>", "")
    message = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", "", message)
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            if user_message is None: user_message = ""
            if bot_response is None: bot_response = ""
            if removeHTML: bot_response = re.sub("<(.*?)>", "\n", bot_response)  # remove HTML tags (may interfere with markdown rendering)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    if message is not None:
        prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt
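# Illustrative example (hypothetical inputs, not part of the original script) of the
# prompt string that format_prompt builds:
#   format_prompt(message="Wie spät ist es?",
#                 history=[("Hallo", "Hallo, wie kann ich helfen?")],
#                 system="Du bist ein Chatbot.")
# returns:
#   '<s> [INST] Du bist ein Chatbot. [/INST] </s> [INST] Hallo [/INST] Hallo, wie kann ich helfen?</s> [INST] Wie spät ist es? [/INST]'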
# Specify Chatbot Response
#--------------------------
import gradio as gr
def response(message, history, system, hfToken):
    global client
    if hfToken.startswith("hf_"):  # use the HF Hub with a custom token if one is provided
        client = InferenceClient(model=myModel, token=hfToken)
    if system == "":  # set a default system prompt if none is provided
        system = "Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."  # "You are a helpful chatbot and prefer to answer in German."
    prompt = format_prompt(
        message,  # current message of the user (str)
        history,  # complete history (list)
        system    # system prompt (str)
    )
    print(prompt)
    generate_kwargs = dict(temperature=0.9, max_new_tokens=1000, top_p=0.95, repetition_penalty=1.0, do_sample=True, seed=42)
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    response = ""
    for text in stream:  # stream the response token by token
        part = text.token.text
        response += part
        print(part, end="", flush=True)
        #response = re.sub("<(.*?)>", "\n", response)  # remove HTML tags (may interfere with markdown rendering)
        yield response
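# Minimal usage sketch (hypothetical, commented out; Gradio normally consumes the
# generator itself, and a real call hits the HF Inference API):
#   for partial in response("Hallo!", history=[], system="", hfToken=""):
#       pass  # each yielded value is the response text accumulated so far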
# Connect to a Model on the Hugging Face Hub
#-----------------------------------------
from huggingface_hub import InferenceClient
myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(
    model=myModel,
    token=myToken  # token="hf_..."
)
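# Optional connectivity check (hypothetical, commented out; a real call hits the
# HF Inference API and may require a valid token):
#   print(client.text_generation("<s> [INST] Sag Hallo! [/INST]", max_new_tokens=20))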
# Start Gradio User Interface
#---------------------------
gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=None, render_markdown=True),
    title="Gradio Chatbot Demo",
    additional_inputs=[  # passed to response() after (message, history)
        gr.Textbox(label="System Prompt", value="Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."),
        gr.Textbox(label="HF_token", value="")
    ]
).queue().launch(share=True)  # alternatively: share=False, server_name="0.0.0.0", server_port=7864
print("Interface up and running!")