#########################################################################################
# Title: Gradio Chatbot Demo
# Author: Andreas Fischer
# Date: June 22nd, 2024
# Last update: June 22nd, 2024
#########################################################################################

import re

import gradio as gr
from huggingface_hub import InferenceClient

myToken = None  # optionally place a personal "hf_..." access token here


# Specify Prompt Formatting
# --------------------------

def format_prompt(message="", history=None, system=None, RAGAddon=None, system2=None,
                  zeichenlimit=None, historylimit=4, removeHTML=True):
    """Build a Mistral-style instruction prompt from message, history and system prompt.

    Parameters:
        message (str): current user message.
        history (list | None): list of (user_message, bot_response) pairs.
        system (str | None): system prompt; prepended inside an [INST] block.
        RAGAddon (str | None): extra context appended to the system prompt.
        system2 (str | None): text appended verbatim at the very end of the prompt.
        zeichenlimit (int | None): per-turn character limit (None = effectively unlimited).
        historylimit (int): number of most recent history turns to include.
        removeHTML (bool): strip HTML tags from past bot responses.

    Returns:
        str: the assembled prompt string.
    """
    if zeichenlimit is None:
        zeichenlimit = 1000000000  # :-)
    startOfString = ""  # ""
    # Mistral/Mixtral instruction templates (alternative Command-R tokens kept for reference):
    template0 = " [INST] {system} [/INST] "  # "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
    template1 = " [INST] {message} [/INST]"  # "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
    template2 = " {response}"                # "{response}<|END_OF_TURN_TOKEN|>"
    prompt = ""  # Prompt is built dynamically from components:
    if RAGAddon is not None:
        system += RAGAddon
    if system is not None:
        prompt += template0.format(system=system)
    # Strip instruction markers and special tokens from the user message so it
    # cannot break out of the prompt template (basic prompt-injection hardening).
    message = message.replace("[INST]", "")
    message = message.replace("[/INST]", "")
    # NOTE(review): the original line was message.replace("","") — a no-op; stripping
    # the "</s>" EOS token restores the apparent intent (likely lost in a paste) — confirm.
    message = message.replace("</s>", "")
    message = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            if user_message is None:
                user_message = ""
            if bot_response is None:
                bot_response = ""
            if removeHTML:
                # remove HTML-components in general (may cause bugs with markdown-rendering)
                bot_response = re.sub("<(.*?)>", "\n", bot_response)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    if message is not None:
        prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt


# Specify Chatbot Response
# -------------------------

def response(message, history, system, hfToken):
    """Stream a model answer for the Gradio ChatInterface.

    Parameters:
        message (str): current user message.
        history (list): complete chat history of (user, bot) pairs.
        system (str): system prompt from the UI ("" selects the German default).
        hfToken (str): optional "hf_..." token; replaces the module-level client.

    Yields:
        str: the answer accumulated so far (token-by-token streaming).
    """
    global client
    if hfToken.startswith("hf_"):  # use HF-hub with custom token if token is provided
        client = InferenceClient(model=myModel, token=hfToken)
    if system == "":  # set default system prompt if no system prompt is provided
        system = "Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."
    prompt = format_prompt(
        message,  # current message of the user (str)
        history,  # complete history (list)
        system    # system prompt (str)
    )
    print(prompt)
    generate_kwargs = dict(
        temperature=float(0.9),
        max_new_tokens=1000,
        top_p=float(0.95),
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    stream = client.text_generation(prompt, **generate_kwargs, stream=True,
                                    details=True, return_full_text=False)
    answer = ""  # renamed from `response` to avoid shadowing this function's own name
    for text in stream:  # stream response token by token
        part = text.token.text
        answer += part
        print(part, end="", flush=True)
        # answer = re.sub("<(.*?)>","\n", answer)  # remove HTML-components in general (may cause bugs with markdown-rendering)
        yield answer


# Connect to Model on the Huggingface Hub
# ----------------------------------------

myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(
    model=myModel,
    token=myToken  # token="hf_..."
)


# Start Gradio-User-Interface
# ----------------------------

gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=None, render_markdown=True),
    title="Gradio Chatbot Demo",
    additional_inputs=[
        gr.Textbox(label="System Prompt",
                   value="Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."),
        gr.Textbox(label="HF_token", value=""),
    ],
).queue().launch(share=True)  # False, server_name="0.0.0.0", server_port=7864
print("Interface up and running!")