AFischer1985 committed on
Commit
584cd0c
·
verified ·
1 Parent(s): b568762

Update run.py

Browse files
Files changed (1) hide show
  1. run.py +82 -46
run.py CHANGED
@@ -1,53 +1,89 @@
1
- #############################################################################
2
- # Title: Gradio Interface to AI hosted by Huggingface
3
  # Author: Andreas Fischer
4
- # Date: October 7th, 2023
5
- # Last update: December 29th, 2023
6
- #############################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  import gradio as gr
9
- import requests
10
- import time
11
- import json
12
-
13
def response(message, history, model):
    """Stream a reply for *message* from a Hugging Face Inference-API model.

    Parameters
    ----------
    message : str
        The user's current message.
    history : list
        Chat history supplied by gr.ChatInterface (unused here).
    model : str
        Model id, or "Default" for Mixtral-8x7B-Instruct-v0.1.

    Yields the accumulated response text after every received chunk.
    """
    if model == "Default":
        model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    model_id = model
    params = {"max_new_tokens": 600, "return_full_text": False}  # , "max_length":500, "stream":True
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    # Small state machine for peeling the JSON envelope off the raw stream:
    # 1 = strip the opening '[{"generated_text":"' from the first chunk,
    # 2 = pass chunks through, 3 = the closing '"}]' was stripped from the
    #     previous chunk; restore it if more data arrives.
    correction = 1
    prompt = f"[INST] {message} [/INST]"  # skipped <s>
    print("URL: " + url)
    print(params)
    print("User: " + message + "\nAI: ")
    response = ""
    for text in requests.post(url, json={"inputs": prompt, "parameters": params}, stream=True):
        text = text.decode('UTF-8')
        print(text)
        if correction == 3:
            # Previous chunk was not actually the end of the stream;
            # put the suffix we removed from it back.
            text = '"}]' + text
            correction = 2
        if correction == 1:
            # BUG FIX: str.lstrip removes a *set of characters*, not a
            # prefix, and could eat leading characters of the generated
            # text; remove the exact JSON prefix instead.
            prefix = '[{"generated_text":"'
            if text.startswith(prefix):
                text = text[len(prefix):]
            correction = 2
        if text.endswith('"}]'):
            # Same fix for str.rstrip: remove the exact JSON suffix only.
            text = text[:-len('"}]')]
            correction = 3
        response = response + text
        print(text)
        time.sleep(0.2)  # throttle UI updates
        yield response
40
 
41
# Discover the models currently served by text-generation-inference and
# offer the Mistral family (plus a "Default" entry) in a dropdown.
api_reply = requests.get("https://api-inference.huggingface.co/framework/text-generation-inference")
x = [entry["model_id"] for entry in api_reply.json()]
print(x)
x = [model_id for model_id in x if model_id.startswith("mistral")]
print(x)
x.insert(0, "Default")

# Build and launch the chat UI; the dropdown feeds the `model` argument
# of `response`.
interface = gr.ChatInterface(
    response,
    title="AI-Interface to HuggingFace-Models",
    additional_inputs=[gr.Dropdown(x, value="Default", label="Model")],
)
interface.queue().launch(share=True)  # False, server_name="0.0.0.0", server_port=7864)
52
 
 
 
 
 
 
 
 
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #########################################################################################
2
+ # Title: Gradio Chatbot Demo
3
  # Author: Andreas Fischer
4
+ # Date: June 22nd, 2024
5
+ # Last update: June 22nd, 2024
6
+ ##########################################################################################
7
+
8
+ myToken=None
9
+
10
+ # Specify Prompt Formating
11
+ #---------------------------
12
+
13
import re
def format_prompt(message="", history=None, system=None, RAGAddon=None, system2=None,
                  zeichenlimit=None, historylimit=4, removeHTML=True):
    """Build a Mistral-instruct-style prompt from message, history and system prompt.

    Parameters
    ----------
    message : str
        Current user message (control tokens such as [INST] are stripped).
    history : list | None
        List of (user_message, bot_response) pairs; only the last
        *historylimit* pairs are included.
    system : str | None
        System prompt; emitted in its own [INST] block when given.
    RAGAddon : str | None
        Extra context appended to the system prompt.
    system2 : str | None
        Text appended verbatim after the final user turn (assistant priming).
    zeichenlimit : int | None
        Per-component character limit; None means effectively unlimited.
    historylimit : int
        Number of most recent history pairs to keep.
    removeHTML : bool
        Strip HTML tags from past bot responses.

    Returns the full prompt string, starting with the BOS marker "<s>".
    """
    if zeichenlimit is None:
        zeichenlimit = 1000000000  # :-)
    startOfString = "<s>"  # ""
    template0 = " [INST] {system} [/INST] </s>"  # system turn
    template1 = " [INST] {message} [/INST]"      # user turn
    template2 = " {response}</s>"                # assistant turn
    prompt = ""  # Prompt is built dynamically from components:
    if RAGAddon is not None:
        # BUG FIX: previously `system += RAGAddon` raised TypeError when
        # system was None; treat a missing system prompt as empty instead.
        system = (system if system is not None else "") + RAGAddon
    if system is not None:
        prompt += template0.format(system=system)
    if message is None:  # robustness: tolerate an explicit None message
        message = ""
    # Strip control tokens a user might inject into the prompt.
    message = message.replace("[INST]", "")
    message = message.replace("[/INST]", "")
    message = message.replace("</s>", "")
    message = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
    if history is not None:
        for user_message, bot_response in history[-historylimit:]:
            if user_message is None: user_message = ""
            if bot_response is None: bot_response = ""
            if removeHTML:
                # Remove HTML components in general (may cause bugs with markdown-rendering)
                bot_response = re.sub("<(.*?)>", "\n", bot_response)
            prompt += template1.format(message=user_message[:zeichenlimit])
            prompt += template2.format(response=bot_response[:zeichenlimit])
    prompt += template1.format(message=message[:zeichenlimit])
    if system2 is not None:
        prompt += system2
    return startOfString + prompt
40
+
41
+
42
+ # Specify Chatbot Response
43
+ #--------------------------
44
 
45
  import gradio as gr
46
def response(message, history, system, hfToken):
    """Stream a model reply for the Gradio chat interface.

    message : str   — current user message
    history : list  — (user, bot) pairs from gr.ChatInterface
    system : str    — system prompt; empty string selects the default
    hfToken : str   — optional HF access token ("hf_..."); replaces the
                      module-level client when provided

    Yields the accumulated reply text token by token.
    """
    global client
    # Use the HF hub with a custom token if one is provided.
    if hfToken.startswith("hf_"):
        client = InferenceClient(model=myModel, token=hfToken)
    # Fall back to the default (German) system prompt if none is provided.
    if system == "":
        system = "Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."
    prompt = format_prompt(
        message,  # current message of the user (str)
        history,  # complete history (list)
        system,   # system prompt (str)
    )
    print(prompt)
    generate_kwargs = {
        "temperature": float(0.9),
        "max_new_tokens": 1000,
        "top_p": float(0.95),
        "repetition_penalty": 1.0,
        "do_sample": True,
        "seed": 42,
    }
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    response = ""
    # Stream the response token by token.
    for chunk in stream:
        part = chunk.token.text
        response += part
        print(part, end="", flush=True)
        # response = re.sub("<(.*?)>","\n", response)  # remove HTML-components in general (may cause bugs with markdown-rendering)
        yield response
67
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
# Connect to a model on the Hugging Face Hub
# ------------------------------------------
from huggingface_hub import InferenceClient

myModel = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# myToken is None unless configured above; a per-request token entered in
# the UI overrides this client inside `response`.
client = InferenceClient(model=myModel, token=myToken)  # token="hf_..."
77
 
78
# Start the Gradio user interface
# -------------------------------
demo = gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=None, render_markdown=True),
    title="Gradio Chatbot Demo",
    additional_inputs=[
        # Optional overrides forwarded to `response` as (system, hfToken).
        gr.Textbox(label="System Prompt", value="Du bist ein hilfsbereiter Chatbot und antwortest bevorzugt in deutscher Sprache."),
        gr.Textbox(label="HF_token", value=""),
    ],
)
demo.queue().launch(share=True)  # False, server_name="0.0.0.0", server_port=7864)
print("Interface up and running!")