Advanced-RAG-Demo

Runtime error

App Files Files Community

AFischer1985 committed on Feb 21, 2024

Commit

09eaef4

verified ·

1 Parent(s): 8d8b439

Update run.py

Browse files

Files changed (1) hide show

run.py +353 -74

run.py CHANGED Viewed

@@ -1,18 +1,34 @@
-#############################################################################################################
-# Title:  Gradio Interface to LLM-chatbot (for recommending AI) with RAG-funcionality and ChromaDB on HF-Hub
 # Author: Andreas Fischer
-# Date:   December 30th, 2023
-# Last update: January 2nd, 2023
-##############################################################################################################
 # Chroma-DB
 #-----------
 import os
 import chromadb
 dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
-if(os.path.exists(dbPath)==False):
-  dbPath="/home/user/app/db"
 print(dbPath)
 #client = chromadb.Client()
 path=dbPath
@@ -22,23 +38,38 @@ print(client.get_version())
 print(client.list_collections())
 from chromadb.utils import embedding_functions
 default_ef = embedding_functions.DefaultEmbeddingFunction()
-sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
 #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
 print(str(client.list_collections()))
 global collection
-if("name=ChromaDB1" in str(client.list_collections())):
-  print("ChromaDB1 found!")
-  collection = client.get_collection(name="ChromaDB1", embedding_function=sentence_transformer_ef)
 else:
-  print("ChromaDB1 created!")
   collection = client.create_collection(
-    "ChromaDB1",
-    embedding_function=sentence_transformer_ef,
     metadata={"hnsw:space": "cosine"})
-  collection.add(
-    documents=[
       "Text generating AI model mistralai/Mixtral-8x7B-Instruct-v0.1: Suitable for text generation, e.g., social media content, marketing copy, blog posts, short stories, etc.",
       "Image generating AI model stabilityai/sdxl-turbo: Suitable for image generation, e.g., illustrations, graphics, AI art, etc.",
       "Audio transcribing AI model openai/whisper-large-v3: Suitable for audio-transcription in different languages",
@@ -46,80 +77,328 @@ else:
       "Code generating AI model deepseek-ai/deepseek-coder-6.7b-instruct: Suitable for programming in Python, JavaScript, PHP, Bash and many other programming languages.",
       "Translation AI model Helsinki-NLP/opus-mt: Suitable for translating text, e.g., from English to German or vice versa",
       "Search result-integrating AI model phind/phind-v9-model: Suitable for researching current topics and for obtaining precise and up-to-date answers to questions based on web search results"
-    ],
-    metadatas=[{"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}],
-    ids=["ai1", "ai2", "ai3", "ai4", "ai5", "ai6", "ai7"],
   )
 print("Database ready!")
 print(collection.count())
-# Model
-#-------
-from huggingface_hub import InferenceClient
-import gradio as gr
-client = InferenceClient(
-    "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    #"mistralai/Mistral-7B-Instruct-v0.1"
-)
 # Gradio-GUI
 #------------
 import gradio as gr
 import json
-def format_prompt(message, history):
-  prompt = "<s>"
-  #for user_prompt, bot_response in history:
-  #  prompt += f"[INST] {user_prompt} [/INST]"
-  #  prompt += f" {bot_response}</s> "
-  prompt += f"[INST] {message} [/INST]"
-  return prompt
-def response(
-    prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
-):
-    temperature = float(temperature)
-    if temperature < 1e-2: temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-    addon=""
-    results=collection.query(
-      query_texts=[prompt],
       n_results=2,
-      #where={"source": "google-docs"}
       #where_document={"$contains":"search_string"}
     )
-    dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
-    sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
-    results=results['documents'][0]
-    combination = zip(results,dists,sources)
     combination = [' '.join(triplets) for triplets in combination]
-    print(combination)
-    if(len(results)>1):
-      addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
-    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
-    #body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
-    formatted_prompt = format_prompt(system+"\n"+prompt, history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
-    output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-    yield output
-gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),title="German AI-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
-print("Interface up and running!")

+#########################################################################################
+# Title:  German AI-Interface to the Hugging Face Hub with advanced RAG
 # Author: Andreas Fischer
+# Date:   January 31st, 2023
+# Last update: February 21st, 2024
+##########################################################################################
+#https://github.com/abetlen/llama-cpp-python/issues/306
+#sudo apt install libclblast-dev
+#CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir -v
+# Prepare resources
+#-------------------
+import torch
+import gc
+torch.cuda.empty_cache()
+gc.collect()
+import os
+from datetime import datetime
+global filename
+filename=f"./{datetime.now().strftime('%Y%m%d')}_history.json" # where to store the history as json-file
+if(os.path.exists(filename)==True): os.remove(filename)
 # Chroma-DB
 #-----------
 import os
 import chromadb
 dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
+if(os.path.exists(dbPath)==False): dbPath="/home/user/app/db"
 print(dbPath)
 #client = chromadb.Client()
 path=dbPath
 print(client.list_collections())
 from chromadb.utils import embedding_functions
 default_ef = embedding_functions.DefaultEmbeddingFunction()
+#sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
 #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
+embeddingModel = embedding_functions.InstructorEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer", device="cuda")
 print(str(client.list_collections()))
 global collection
+dbName="myDB"
+if("name="+dbName in str(client.list_collections())): client.delete_collection(name=dbName)
+if("name="+dbName in str(client.list_collections())):
+  print(dbName+" found!")
+  collection = client.get_collection(name=dbName, embedding_function=embeddingModel )
 else:
+  print(dbName+" created!")
   collection = client.create_collection(
+    dbName,
+    embedding_function=embeddingModel,
     metadata={"hnsw:space": "cosine"})
+  # txts0: Intentions
+  #------------------
+  txts0=[
+      "Ich suche ein KI-Programm mit bestimmten Fähigkeiten.",            # 1a
+      #"Ich suche kein KI-Programm mit bestimmten Fähigkeiten.",          # !1a
+      "Ich habe ein KI-Programm und habe Fragen zur Benutzung.",          # !1a (besser, um 1a und 1b abzugrenzen)
+      "Ich habe ein KI-Programm und habe Fragen zur Benutzung.",          # 1b
+      #"Ich habe kein KI-Programm und habe keine Fragen zur Benutzung.",  # !1b
+      "Ich habe eine allgemeine Frage ohne KI-Bezug."                     # !1b (greift besser bei Alltagsfragen)
+    ]
+  # txts1a: RAG-Infos for first intention:
+  #---------------------------------------
+  txts1a=[
       "Text generating AI model mistralai/Mixtral-8x7B-Instruct-v0.1: Suitable for text generation, e.g., social media content, marketing copy, blog posts, short stories, etc.",
       "Image generating AI model stabilityai/sdxl-turbo: Suitable for image generation, e.g., illustrations, graphics, AI art, etc.",
       "Audio transcribing AI model openai/whisper-large-v3: Suitable for audio-transcription in different languages",
       "Code generating AI model deepseek-ai/deepseek-coder-6.7b-instruct: Suitable for programming in Python, JavaScript, PHP, Bash and many other programming languages.",
       "Translation AI model Helsinki-NLP/opus-mt: Suitable for translating text, e.g., from English to German or vice versa",
       "Search result-integrating AI model phind/phind-v9-model: Suitable for researching current topics and for obtaining precise and up-to-date answers to questions based on web search results"
+    ]
+  # txts1b: RAG-Infos for second intention
+  #----------------------------------------
+  txts1b=[
+    "Für Fragen zur Umsetzung von KI-Verfahren ist das KI-basierte Assistenzsystem nicht geeignet. Möglicherweise empfiehlt sich ein KI-Modell mit Internetzugriff, wie beispielsweise phind.com, oder das Kontaktieren eines Experten wie Dr. Andreas Fischer (andreasfischer1985@web.de)."
+    ]
+  #meta=[{"type":"0", "type2":"0","source":"AF"}]*len(txts0)+[{"type":"1a","type2":"0","source":"AF"}]*len(txts1a)+[{"type":"1b","type2":"0","source":"AF"}]*len(txts1b)
+  meta = []
+  for _ in range(len(txts0)):
+    meta.append({"type":"0", "type2":"0","source":"AF"})
+  for _ in range(len(txts1a)):
+    meta.append({"type":"1a","type2":"0","source":"AF"})
+  for _ in range(len(txts1b)):
+    meta.append({"type":"1b","type2":"0","source":"AF"})
+  #Change type2 for txt0-entries
+  #-----------------------------
+  meta[0]["type2"]="1a" # RAG mit txts1a
+  meta[1]["type2"]="!1a" # else
+  meta[2]["type2"]="1b" # RAG mit txts1b
+  meta[3]["type2"]="!1b" # else
+  txts=txts0+txts1a+txts1b
+  collection.add(
+    documents=txts,
+    ids=[str(i) for i in list(range(len(txts)))],
+    metadatas=meta
   )
+  # Add entry to episodic memory
+  x=collection.get(include=[])["ids"]
+  if(True): #len(x)==0):
+    message="Ich bin der User."
+    response="Hallo User, wie kann ich dienen?"
+    x=collection.get(include=[])["ids"]
+    collection.add(
+      documents=[message,response],
+      metadatas=[
+        {"source": "ICH", "dialog": f"ICH: {message}\nDU: {response}", "type":"episode"},
+        {"source": "DU",  "dialog": f"ICH: {message}\nDU: {response}", "type":"episode"}
+      ],
+      ids=[str(len(x)+1),str(len(x)+2)]
+    )
+    RAGResults=collection.query(
+      query_texts=[message],
+      n_results=1,
+      #where={"source": "USER"}
+    )
+    RAGResults["metadatas"][0][0]["dialog"]
+x=collection.get(include=[])["ids"]
+x
+collection.get() # Inspect db-entries
 print("Database ready!")
 print(collection.count())
+rag0=collection.query(
+  query_texts=[message],
+  n_results=4,
+  where={"type": "0"}
+  )
+x=rag0["metadatas"][0][0]["type2"]
+x=[x["type2"] for x in rag0["metadatas"][0]]
+x.index("1c") if "1c" in x else len(x)+1
+# Get model
+#-----------
+import os
+import requests
+modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
+if(os.path.exists(modelPath)==False):
+  #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
+  #url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
+  #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
+  url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+  response = requests.get(url)
+  with open("./model.gguf", mode="wb") as file:
+    file.write(response.content)
+  print("Model downloaded")
+  modelPath="./model.gguf"
+print(modelPath)
+# Llama-cpp-Server
+#------------------
+import subprocess
+n="20"
+if("mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
+command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
+subprocess.Popen(command)
+print("Server ready!")
 # Gradio-GUI
 #------------
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
+  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
+  template0="[INST] {system} [/INST]</s>" #<s>
+  template1="[INST] {message} [/INST] "
+  template2="{response}</s>"
+  if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+    template0="[INST] {system} [/INST]</s>" #<s>
+    template1="[INST] {message} [/INST] "
+    template2="{response}</s>"
+  if("Mistral-7B-Instruct" in modelPath): #https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
+    template0="[INST] {system} [/INST]</s>" #<s>
+    template1="[INST] {message} [/INST] "
+    template2="{response}</s>"
+  if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
+    template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
+    template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
+    template2="{response}<|end_of_turn|>"
+  if("SauerkrautLM-7b-HerO" in modelPath):  #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
+    template0="<|im_start|>system\n{system}<|im_end|>\n"
+    template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    template2="{response}<|im_end|>\n"
+  if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
+    template0="<|im_start|>system\n{system}<|im_end|>\n"
+    template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    template2="{response}<|im_end|>\n"
+  if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
+    template0="{system} " #<s>
+    template1="USER: {message} ASSISTANT: "
+    template2="{response}</s>"
+  if("phi-2" in modelPath): #https://huggingface.co/TheBloke/phi-2-GGUF
+    template0="Instruct: {system}\nOutput: Okay.\n"
+    template1="Instruct: {message}\nOutput:"
+    template2="{response}\n"
+  prompt = ""
+  if RAGAddon is not None:
+    system += RAGAddon
+  if system is not None:
+    prompt += template0.format(system=system) #"<s>"
+  if history is not None:
+    for user_message, bot_response in history[-historylimit:]:
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])  #"[INST] {user_prompt} [/INST] "
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit])                #"[INST] {message} [/INST]"
+  if system2 is not None:
+    prompt += system2
+  return prompt
 import gradio as gr
+import requests
 import json
+from datetime import datetime
+import os
+import re
+def response(message, history):
+  settings="Temporär"
+  # Preprocessing to revent simple forms of prompt injection:
+  #----------------------------------------------------------
+  message=message.replace("[INST]","")
+  message=message.replace("[/INST]","")
+  message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
+  # Load Memory if settings=="Permanent"
+  #-------------------------------------
+  if (settings=="Permanent"):
+    if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
+  system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem."
+  #RAG-layer 0: Intention-RAG
+  #---------------------------
+  typeResults=collection.query(
+    query_texts=[message],
+    n_results=4,
+    where={"type": "0"}
+  )
+  myType=typeResults["metadatas"][0][0]["type2"] # einfachste Variante
+  x=[x["type2"] for x in typeResults["metadatas"][0]] # liste die type2-Einträge auf
+  myType="1a" if ((x.index("1a") if "1a" in x else len(x)+1) < (x.index("!1a") if "!1a" in x else len(x)+1)) else "else" # setze 1a wenn es besser passt als !1a
+  if ((x.index("1b") if "1b" in x else len(x)+1) < (x.index("1a") if "1a" in x else len(x)+1)): # prüfe 1b wenn 1b besser passt als 1a
+    if ((x.index("1b") if "1b" in x else len(x)+1) < (x.index("!1b") if "!1b" in x else len(x)+1)): myType="1b" # setze 1b wenn besser als !1b (sonst lass 1a/else)
+  print("Message:"+message+"\n\nIntention-Type: "+myType+"\n\n"+str(typeResults))
+  #RAG-layer 1: Respond with CustomDB-RAG (1a, 1b) or Memory-RAG
+  #--------------------------------------------------------------
+  rag=None
+  historylimit=4
+  combination=None
+  ## RAG 1a: Respond with CustomDB-RAG
+  #-----------------------------------
+  if(myType=="1a"):
+    RAGResults=collection.query(
+      query_texts=[message],
       n_results=2,
+      where={"type": myType}
       #where_document={"$contains":"search_string"}
     )
+    dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in RAGResults['distances'][0]]
+    sources=["source: "+s["source"]+")</small>" for s in RAGResults['metadatas'][0]]
+    texts=RAGResults['documents'][0]
+    combination = zip(texts,dists,sources)
     combination = [' '.join(triplets) for triplets in combination]
+    #print(combination)
+    rag="\n\n"
+    rag += "Mit Blick auf die aktuelle Äußerung des Users erinnerst du dich insb. an folgende KI-Verfahren aus unserer Datenbank:\n"
+    rag += str(texts)
+    rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
+  else:
+    ## RAG 1a: Respond with CustomDB-RAG
+    #-----------------------------------
+    if(myType=="1b"):
+      RAGResults=collection.query(
+        query_texts=[message],
+        n_results=2,
+        where={"type": myType}
+        #where_document={"$contains":"search_string"}
+      )
+      dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in RAGResults['distances'][0]]
+      sources=["source: "+s["source"]+")</small>" for s in RAGResults['metadatas'][0]]
+      texts=RAGResults['documents'][0]
+      combination = zip(texts,dists,sources)
+      combination = [' '.join(triplets) for triplets in combination]
+      #print(combination)
+      rag="\n\n"
+      rag += "Beziehe dich in deiner Antwort AUSSCHLIEßLICH auf die folgenden Informationen:\n"
+      rag += str(texts)
+      rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
+    ## Else: Respond with Memory-RAG
+    #--------------------------------
+    else:
+      x=collection.get(include=[])["ids"]
+      if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
+        RAGResults=collection.query(
+          query_texts=[message],
+          n_results=1,
+          where={"type": "episode"}
+        )
+        texts=RAGResults["metadatas"][0][0]["dialog"] #str()
+        #print("Message: "+message+"\n\nBest Match: "+texts)
+        rag="\n\n"
+        rag += "Mit Blick auf die aktuelle Äußerung des Users erinnerst du dich insb. an folgende Episode aus eurem Dialog:\n"
+        rag += str(texts)
+        rag += "\n\nIm Folgenden siehst du den jüngsten Dialog-Verlauf:"
+  # Request Response from LLM:
+  system2=None # system2 can be used as fictive first words of the AI, which are not displayed or stored
+  print("RAG: "+rag)
+  print("System: "+system+"\n\nMessage: "+message)
+  prompt=extend_prompt(
+    message,                  # current message of the user
+    history,                  # complete history
+    system,                   # system prompt
+    rag,                      # RAG-component added to the system prompt
+    system2,                  # fictive first words of the AI (neither displayed nor stored)
+    historylimit=historylimit # number of past messages to consider for response to current message
+    )
+  print(prompt)
+  # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
+  url="http://0.0.0.0:2600/v1/completions"
+  body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"}      # e.g. Mixtral-Instruct
+  if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]})   # fix stop-token of DiscoLM
+  response="" #+"("+myType+")\n"
+  buffer=""
+  print("URL: "+url)
+  print("User: "+message+"\nAI: ")
+  for text in requests.post(url, json=body, stream=True):  #-H 'accept: application/json' -H 'Content-Type: application/json'
+    if buffer is None: buffer=""
+    buffer=str("".join(buffer))
+    # print("*** Raw String: "+str(text)+"\n***\n")
+    text=text.decode('utf-8')
+    if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
+    # print("\n*** Buffer: "+str(buffer)+"\n***\n")
+    buffer=buffer.split('"finish_reason": null}]}')
+    if(len(buffer)==1):
+      buffer="".join(buffer)
+      pass
+    if(len(buffer)==2):
+      part=buffer[0]+'"finish_reason": null}]}'
+      if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
+      try:
+        part = str(json.loads(part)["choices"][0]["text"])
+        print(part, end="", flush=True)
+        response=response+part
+        buffer="" # reset buffer
+      except Exception as e:
+        print("Exception:"+str(e))
+        pass
+    yield response
+  if((myType=="1a")|(myType=="1b")): #add RAG-results to chat-output if appropriate
+    response=response+"\n\n<br><details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+    yield response
+  history.append((message, response)) # add current dialog to history
+  # Store current state in DB if settings=="Permanent"
+  if (settings=="Permanent"):
+    x=collection.get(include=[])["ids"] # add current dialog to db
+    collection.add(
+      documents=[message,response],
+      metadatas=[
+        { "source": "ICH", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"},
+        { "source": "DU",  "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"}
+      ],
+      ids=[str(len(x)+1),str(len(x)+2)]
+    )
+    json.dump(history,open(filename,'w',encoding="utf-8"),ensure_ascii=False)
+gr.ChatInterface(
+  response,
+  chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True)
+  title="German AI-Interface to the Hugging Face Hub with advanced RAG",
+  #additional_inputs=[gr.Dropdown(["Permanent","Temporär"],value="Temporär",label="Dialog sichern?")]
+  ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
+print("Interface up and running!")