Update run.py

run.py CHANGED
@@ -2,7 +2,7 @@
 # Title: German AI-Interface with advanced RAG
 # Author: Andreas Fischer
 # Date: January 31st, 2023
-# Last update: February
+# Last update: February 26th, 2024
 ##########################################################################################

 #https://github.com/abetlen/llama-cpp-python/issues/306
@@ -30,7 +30,7 @@ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
 onPrem = True if(os.path.exists(dbPath)) else False
 if(onPrem==False): dbPath="/home/user/app/db"

-onPrem=
+#onPrem=True # uncomment to override automatic detection
 print(dbPath)

 #client = chromadb.Client()
@@ -164,12 +164,11 @@ else:
 import os
 import requests
 import subprocess
-modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+#modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
 if(os.path.exists(modelPath)==False):
-  #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
-  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
-  #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
   #url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+  url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
   response = requests.get(url)
   with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
     file.write(response.content)
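A note on the download in this hunk: requests.get(url) buffers the entire multi-gigabyte GGUF file in memory before anything is written to disk. A chunked variant along these lines keeps memory usage flat (a sketch reusing the url and filename from the hunk, not code from the commit):

    import requests

    # Stream the GGUF file to disk in 16 MiB chunks instead of buffering it whole
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
            for chunk in r.iter_content(chunk_size=16 * 1024 * 1024):
                file.write(chunk)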
@@ -183,10 +182,15 @@ else:
 print("Server ready!")


+#import llama_cpp
+#llama_cpp.llama_backend_init(numa=False)
+#params=llama_cpp.llama_context_default_params()
+#params.n_ctx
+
 # Gradio-GUI
 #------------
-
-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4):
+import re
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
   startOfString=""
   if zeichenlimit is None: zeichenlimit=1000000000 # :-)
   template0=" [INST]{system}\n [/INST] </s>"
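The commented-out block added here keeps the low-level llama_cpp initialization calls around for reference. When not going through the server, the usual route to the same concern (loading the GGUF and setting the context window) is the high-level wrapper; a minimal sketch, where the n_ctx value is an assumption and not taken from run.py:

    import llama_cpp

    # High-level route: load the model directly and set the context window explicitly
    llm = llama_cpp.Llama(model_path=modelPath, n_ctx=4096)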
@@ -229,13 +233,18 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
   prompt += template0.format(system=system) #"<s>"
   if history is not None:
     for user_message, bot_response in history[-historylimit:]:
-      if user_message is
-      if bot_response is
-
+      if user_message is None: user_message = ""
+      if bot_response is None: bot_response = ""
+      bot_response = re.sub("\n\n<details>((.|\n)*?)</details>","", bot_response) # remove RAG-components
+      if removeHTML==True: bot_response = re.sub("<(.*?)>","\n", bot_response) # remove HTML-components in general (may cause bugs with markdown-rendering)
+      if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit])
+      if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit])
+  if message is not None: prompt += template1.format(message=message[:zeichenlimit])
   if system2 is not None:
     prompt += system2
   return startOfString+prompt

+
 import gradio as gr
 import requests
 import json
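The history loop now guards against None entries and strips earlier RAG attachments before they are fed back into the prompt. The two regexes behave like this (a quick standalone check using only the patterns shown in the hunk):

    import re

    # Strip a previously appended <details>...</details> sources block
    s = "Antwort\n\n<details><summary><strong>Sources</strong></summary><ul><li>a</li></ul></details>"
    print(re.sub("\n\n<details>((.|\n)*?)</details>", "", s))   # -> "Antwort"

    # removeHTML: replace any remaining tag with a newline
    print(re.sub("<(.*?)>", "\n", "Hallo <b>Welt</b>"))         # -> "Hallo \nWelt\n"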
@@ -244,7 +253,8 @@ import os
 import re

 def response(message, history):
-  settings="
+  settings="Memory Off"
+  removeHTML=True

   # Preprocessing to prevent simple forms of prompt injection:
   #-----------------------------------------------------------
@@ -253,12 +263,12 @@ def response(message, history):
   message=message.replace("[/INST]","")
   message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)

-  # Load Memory if
+  # Load Memory if memory is turned on
   #-------------------------------------
-  if (settings=="Permanent"):
+  if (settings=="Memory On"):
     if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)

-  system="Du bist ein deutschsprachiges wortkarges KI-basiertes Assistenzsystem."
+  system="Du bist ein deutschsprachiges wortkarges KI-basiertes Assistenzsystem. Antworte kurz, in deutsche Sprache und verzichte auf HTML und Code jeder Art."

   #RAG-layer 0: Intention-RAG
   #---------------------------
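The replace/re.sub lines at the top of this hunk are the commit's prompt-injection guard: they strip the instruction-format control tokens so user input cannot close or reopen the [INST] blocks of the template. Collected into one helper, the rule set looks like this sketch; the [INST] rule is an assumption, since the line just above this hunk is not shown in the diff:

    import re

    def sanitize(message):
        # Remove instruction-template control tokens from user input
        message = message.replace("[INST]", "")   # assumed companion rule (not visible in this hunk)
        message = message.replace("[/INST]", "")
        message = re.sub("<[|](im_start|im_end|end_of_turn)[|]>", "", message)
        return message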
@@ -354,7 +364,13 @@ def response(message, history):
     rag, # RAG-component added to the system prompt
     system2, # fictive first words of the AI (neither displayed nor stored)
-    historylimit=historylimit # number of past messages to consider for response to current message
+    historylimit=historylimit, # number of past messages to consider for response to current message
+    removeHTML=removeHTML # remove HTML-components from History (to prevent bugs with Markdown)
   )
+  #print("\n\nMESSAGE:"+str(message))
+  #print("\n\nHISTORY:"+str(history))
+  #print("\n\nSYSTEM:"+str(system))
+  #print("\n\nRAG:"+str(rag))
+  #print("\n\nSYSTEM2:"+str(system2))
   print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")

   ## Request response from model
@@ -383,13 +399,14 @@ def response(message, history):
       part=text.token.text
       #print(part, end="", flush=True)
       response += part
+      if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
       yield response
     if((myType=="1a")): #add RAG-results to chat-output if appropriate
-
-      yield
+      response=response+"\n\n<details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+      yield response
     history.append((message, response)) # add current dialog to history
-    # Store current state in DB if
-    if (settings=="Permanent"):
+    # Store current state in DB if memory is turned on
+    if (settings=="Memory On"):
       x=collection.get(include=[])["ids"] # add current dialog to db
       collection.add(
         documents=[message,response],
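For context, the collection.get/collection.add pair above appends both sides of the current turn to the ChromaDB memory. A self-contained sketch of that pattern, where the collection name and id scheme are assumptions (run.py's actual values are not visible in this diff):

    import chromadb

    client = chromadb.PersistentClient(path=dbPath)               # dbPath as defined earlier in run.py
    collection = client.get_or_create_collection(name="memory")   # assumed collection name

    ids = collection.get(include=[])["ids"]                       # fetch ids only, skip embeddings/documents
    collection.add(
        documents=[message, response],                            # user turn and model answer
        ids=[str(len(ids) + 1), str(len(ids) + 2)],               # assumed sequential id scheme
    )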
@@ -405,7 +422,8 @@ def response(message, history):
   # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
   url="http://0.0.0.0:2600/v1/completions"
   body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
-  if("
+  if("Discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+  if("Gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
   response="" #+"("+myType+")\n"
   buffer=""
   #print("URL: "+url)
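The body above requests a streamed completion from the local llama-cpp-python server, which emulates the OpenAI-style SSE protocol. run.py consumes that stream further down (outside the hunks shown); a minimal consumer looks roughly like this sketch:

    import json
    import requests

    # Stream tokens from the /v1/completions endpoint line by line
    with requests.post(url, json=body, stream=True) as r:
        for line in r.iter_lines():
            if line.startswith(b"data: "):
                payload = line[len(b"data: "):]
                if payload == b"[DONE]":
                    break
                part = json.loads(payload)["choices"][0]["text"]
                print(part, end="", flush=True)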
@@ -432,13 +450,13 @@ def response(message, history):
     except Exception as e:
       print("Exception:"+str(e))
       pass
+    if removeHTML==True: response = re.sub("<(.*?)>","\n", response) # remove HTML-components in general (may cause bugs with markdown-rendering)
     yield response
   if((myType=="1a")): #add RAG-results to chat-output if appropriate
-
-    yield
-
-
-    if (settings=="Permanent"):
+    response=response+"\n\n<details><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+    yield response
+    # Store current state in DB if memory is turned on
+    if (settings=="Memory On"):
       x=collection.get(include=[])["ids"] # add current dialog to db
       collection.add(
         documents=[message,response],
@@ -453,9 +471,11 @@ def response(message, history):

 gr.ChatInterface(
   response,
-  chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt
-  title="German AI-Interface with advanced RAG",
-  #additional_inputs=[gr.Dropdown(["
+  chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.\nAktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<ul><li>Wenn du ein KI-Modell suchst, antworte ich auf Basis der Liste</li><li>Wenn du Fragen zur Benutzung eines KI-Modells hast, verweise ich an andere Stellen</li><li>Wenn du andre Fragen hast, antworte ich frei und berücksichtige dabei Relevantes aus dem gesamten bisherigen Dialog.</li></ul>\nWas ist dein Anliegen?"]],render_markdown=True),
+  title="German AI-Interface with advanced RAG (on prem)" if onPrem else "German AI-Interface with advanced RAG (HFHub)",
+  #additional_inputs=[gr.Dropdown(["Memory On","Memory Off"],value="Memory Off",label="Memory")]
 ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
 print("Interface up and running!")

+
+
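Stripped of the RAG and memory layers, the UI wiring in this last hunk reduces to the following skeleton (a sketch using Gradio's tuple-style chat history, as run.py does; the greeting is shortened here):

    import gradio as gr

    def response(message, history):
        yield "..."  # streamed answer, produced as in the hunks above

    gr.ChatInterface(
        response,
        chatbot=gr.Chatbot(value=[[None, "Herzlich willkommen!"]], render_markdown=True),
        title="German AI-Interface with advanced RAG",
    ).queue().launch(share=True)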