Update app.py
app.py
CHANGED
@@ -28,7 +28,7 @@ def respond(
     temperature,
     top_p,
 ):
-
+    stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
     chat_template = '<s>[INST] ' + system_message
     # for human, assistant in history:
     #     chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
@@ -54,11 +54,17 @@ def respond(
         completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
-    response = ""
+    # response = ""
+    # for chunk in llm.stream_complete(message):
+    #     print(chunk.delta, end="", flush=True)
+    #     response += str(chunk.delta)
+    #     yield response
+    outputs = []
     for chunk in llm.stream_complete(message):
-        print(chunk.delta, end="", flush=True)
-        response += str(chunk.delta)
-        yield response
+        outputs.append(chunk.delta)
+        if chunk.delta in stop_tokens:
+            break
+        yield "".join(outputs)
 
 demo = gr.ChatInterface(
     respond,
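The new loop accumulates streamed deltas and breaks as soon as the model emits a raw chat-template token, so stray `</s>` or `[INST]` markers never reach the Gradio UI. Below is a minimal, runnable sketch of that same pattern; the `FakeChunk` class and `fake_stream_complete` generator are stand-ins for the chunks LlamaCPP's `stream_complete` yields (only the `.delta` attribute matters), not part of the actual app:

from dataclasses import dataclass

# Stand-in for the response chunks LlamaCPP's stream_complete yields;
# the loop only reads the .delta attribute.
@dataclass
class FakeChunk:
    delta: str

def fake_stream_complete(message):
    # Simulates a model that streams a few deltas, then leaks a stop token.
    for delta in ["Hi", " there", "!", "</s>", "never reached"]:
        yield FakeChunk(delta)

stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]

def respond_sketch(message):
    outputs = []
    for chunk in fake_stream_complete(message):
        outputs.append(chunk.delta)
        if chunk.delta in stop_tokens:
            # The stop token lands in `outputs`, but the break fires before
            # the next yield, so it is never shown to the user.
            break
        yield "".join(outputs)

for partial in respond_sketch("hello"):
    print(partial)  # prints "Hi", "Hi there", "Hi there!"; "</s>" is swallowed

One caveat of this approach: it only catches a stop token that arrives as an entire delta. If the backend splits a marker such as "</s>" across two chunks, the membership test misses it, so the list of stop tokens above includes the trailing-space variants the model is known to emit.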