Update app.py
app.py
CHANGED
@@ -28,7 +28,7 @@ def respond(
     temperature,
     top_p,
 ):
-
+    stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
     chat_template = '<s>[INST] ' + system_message
     # for human, assistant in history:
     #     chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
@@ -54,11 +54,17 @@ def respond(
         completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
-    response = ""
+    # response = ""
+    # for chunk in llm.stream_complete(message):
+    #     print(chunk.delta, end="", flush=True)
+    #     response += str(chunk.delta)
+    #     yield response
+    outputs = []
     for chunk in llm.stream_complete(message):
-        print(chunk.delta, end="", flush=True)
-        response += str(chunk.delta)
-        yield response
+        outputs.append(chunk.delta)
+        if chunk.delta in stop_tokens:
+            break
+        yield "".join(outputs)
 
 demo = gr.ChatInterface(
     respond,
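The new loop accumulates streamed deltas and breaks as soon as the model emits a raw chat-template token, so stray `</s>` or `[INST]` markers never reach the Gradio UI. Below is a minimal, runnable sketch of that same pattern; the `FakeChunk` class and `fake_stream_complete` generator are stand-ins for the chunks LlamaCPP's `stream_complete` yields (only the `.delta` attribute matters), not part of the actual app:

from dataclasses import dataclass

# Stand-in for the response chunks LlamaCPP's stream_complete yields;
# the loop only reads the .delta attribute.
@dataclass
class FakeChunk:
    delta: str

def fake_stream_complete(message):
    # Simulates a model that streams a few deltas, then leaks a stop token.
    for delta in ["Hi", " there", "!", "</s>", "never reached"]:
        yield FakeChunk(delta)

stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]

def respond_sketch(message):
    outputs = []
    for chunk in fake_stream_complete(message):
        outputs.append(chunk.delta)
        if chunk.delta in stop_tokens:
            # The stop token lands in `outputs`, but the break fires before
            # the next yield, so it is never shown to the user.
            break
        yield "".join(outputs)

for partial in respond_sketch("hello"):
    print(partial)  # prints "Hi", "Hi there", "Hi there!"; "</s>" is swallowed

One caveat of this approach: it only catches a stop token that arrives as an entire delta. If the backend splits a marker such as "</s>" across two chunks, the membership test misses it, so the list of stop tokens above includes the trailing-space variants the model is known to emit.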