Spaces:

chrispie
/

llama-hqq-1-bit

Runtime error

chrispie committed on Mar 30, 2024

Commit

d676cb8

verified ·

1 Parent(s): 6c3e1ec

fixed gradio error on completed stream

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
 import torch, transformers
 from threading import Thread
 #Load the model
 model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq'
@@ -53,9 +54,11 @@ def chat(message, history):
     t, stream = chat_processor(chat=message)
     response = ""
     for character in stream:
-        response += character
-        yield response
     t.join()
     torch.cuda.empty_cache()

 from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
 import torch, transformers
 from threading import Thread
+import time
 #Load the model
 model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq'
     t, stream = chat_processor(chat=message)
     response = ""
     for character in stream:
+        if character is not None:
+            response += character
+            # print(character)
+            yield response
+    time.sleep(0.1)
     t.join()
     torch.cuda.empty_cache()