stream
app.py
CHANGED
@@ -6,14 +6,18 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 
 
 def generate_text(messages):
+    print("generate_text")
+    print(messages)
     generated = ""
     for token in client.chat_completion(messages, max_tokens=50,stream=True):
         content = (token.choices[0].delta.content)
-        generated+=content
-        #
+        generated += content
+        #print(content)
+        #print(''.join(list(content)))
+        yield generated
         #print(token.choices[0].delta)
 
-    return generated+"." #no stream version
+    #return generated+"." #no stream version
 
 def call_generate_text(message, history):
     #if len(message) == 0:
@@ -25,14 +29,19 @@ def call_generate_text(message, history):
     user_message = [{"role":"user","content":message}]
     messages = history + user_message
     try:
-
-        assistant_message=
-
-
+
+        assistant_message = {"role":"assistant","content":""}
+        text_generator = generate_text(messages)
+
+        for text_chunk in text_generator:
+            print(f"chunk={text_chunk}")
+            assistant_message["content"] = text_chunk
+            updated_history = messages + [assistant_message]
+            yield "", updated_history
+
     except RuntimeError as e:
         print(f"An unexpected error occurred: {e}")
-
-        return "",history
+        yield "", history
 
 head = '''
 <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
@@ -131,8 +140,8 @@ window.reset_tts_text = reset_tts_text
 </script>
 '''
 
-with gr.Blocks(title="LLM with TTS",head=head) as demo:
-    gr.Markdown("## LLM is unstable: The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
+with gr.Blocks(title="LLM with TTS",head=head,js="scroll.js") as demo:
+    gr.Markdown("## An LLM is unstable: The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
     gr.Markdown("## TTS takes a long time to load: Please be patient; the first response may be delayed by 20 seconds or more while the model loads.")
     gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**. LLM and TTS models will change without notice.")
 
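The substantive change above turns both functions into generators: generate_text yields the cumulative string after every streamed token, and call_generate_text re-yields ("", updated_history) so the UI can repaint the chat as tokens arrive. A minimal sketch of how such a generator is typically bound to Blocks components follows; the component names and the submit wiring are assumptions for illustration, not part of this diff.

    import gradio as gr

    # call_generate_text is the generator defined in app.py above;
    # head stands in for the <script> block defined earlier in that file.
    head = ""

    with gr.Blocks(title="LLM with TTS", head=head, js="scroll.js") as demo:
        # type="messages" keeps history as {"role": ..., "content": ...} dicts,
        # matching the message format used by call_generate_text.
        chatbot = gr.Chatbot(type="messages")
        msg = gr.Textbox()
        # Because call_generate_text is a generator, Gradio streams each yielded
        # ("", updated_history) pair: the textbox is cleared and the chat
        # re-renders with the growing assistant message.
        msg.submit(call_generate_text, [msg, chatbot], [msg, chatbot])

    demo.launch()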
scroll.js
ADDED
@@ -0,0 +1,19 @@
+// see https://github.com/gradio-app/gradio/issues/8253
+// UNKNOWN LICENSE
+
+function Scrolldown() {
+    let targetNode = document.querySelector('[aria-label="chatbot conversation"]')
+    // Options for the observer (which mutations to observe)
+    const config = { attributes: true, childList: true, subtree: true };
+
+    // Callback function to execute when mutations are observed
+    const callback = (mutationList, observer) => {
+        targetNode.scrollTop = targetNode.scrollHeight;
+    };
+
+    // Create an observer instance linked to the callback function
+    const observer = new MutationObserver(callback);
+
+    // Start observing the target node for configured mutations
+    observer.observe(targetNode, config);
+}
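How the two files interact: each yield from call_generate_text mutates the chatbot's DOM subtree, the MutationObserver callback fires, and setting scrollTop to scrollHeight keeps the newest tokens in view. Note that Scrolldown assumes the '[aria-label="chatbot conversation"]' element already exists when gr.Blocks runs the script on load; if the selector ever stops matching, document.querySelector returns null and observer.observe will throw a TypeError.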