Rm queue and move launch
app.py CHANGED
@@ -55,7 +55,6 @@ def generate_text(message, history):
 
     history = ["init", input_prompt]
 
-
 demo = gr.ChatInterface(
     generate_text,
     title="llama-cpp-python on GPU",
@@ -65,6 +64,21 @@ demo = gr.ChatInterface(
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
-
-demo.queue(concurrency_count=1, max_size=5).launch()
+
+#demo.queue(concurrency_count=1, max_size=5)?
+
+if __name__ == "__main__":
+    demo.launch()
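For context on the additional_inputs change: gr.ChatInterface passes the current value of each extra control as a positional argument after message and history, so the chat function's signature has to grow to match. A minimal runnable sketch of a compatible handler follows; the parameter names and the echo body are illustrative assumptions, not taken from app.py:

import gradio as gr

# With additional_inputs, gr.ChatInterface calls the chat function with the
# value of each extra control appended, in order, after (message, history).
# Parameter names below are illustrative assumptions.
def generate_text(message, history, system_message, max_tokens, temperature, top_p):
    # Echo the settings back; a real handler would call the model instead.
    return (f"[system={system_message!r}, max_tokens={max_tokens}, "
            f"temperature={temperature}, top_p={top_p}] {message}")

demo = gr.ChatInterface(
    generate_text,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                  label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()

The queue call is likely commented out for Gradio 4 compatibility: concurrency_count was removed from queue() there (per-event concurrency_limit and the queue's default_concurrency_limit replace it), and moving demo.launch() under the __main__ guard means the app only starts when the script is run directly.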