Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,19 +22,19 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, t
|
|
| 22 |
print("\nPROMPT: \n\t" + formatted_prompt)
|
| 23 |
|
| 24 |
# Generate text from the HF inference
|
| 25 |
-
|
| 26 |
-
output = ""
|
| 27 |
|
| 28 |
-
for response in stream:
|
| 29 |
-
output += response.token.text
|
| 30 |
-
yield output
|
| 31 |
return output
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
additional_inputs=[
|
| 36 |
gr.Slider( label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs", ),
|
| 37 |
-
gr.Slider( label="Max new tokens", value=
|
| 38 |
gr.Slider( label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens", ),
|
| 39 |
gr.Slider( label="Top-k", value=50, minimum=0, maximum=100, step=1, interactive=True, info="Limits the number of top-k tokens considered at each step"),
|
| 40 |
]
|
|
|
|
| 22 |
print("\nPROMPT: \n\t" + formatted_prompt)
|
| 23 |
|
| 24 |
# Generate text from the HF inference
|
| 25 |
+
output = client.text_generation(formatted_prompt, **generate_kwargs, details=True, return_full_text=True)
|
| 26 |
+
#output = ""
|
| 27 |
|
| 28 |
+
#for response in stream:
|
| 29 |
+
# output += response.token.text
|
| 30 |
+
# yield output
|
| 31 |
return output
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
additional_inputs=[
|
| 36 |
gr.Slider( label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs", ),
|
| 37 |
+
gr.Slider( label="Max new tokens", value=150, minimum=0, maximum=250, step=64, interactive=True, info="The maximum numbers of new tokens", ),
|
| 38 |
gr.Slider( label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens", ),
|
| 39 |
gr.Slider( label="Top-k", value=50, minimum=0, maximum=100, step=1, interactive=True, info="Limits the number of top-k tokens considered at each step"),
|
| 40 |
]
|