Spaces: Update app.py

app.py CHANGED
@@ -109,12 +109,11 @@ MODEL_CONTEXT_SIZES = {
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "microsoft/phi-3-mini-4k-instruct": 4096,
         "microsoft/Phi-3.5-mini-instruct": 4096,
-        "microsoft/Phi-3-mini-128k-instruct": 131072,
+        "microsoft/Phi-3-mini-128k-instruct": 131072,
         "HuggingFaceH4/zephyr-7b-beta": 8192,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
         "google/gemma-2-2b-it": 2048,
         "microsoft/phi-2": 2048,
-        # Add other model contexts here
     }
 }
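The hunk above only records context-window sizes; how app.py consumes the table lies outside this diff. The sketch below is illustrative rather than taken from the commit: the provider key, the truncate_to_context helper, and the 4-characters-per-token inversion are assumptions (only MODEL_CONTEXT_SIZES and estimate_tokens appear elsewhere in this diff).

    # Illustrative sketch, not part of this commit: gate prompt length against the
    # nested context-size table. Provider key and helper name are assumptions.
    MODEL_CONTEXT_SIZES = {
        "HuggingFace Inference": {
            "microsoft/Phi-3-mini-128k-instruct": 131072,
            "google/gemma-2-2b-it": 2048,
        },
    }

    def estimate_tokens(text: str) -> int:
        """Rough token estimation: ~4 characters per token on average."""
        return max(1, len(text) // 4)

    def truncate_to_context(prompt: str, provider: str, model: str, reply_margin: int = 512) -> str:
        """Trim the prompt so it fits the model's window, leaving room for the reply."""
        limit = MODEL_CONTEXT_SIZES.get(provider, {}).get(model, 4096)
        if estimate_tokens(prompt) + reply_margin <= limit:
            return prompt
        return prompt[: (limit - reply_margin) * 4]   # invert the 4-chars-per-token estimate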
@@ -522,14 +521,28 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
     elif model_selection == "GLHF API":
         if not glhf_api_key:
             return "Error: GLHF API key required", None
-        [... the removed GLHF call is largely lost in this view; only these fragments survive ...]
-            hf_custom_model if hf_model_choice ==
-        )
+
+        # Determine the actual model ID to use
+        if glhf_model == "Use HuggingFace Model":
+            model_id = f"hf:{hf_custom_model if hf_model_choice == 'Custom Model' else model_registry.hf_models[hf_model_choice]}"
+        else:
+            model_id = f"hf:{glhf_custom_model}"
+
+        summary = send_to_glhf(prompt, glhf_api_key, model_id, use_rate_limits)
+
+        if not summary:
+            return "Error: No response from model", None
+
+        if not isinstance(summary, str):
+            return "Error: Invalid response type from model", None
+
+        # Create download file for valid responses
+        if not summary.startswith("Error"):
+            with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
+                f.write(summary)
+            return summary, f.name
+
+        return summary, None
 
     else:
         return "Error: Invalid model selection", None
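The new branch validates the model's reply and, when it is not an error string, writes it to a temporary .txt file so Gradio can serve it as a download. A minimal sketch of that pattern pulled into a standalone helper; the name summary_to_download_file is hypothetical, and the commit keeps this logic inline:

    import tempfile

    def summary_to_download_file(summary):
        """Return (summary, path_to_txt) for Gradio outputs, or (summary, None) for errors."""
        if not summary or not isinstance(summary, str) or summary.startswith("Error"):
            return summary, None
        # delete=False keeps the file around for Gradio's download component to serve
        with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
            f.write(summary)
        return summary, f.name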
@@ -583,7 +596,7 @@ def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_
 
 def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
                  api_key: str, use_rate_limit: bool = False) -> str:
-    """Send prompt to GLHF API with model selection
+    """Send prompt to GLHF API with model selection."""
     def _send():
         try:
             import openai
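The only change here is closing the docstring. Without the terminating quotes, Python treats everything up to the next triple quote in the file as part of the string, or fails to compile if there is none, so the body of send_to_glhf never runs as code. A tiny reproduction, not from app.py:

    # Unterminated triple-quoted string: the module fails to compile (or, if another
    # """ appears later, silently swallows the intervening code into the string).
    source = 'def f():\n    """doc\n    return 1\n'
    try:
        compile(source, "<demo>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # e.g. "unterminated triple-quoted string literal"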
@@ -594,42 +607,22 @@ def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model:
 
             model_id = f"hf:{model_name if use_hf_model else custom_model}"
 
-            [... opening of the removed non-streaming attempt is not recoverable from this view ...]
-                )
-                return completion.choices[0].message.content
-            except Exception as non_stream_error:
-                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")
-
-                # Fallback to streaming if needed
-                completion = client.chat.completions.create(
-                    stream=True,
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant."},
-                        {"role": "user", "content": prompt}
-                    ],
-                )
+            # For GLHF, always use streaming for reliability
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
 
-                [... opening of the removed streaming loop is not recoverable from this view ...]
-                            response_text.append(chunk.choices[0].delta.content)
-                except Exception as stream_error:
-                    if response_text:  # If we got partial response, return it
-                        logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
-                        return "".join(response_text)
-                    raise  # Re-raise if we got nothing
+            response_text = []
+            for chunk in completion:
+                if chunk.choices[0].delta.content is not None:
+                    response_text.append(chunk.choices[0].delta.content)
 
-
+            return "".join(response_text)
 
         except Exception as e:
             logging.error(f"GLHF API error: {e}")
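After this change the helper always streams and joins the chunks instead of trying a non-streaming call first. A usage sketch against the signature above; the model name is only an example, the environment-variable name is an assumption, and the OpenAI-compatible `client` used above is set up in unchanged lines outside this hunk:

    import os

    text = send_to_glhf(
        prompt="Summarize the attached meeting notes in five bullet points.",
        use_hf_model=True,                                 # use model_name rather than custom_model
        model_name="mistralai/Mistral-7B-Instruct-v0.3",   # example model from the table above
        custom_model="",                                   # ignored when use_hf_model is True
        api_key=os.environ["GLHF_API_KEY"],                # assumed env var name
        use_rate_limit=True,
    )
    print(text)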
@@ -702,41 +695,27 @@ def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate
 
     return apply_rate_limit(_send, 16) if use_rate_limit else _send()
 
-def [... rest of the removed duplicate definition's signature is lost in this view ...]
-    """Send prompt to GLHF API with model selection."""
+def send_to_groq(prompt: str, model_name: str, api_key: str, use_rate_limit: bool = False) -> str:
+    """Send prompt to Groq API."""
     def _send():
         try:
-            [... removed request setup is not recoverable from this view ...]
-                stream=True,
-                model=model_id,
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": prompt}
-                ],
+            client = Groq(api_key=api_key)
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }],
+                temperature=0.7,
+                max_tokens=500,
+                top_p=0.95
             )
-
-            response_text = []
-            for chunk in completion:
-                if chunk.choices[0].delta.content is not None:
-                    response_text.append(chunk.choices[0].delta.content)
-
-            return "".join(response_text)
-
+            return response.choices[0].message.content
         except Exception as e:
-            logging.error(f" [...]
-            return f"Error with [...]
+            logging.error(f"Groq API error: {e}")
+            return f"Error with Groq API: {str(e)}"
 
-    return apply_rate_limit(_send, [...]
+    return apply_rate_limit(_send, 4) if use_rate_limit else _send()
 
 def estimate_tokens(text: str) -> int:
     """Rough token estimation: ~4 characters per token on average"""
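The new send_to_groq mirrors the other provider helpers: it assumes `from groq import Groq` is already imported at the top of app.py, and it reuses apply_rate_limit, which is called throughout the diff but never shown. A minimal sketch of what such a limiter could look like, assuming it only enforces a minimum spacing between calls; the real helper in app.py may differ:

    import time

    _last_call = {}

    def apply_rate_limit(fn, min_interval_seconds):
        """Call fn, sleeping first so it runs at most once per min_interval_seconds."""
        key = getattr(fn, "__qualname__", repr(fn))
        elapsed = time.monotonic() - _last_call.get(key, 0.0)
        if elapsed < min_interval_seconds:
            time.sleep(min_interval_seconds - elapsed)
        _last_call[key] = time.monotonic()
        return fn()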
@@ -1342,20 +1321,20 @@ with gr.Blocks(css="""
     send_to_model_btn.click(
         fn=send_to_model,
         inputs=[
-            generated_prompt,
-            model_choice,
-            hf_model,
-            hf_custom_model,
-            hf_api_key,
-            groq_model,
-            groq_api_key,
-            openai_api_key,
-            openai_model,
-            cohere_api_key,
-            cohere_model,
-            glhf_api_key,
-            glhf_model,
-            glhf_custom_model
+            generated_prompt,   # prompt
+            model_choice,       # model_selection
+            hf_model,           # hf_model_choice
+            hf_custom_model,    # hf_custom_model
+            hf_api_key,         # hf_api_key
+            groq_model,         # groq_model_choice
+            groq_api_key,       # groq_api_key
+            openai_api_key,     # openai_api_key
+            openai_model,       # openai_model_choice
+            cohere_api_key,     # cohere_api_key
+            cohere_model,       # cohere_model
+            glhf_api_key,       # glhf_api_key
+            glhf_model,         # glhf_model
+            glhf_custom_model   # glhf_custom_model
         ],
         outputs=[summary_output, download_summary]
     )
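The added comments document which send_to_model parameter each Gradio component feeds. Read positionally, they imply the signature sketched below; this is inferred from the comments, not copied from app.py:

    def send_to_model(prompt, model_selection,
                      hf_model_choice, hf_custom_model, hf_api_key,
                      groq_model_choice, groq_api_key,
                      openai_api_key, openai_model_choice,
                      cohere_api_key, cohere_model,
                      glhf_api_key, glhf_model, glhf_custom_model):
        """Dispatch to the selected provider; returns (summary_text, download_path_or_None)."""
        ...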