Spaces:
Running
Running
Use the Hugging Face inference provider for the new Qwen model
Browse files
app.py
CHANGED
|
@@ -381,8 +381,8 @@ AVAILABLE_MODELS = [
|
|
| 381 |
},
|
| 382 |
{
|
| 383 |
"name": "Qwen3-235B-A22B-Thinking",
|
| 384 |
-
"id": "qwen3-235b-a22b-thinking-2507",
|
| 385 |
-
"description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities
|
| 386 |
}
|
| 387 |
]
|
| 388 |
|
|
@@ -456,23 +456,14 @@ if not HF_TOKEN:
|
|
| 456 |
|
| 457 |
def get_inference_client(model_id, provider="auto"):
|
| 458 |
"""Return an InferenceClient with provider based on model_id and user selection."""
|
| 459 |
-
# Special case for Dashscope Qwen thinking model
|
| 460 |
-
if model_id == "qwen3-235b-a22b-thinking-2507":
|
| 461 |
-
dashscope_api_key = os.getenv("DASHSCOPE_API_KEY")
|
| 462 |
-
if not dashscope_api_key:
|
| 463 |
-
raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set. Please set it to your Dashscope API key.")
|
| 464 |
-
return OpenAI(
|
| 465 |
-
api_key=dashscope_api_key,
|
| 466 |
-
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
| 467 |
-
)
|
| 468 |
-
|
| 469 |
-
# Handle other models with HuggingFace InferenceClient
|
| 470 |
if model_id == "moonshotai/Kimi-K2-Instruct":
|
| 471 |
provider = "groq"
|
| 472 |
elif model_id == "Qwen/Qwen3-235B-A22B":
|
| 473 |
provider = "cerebras"
|
| 474 |
elif model_id == "Qwen/Qwen3-32B":
|
| 475 |
provider = "cerebras"
|
|
|
|
|
|
|
| 476 |
return InferenceClient(
|
| 477 |
provider=provider,
|
| 478 |
api_key=HF_TOKEN,
|
|
@@ -1456,19 +1447,12 @@ This will help me create a better design for you."""
|
|
| 1456 |
else:
|
| 1457 |
messages.append({'role': 'user', 'content': enhanced_query})
|
| 1458 |
try:
|
| 1459 |
-
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
-
}
|
| 1466 |
-
|
| 1467 |
-
# Add stream_options for Dashscope models for better streaming performance
|
| 1468 |
-
if _current_model["id"] == "qwen3-235b-a22b-thinking-2507":
|
| 1469 |
-
completion_params["stream_options"] = {"include_usage": True}
|
| 1470 |
-
|
| 1471 |
-
completion = client.chat.completions.create(**completion_params)
|
| 1472 |
content = ""
|
| 1473 |
for chunk in completion:
|
| 1474 |
# Only process if chunk.choices is non-empty
|
|
|
|
| 381 |
},
|
| 382 |
{
|
| 383 |
"name": "Qwen3-235B-A22B-Thinking",
|
| 384 |
+
"id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
| 385 |
+
"description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
|
| 386 |
}
|
| 387 |
]
|
| 388 |
|
|
|
|
| 456 |
|
| 457 |
def get_inference_client(model_id, provider="auto"):
|
| 458 |
"""Return an InferenceClient with provider based on model_id and user selection."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
if model_id == "moonshotai/Kimi-K2-Instruct":
|
| 460 |
provider = "groq"
|
| 461 |
elif model_id == "Qwen/Qwen3-235B-A22B":
|
| 462 |
provider = "cerebras"
|
| 463 |
elif model_id == "Qwen/Qwen3-32B":
|
| 464 |
provider = "cerebras"
|
| 465 |
+
elif model_id == "Qwen/Qwen3-235B-A22B-Thinking-2507":
|
| 466 |
+
provider = "auto" # Let HuggingFace handle provider selection
|
| 467 |
return InferenceClient(
|
| 468 |
provider=provider,
|
| 469 |
api_key=HF_TOKEN,
|
|
|
|
| 1447 |
else:
|
| 1448 |
messages.append({'role': 'user', 'content': enhanced_query})
|
| 1449 |
try:
|
| 1450 |
+
completion = client.chat.completions.create(
|
| 1451 |
+
model=_current_model["id"],
|
| 1452 |
+
messages=messages,
|
| 1453 |
+
stream=True,
|
| 1454 |
+
max_tokens=10000
|
| 1455 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1456 |
content = ""
|
| 1457 |
for chunk in completion:
|
| 1458 |
# Only process if chunk.choices is non-empty
|