Spaces:
Running
Running
Update local tool definitions to match mcp format & improve sys prompt
Browse files
- .gitattributes +1 -0
- .gitignore +2 -1
- app.py +50 -41
- mcp_host/agent.py +110 -72
- mcp_host/tts/gradio_api_tts.py +1 -1
- mcp_host/tts/hf_zero_gpu_tts.py +1 -0
- mcp_host/ui.py +1 -1
- static/welcome-to-vibe-shopping.webp +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
__pycache__
|
| 2 |
.DS_Store
|
| 3 |
.env
|
| 4 |
-
.gradio
|
|
|
|
|
|
| 1 |
__pycache__
|
| 2 |
.DS_Store
|
| 3 |
.env
|
| 4 |
+
.gradio
|
| 5 |
+
*.webp
|
app.py
CHANGED
|
@@ -78,32 +78,36 @@ async def handle_audio_stream(
|
|
| 78 |
image_with_mask: dict | None = None,
|
| 79 |
gradio_client: Client | None = None,
|
| 80 |
):
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
|
| 109 |
async def set_client_for_session(request: gr.Request):
|
|
@@ -115,6 +119,7 @@ async def set_client_for_session(request: gr.Request):
|
|
| 115 |
raise gr.Error(
|
| 116 |
f"Inference server is not available. Status code: {health_check_response.status}"
|
| 117 |
)
|
|
|
|
| 118 |
if not vibe_shopping_agent.clients_connected:
|
| 119 |
await vibe_shopping_agent.connect_clients()
|
| 120 |
|
|
@@ -128,17 +133,17 @@ async def set_client_for_session(request: gr.Request):
|
|
| 128 |
|
| 129 |
x_ip_token = request.headers["x-ip-token"]
|
| 130 |
|
| 131 |
-
return Client("sitatech/Kokoro-TTS", headers={"X-IP-Token": x_ip_token}), Modal(
|
|
|
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
-
with gr.Blocks(
|
|
|
|
|
|
|
|
|
|
| 135 |
gradio_client = gr.State()
|
| 136 |
|
| 137 |
-
with Modal(visible=True) as modal:
|
| 138 |
-
ColdBootUI()
|
| 139 |
-
|
| 140 |
-
vibe_shopping_app.load(set_client_for_session, None, [gradio_client, modal])
|
| 141 |
-
|
| 142 |
debuging_options = {
|
| 143 |
"Echo user speech": "debug_echo_user_speech",
|
| 144 |
"USE HF ZeroGPU STT": "debug_use_hf_zero_gpu_stt",
|
|
@@ -147,7 +152,7 @@ with gr.Blocks(theme=gr.themes.Ocean()) as vibe_shopping_app:
|
|
| 147 |
chat_history = gr.State(value=[])
|
| 148 |
displayed_products = gr.State(value=[])
|
| 149 |
displayed_image = gr.State(value=None)
|
| 150 |
-
with gr.Column():
|
| 151 |
voice = gr.Dropdown(
|
| 152 |
label="Language & Voice",
|
| 153 |
choices=list(VOICES.items()) + list(debuging_options.items()),
|
|
@@ -164,14 +169,14 @@ with gr.Blocks(theme=gr.themes.Ocean()) as vibe_shopping_app:
|
|
| 164 |
mode="send-receive",
|
| 165 |
modality="audio",
|
| 166 |
button_labels={"start": "Start Vibe Shopping"},
|
| 167 |
-
rtc_configuration=
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
server_rtc_configuration=
|
| 171 |
-
|
| 172 |
-
|
| 173 |
scale=0,
|
| 174 |
-
time_limit=
|
| 175 |
)
|
| 176 |
with gr.Accordion(open=False, label="Input Image"):
|
| 177 |
gr.Markdown(
|
|
@@ -203,4 +208,8 @@ with gr.Blocks(theme=gr.themes.Ocean()) as vibe_shopping_app:
|
|
| 203 |
show_progress="hidden",
|
| 204 |
)
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
vibe_shopping_app.launch()
|
|
|
|
| 78 |
image_with_mask: dict | None = None,
|
| 79 |
gradio_client: Client | None = None,
|
| 80 |
):
|
| 81 |
+
try:
|
| 82 |
+
image, mask = handle_image_upload(image_with_mask)
|
| 83 |
+
|
| 84 |
+
def update_ui(products, image, clear_ui):
|
| 85 |
+
nonlocal displayed_products, displayed_image
|
| 86 |
+
if clear_ui:
|
| 87 |
+
displayed_products = None
|
| 88 |
+
displayed_image = None
|
| 89 |
+
else:
|
| 90 |
+
displayed_products = products
|
| 91 |
+
displayed_image = image
|
| 92 |
+
|
| 93 |
+
async for ai_speech in vibe_shopping_agent.chat(
|
| 94 |
+
user_speech=audio,
|
| 95 |
+
chat_history=chat_history,
|
| 96 |
+
voice=voice,
|
| 97 |
+
update_ui=update_ui,
|
| 98 |
+
input_image=image,
|
| 99 |
+
input_mask=mask,
|
| 100 |
+
gradio_client=gradio_client,
|
| 101 |
+
):
|
| 102 |
+
# Yield the audio chunk to the WebRTC stream
|
| 103 |
+
yield ai_speech
|
| 104 |
+
|
| 105 |
+
yield AdditionalOutputs(
|
| 106 |
+
chat_history, displayed_products, displayed_image, None
|
| 107 |
+
) # None for resetting the input_image state
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Error in handle_audio_stream: {e}")
|
| 110 |
+
raise gr.Error(f"An error occurred: {e}")
|
| 111 |
|
| 112 |
|
| 113 |
async def set_client_for_session(request: gr.Request):
|
|
|
|
| 119 |
raise gr.Error(
|
| 120 |
f"Inference server is not available. Status code: {health_check_response.status}"
|
| 121 |
)
|
| 122 |
+
|
| 123 |
if not vibe_shopping_agent.clients_connected:
|
| 124 |
await vibe_shopping_agent.connect_clients()
|
| 125 |
|
|
|
|
| 133 |
|
| 134 |
x_ip_token = request.headers["x-ip-token"]
|
| 135 |
|
| 136 |
+
return Client("sitatech/Kokoro-TTS", headers={"X-IP-Token": x_ip_token}), Modal(
|
| 137 |
+
visible=False
|
| 138 |
+
)
|
| 139 |
|
| 140 |
|
| 141 |
+
with gr.Blocks(
|
| 142 |
+
theme=gr.themes.Ocean(),
|
| 143 |
+
css="#main-container { max-width: 1200px; margin: 0 auto; }",
|
| 144 |
+
) as vibe_shopping_app:
|
| 145 |
gradio_client = gr.State()
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
debuging_options = {
|
| 148 |
"Echo user speech": "debug_echo_user_speech",
|
| 149 |
"USE HF ZeroGPU STT": "debug_use_hf_zero_gpu_stt",
|
|
|
|
| 152 |
chat_history = gr.State(value=[])
|
| 153 |
displayed_products = gr.State(value=[])
|
| 154 |
displayed_image = gr.State(value=None)
|
| 155 |
+
with gr.Column(elem_id="main-container"):
|
| 156 |
voice = gr.Dropdown(
|
| 157 |
label="Language & Voice",
|
| 158 |
choices=list(VOICES.items()) + list(debuging_options.items()),
|
|
|
|
| 169 |
mode="send-receive",
|
| 170 |
modality="audio",
|
| 171 |
button_labels={"start": "Start Vibe Shopping"},
|
| 172 |
+
rtc_configuration=(
|
| 173 |
+
get_cloudflare_turn_credentials_async if not IS_LOCAL else None
|
| 174 |
+
),
|
| 175 |
+
server_rtc_configuration=(
|
| 176 |
+
get_cloudflare_turn_credentials(ttl=360_000) if not IS_LOCAL else None
|
| 177 |
+
),
|
| 178 |
scale=0,
|
| 179 |
+
time_limit=3600,
|
| 180 |
)
|
| 181 |
with gr.Accordion(open=False, label="Input Image"):
|
| 182 |
gr.Markdown(
|
|
|
|
| 208 |
show_progress="hidden",
|
| 209 |
)
|
| 210 |
|
| 211 |
+
with Modal(visible=True) as modal:
|
| 212 |
+
ColdBootUI()
|
| 213 |
+
|
| 214 |
+
vibe_shopping_app.load(set_client_for_session, None, [gradio_client, modal])
|
| 215 |
vibe_shopping_app.launch()
|
mcp_host/agent.py
CHANGED
|
@@ -48,12 +48,12 @@ Then, you can say what you think about the displayed item(s), tell how they fit
|
|
| 48 |
Always ask the user for confirmation before taking any action that requires payment or purchase.
|
| 49 |
If a function requires an input that you don't have based on your knowledge and the conversation history, you should ask the user for it. For example, if the user asks to try a product, but you don't have the target image, you should ask the user to provide it.
|
| 50 |
|
| 51 |
-
When calling a function, ALWAYS
|
| 52 |
-
|
| 53 |
-
Then when you get the response from the function, you say
|
| 54 |
|
| 55 |
The maximum number of products you can search at once is 10, don't exceed this limit.
|
| 56 |
-
Make sure to only output raw text. NEVER output
|
| 57 |
"""
|
| 58 |
|
| 59 |
def __init__(
|
|
@@ -84,7 +84,7 @@ Make sure to only output raw text. NEVER output formatted text, markdown or emoj
|
|
| 84 |
self.fewsats_client,
|
| 85 |
self.virtual_try_client,
|
| 86 |
]
|
| 87 |
-
self.display_tool =
|
| 88 |
self.image_uploader = image_uploader
|
| 89 |
self.clients_connected = False
|
| 90 |
|
|
@@ -98,10 +98,10 @@ Make sure to only output raw text. NEVER output formatted text, markdown or emoj
|
|
| 98 |
await self.virtual_try_client.connect_to_server("python", ["./mcp_server.py"])
|
| 99 |
|
| 100 |
self.tools = (
|
| 101 |
-
|
|
|
|
| 102 |
+ await self.fewsats_client.tools
|
| 103 |
+ await self.virtual_try_client.tools
|
| 104 |
-
+ [self.display_tool]
|
| 105 |
)
|
| 106 |
self.clients_connected = True
|
| 107 |
|
|
@@ -224,6 +224,7 @@ Make sure to only output raw text. NEVER output formatted text, markdown or emoj
|
|
| 224 |
messages=chat_history,
|
| 225 |
stream=True,
|
| 226 |
tools=self.tools,
|
|
|
|
| 227 |
)
|
| 228 |
pending_tool_calls: dict[int, ChoiceDeltaToolCall] = {}
|
| 229 |
|
|
@@ -282,48 +283,52 @@ Make sure to only output raw text. NEVER output formatted text, markdown or emoj
|
|
| 282 |
}
|
| 283 |
)
|
| 284 |
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
"role": "tool",
|
| 291 |
"tool_call_id": call_id,
|
| 292 |
-
"content":
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
}
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
)
|
| 305 |
-
tool_response: ChatCompletionToolMessageParam = {
|
| 306 |
-
"role": "tool",
|
| 307 |
-
"tool_call_id": call_id,
|
| 308 |
-
"content": "Content displayed successfully.",
|
| 309 |
-
}
|
| 310 |
else:
|
| 311 |
tool_response = await mcp_client.call_tool(
|
| 312 |
call_id=call_id,
|
| 313 |
tool_name=tool_name,
|
| 314 |
tool_args=json.loads(tool_args) if tool_args else None,
|
| 315 |
)
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
|
| 328 |
def _build_input_image_content(
|
| 329 |
self, input_image: Image.Image, image_label: str
|
|
@@ -348,43 +353,76 @@ Make sure to only output raw text. NEVER output formatted text, markdown or emoj
|
|
| 348 |
)
|
| 349 |
|
| 350 |
|
| 351 |
-
def
|
| 352 |
-
return
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
"
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
""",
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
"type": "object",
|
| 368 |
-
"properties": {
|
| 369 |
-
"name": {"type": "string"},
|
| 370 |
-
"image_url": {"type": "string"},
|
| 371 |
-
"price": {"type": "string"},
|
| 372 |
},
|
| 373 |
-
"
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
},
|
| 377 |
-
"image_url": {
|
| 378 |
-
"type": "string",
|
| 379 |
-
"description": "An optional URL of an image to display.",
|
| 380 |
},
|
| 381 |
-
"
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
},
|
| 388 |
},
|
| 389 |
},
|
| 390 |
-
|
|
|
|
| 48 |
Always ask the user for confirmation before taking any action that requires payment or purchase.
|
| 49 |
If a function requires an input that you don't have based on your knowledge and the conversation history, you should ask the user for it. For example, if the user asks to try a product, but you don't have the target image, you should ask the user to provide it.
|
| 50 |
|
| 51 |
+
When calling a function, ALWAYS let the user know what you are doing while they are waiting.
|
| 52 |
+
Something like: One moment, I will search for products matching your request \n<tool_call>\n<call-function-to-search-products>\n</tool_call>. \
|
| 53 |
+
Then when you get the response from the function, you can say Here are some products I found for you \n<tool_call>\n<call-function-to-display-products>\n</tool_call>.
|
| 54 |
|
| 55 |
The maximum number of products you can search at once is 10, don't exceed this limit.
|
| 56 |
+
Make sure to only output raw text. NEVER output markdown or emoji.
|
| 57 |
"""
|
| 58 |
|
| 59 |
def __init__(
|
|
|
|
| 84 |
self.fewsats_client,
|
| 85 |
self.virtual_try_client,
|
| 86 |
]
|
| 87 |
+
self.display_tool = _build_display_tool_definitions()
|
| 88 |
self.image_uploader = image_uploader
|
| 89 |
self.clients_connected = False
|
| 90 |
|
|
|
|
| 98 |
await self.virtual_try_client.connect_to_server("python", ["./mcp_server.py"])
|
| 99 |
|
| 100 |
self.tools = (
|
| 101 |
+
self.display_tool
|
| 102 |
+
+ await self.agora_client.tools
|
| 103 |
+ await self.fewsats_client.tools
|
| 104 |
+ await self.virtual_try_client.tools
|
|
|
|
| 105 |
)
|
| 106 |
self.clients_connected = True
|
| 107 |
|
|
|
|
| 224 |
messages=chat_history,
|
| 225 |
stream=True,
|
| 226 |
tools=self.tools,
|
| 227 |
+
temperature=0.7,
|
| 228 |
)
|
| 229 |
pending_tool_calls: dict[int, ChoiceDeltaToolCall] = {}
|
| 230 |
|
|
|
|
| 283 |
}
|
| 284 |
)
|
| 285 |
|
| 286 |
+
try:
|
| 287 |
+
print(f"Calling tool {tool_name} with args: {tool_args}")
|
| 288 |
+
if tool_name.startswith("Display."):
|
| 289 |
+
args = json.loads(tool_args) if tool_args else {}
|
| 290 |
+
update_ui(
|
| 291 |
+
args.get("products"),
|
| 292 |
+
args.get("image_url"),
|
| 293 |
+
tool_name == "Display.clear_display",
|
| 294 |
+
)
|
| 295 |
+
tool_response: ChatCompletionToolMessageParam = {
|
| 296 |
"role": "tool",
|
| 297 |
"tool_call_id": call_id,
|
| 298 |
+
"content": (
|
| 299 |
+
"Content displayed successfully."
|
| 300 |
+
if tool_name != "clear_display"
|
| 301 |
+
else "Display cleared."
|
| 302 |
+
),
|
| 303 |
}
|
| 304 |
+
else:
|
| 305 |
+
mcp_client = self._get_mcp_client_for_tool(tool_name)
|
| 306 |
+
if mcp_client is None:
|
| 307 |
+
print(f"Tool {tool_name} not found in any MCP client.")
|
| 308 |
+
tool_responses.append(
|
| 309 |
+
{
|
| 310 |
+
"role": "tool",
|
| 311 |
+
"tool_call_id": call_id,
|
| 312 |
+
"content": f"Unable to find tool '{tool_name}'.",
|
| 313 |
+
}
|
| 314 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
else:
|
| 316 |
tool_response = await mcp_client.call_tool(
|
| 317 |
call_id=call_id,
|
| 318 |
tool_name=tool_name,
|
| 319 |
tool_args=json.loads(tool_args) if tool_args else None,
|
| 320 |
)
|
| 321 |
+
print("Tool responded")
|
| 322 |
+
tool_responses.append(tool_response)
|
| 323 |
+
except Exception as e:
|
| 324 |
+
print(f"Error calling tool {tool_name}: {e}")
|
| 325 |
+
tool_responses.append(
|
| 326 |
+
{
|
| 327 |
+
"role": "tool",
|
| 328 |
+
"tool_call_id": call_id,
|
| 329 |
+
"content": f"Error calling tool '{tool_name}', Error: {str(e)[:500]}",
|
| 330 |
+
}
|
| 331 |
+
)
|
| 332 |
|
| 333 |
def _build_input_image_content(
|
| 334 |
self, input_image: Image.Image, image_label: str
|
|
|
|
| 353 |
)
|
| 354 |
|
| 355 |
|
| 356 |
+
def _build_display_tool_definitions() -> list[ChatCompletionToolParam]:
|
| 357 |
+
return [
|
| 358 |
+
{
|
| 359 |
+
"type": "function",
|
| 360 |
+
"function": {
|
| 361 |
+
"name": "Display.display_products",
|
| 362 |
+
"description": """
|
| 363 |
+
Display a list of products. Use this to show search results, cart items, or products from conversation history.
|
| 364 |
+
|
| 365 |
+
Args:
|
| 366 |
+
products: A list of products to display. Each product should have a name, image URL, and formatted price.
|
| 367 |
+
example:
|
| 368 |
+
products: [
|
| 369 |
+
{
|
| 370 |
+
"name": "Stylish Green Shirt",
|
| 371 |
+
"image_url": "https://example.com/images/green-shirt.jpg",
|
| 372 |
+
"price": "$29.99"
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"name": "Comfortable Jeans",
|
| 376 |
+
"image_url": "https://example.com/images/jeans.jpg",
|
| 377 |
+
"price": "$49.99"
|
| 378 |
+
}
|
| 379 |
+
]
|
| 380 |
""",
|
| 381 |
+
"parameters": {
|
| 382 |
+
"properties": {
|
| 383 |
+
"products": {
|
| 384 |
+
"items": {
|
| 385 |
+
"additionalProperties": {"type": "string"},
|
| 386 |
+
"type": "object",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
},
|
| 388 |
+
"title": "Products",
|
| 389 |
+
"type": "array",
|
| 390 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
},
|
| 392 |
+
"required": ["products"],
|
| 393 |
+
"title": "display_productsArguments",
|
| 394 |
+
"type": "object",
|
| 395 |
+
},
|
| 396 |
+
},
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"type": "function",
|
| 400 |
+
"function": {
|
| 401 |
+
"name": "Display.display_image",
|
| 402 |
+
"description": "Display a single standalone image. Use this for virtual try-on results, a specific product image requested by the user, or any other relevant single image.\n\nArgs:\n image_url: The URL of the image to display.",
|
| 403 |
+
"parameters": {
|
| 404 |
+
"properties": {
|
| 405 |
+
"image_url": {
|
| 406 |
+
"title": "Image URL",
|
| 407 |
+
"type": "string",
|
| 408 |
+
},
|
| 409 |
},
|
| 410 |
+
"required": ["image_url"],
|
| 411 |
+
"title": "display_imageArguments",
|
| 412 |
+
"type": "object",
|
| 413 |
+
},
|
| 414 |
+
},
|
| 415 |
+
},
|
| 416 |
+
{
|
| 417 |
+
"type": "function",
|
| 418 |
+
"function": {
|
| 419 |
+
"name": "Display.clear_display",
|
| 420 |
+
"description": "Clear any content currently displayed in the user interface. Removes everything from the visual display area.\n\nArgs: None",
|
| 421 |
+
"parameters": {
|
| 422 |
+
"properties": {},
|
| 423 |
+
"title": "clear_displayArguments",
|
| 424 |
+
"type": "object",
|
| 425 |
},
|
| 426 |
},
|
| 427 |
},
|
| 428 |
+
]
|
mcp_host/tts/gradio_api_tts.py
CHANGED
|
@@ -37,7 +37,7 @@ async def stream_text_to_speech(
|
|
| 37 |
standard_lang_code = KOKORO_TO_STD_LANG[kokoro_lang]
|
| 38 |
|
| 39 |
for text in generate_sentences(text_stream, language=standard_lang_code):
|
| 40 |
-
print(f"Streaming audio for text: {text
|
| 41 |
audio = client.submit(
|
| 42 |
text=text, voice=voice, speed=1, use_gpu=True, api_name="/stream"
|
| 43 |
)
|
|
|
|
| 37 |
standard_lang_code = KOKORO_TO_STD_LANG[kokoro_lang]
|
| 38 |
|
| 39 |
for text in generate_sentences(text_stream, language=standard_lang_code):
|
| 40 |
+
print(f"Streaming audio for text: {text}")
|
| 41 |
audio = client.submit(
|
| 42 |
text=text, voice=voice, speed=1, use_gpu=True, api_name="/stream"
|
| 43 |
)
|
mcp_host/tts/hf_zero_gpu_tts.py
CHANGED
|
@@ -58,6 +58,7 @@ async def stream_text_to_speech(
|
|
| 58 |
standard_lang_code = KOKORO_TO_STD_LANG[kokoro_lang]
|
| 59 |
|
| 60 |
for text in generate_sentences(text_stream, language=standard_lang_code):
|
|
|
|
| 61 |
for audio in text_to_speech(text, pipe_key=kokoro_lang, voice=voice):
|
| 62 |
yield 24000, audio
|
| 63 |
|
|
|
|
| 58 |
standard_lang_code = KOKORO_TO_STD_LANG[kokoro_lang]
|
| 59 |
|
| 60 |
for text in generate_sentences(text_stream, language=standard_lang_code):
|
| 61 |
+
print(f"Streaming audio for text: {text}")
|
| 62 |
for audio in text_to_speech(text, pipe_key=kokoro_lang, voice=voice):
|
| 63 |
yield 24000, audio
|
| 64 |
|
mcp_host/ui.py
CHANGED
|
@@ -13,7 +13,7 @@ def UI(products_state: gr.State, image_state: gr.State):
|
|
| 13 |
justify-content: center;
|
| 14 |
height: 600px;
|
| 15 |
width: 100%;
|
| 16 |
-
background: linear-gradient(rgba(0,0,0,0.3), rgba(0,0,0,0.3)), url('{get_hf_space_file_url_prefix()}static/welcome-to-vibe-shopping.webp');
|
| 17 |
background-size: cover;
|
| 18 |
background-position: center;
|
| 19 |
background-repeat: no-repeat;
|
|
|
|
| 13 |
justify-content: center;
|
| 14 |
height: 600px;
|
| 15 |
width: 100%;
|
| 16 |
+
background: linear-gradient(rgba(0,0,0,0.3), rgba(0,0,0,0.3)), url('{get_hf_space_file_url_prefix()}static/welcome-to-vibe-shopping-upscaled.webp');
|
| 17 |
background-size: cover;
|
| 18 |
background-position: center;
|
| 19 |
background-repeat: no-repeat;
|
static/welcome-to-vibe-shopping.webp
CHANGED
|
|
Git LFS Details
|