Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,10 @@ from typing import Any, Dict, List
|
|
| 7 |
import gradio as gr
|
| 8 |
from openai import OpenAI
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
|
|
|
|
| 12 |
_client = OpenAI(
|
| 13 |
base_url=os.getenv("BASE_URL", ""),
|
| 14 |
api_key=os.getenv("API_KEY", ""),
|
|
@@ -37,6 +39,15 @@ def _message(role: str, content: Any) -> Dict[str, Any]:
|
|
| 37 |
def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
|
| 38 |
files = message.get("files") or []
|
| 39 |
text = (message.get("text") or "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
content: List[Dict[str, Any]] = [_image_content(p) for p in files]
|
| 41 |
if text:
|
| 42 |
content.append(_text_content(text))
|
|
@@ -53,8 +64,7 @@ def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
| 53 |
if isinstance(content, str):
|
| 54 |
user_content.append(_text_content(content))
|
| 55 |
elif isinstance(content, tuple):
|
| 56 |
-
user_content.extend(_image_content(path)
|
| 57 |
-
for path in content if path)
|
| 58 |
elif role == "assistant":
|
| 59 |
msgs.append(_message("user", user_content.copy()))
|
| 60 |
user_content.clear()
|
|
@@ -69,7 +79,7 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
|
|
| 69 |
stream = _client.chat.completions.create(
|
| 70 |
model=model_name,
|
| 71 |
messages=messages,
|
| 72 |
-
temperature=0.
|
| 73 |
top_p=1,
|
| 74 |
extra_body={
|
| 75 |
"repetition_penalty": 1.05,
|
|
@@ -85,20 +95,20 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
|
|
| 85 |
partial += delta
|
| 86 |
yield partial
|
| 87 |
except Exception as e:
|
| 88 |
-
yield f"
|
| 89 |
|
| 90 |
|
| 91 |
def build_demo() -> gr.Blocks:
|
| 92 |
chatbot = gr.Chatbot(type="messages", allow_tags=["think"])
|
| 93 |
textbox = gr.MultimodalTextbox(
|
| 94 |
show_label=False,
|
| 95 |
-
placeholder="
|
| 96 |
file_types=["image"],
|
| 97 |
file_count="single",
|
| 98 |
max_plain_text_length=32768
|
| 99 |
)
|
| 100 |
model_selector = gr.Dropdown(
|
| 101 |
-
label="
|
| 102 |
choices=[
|
| 103 |
("LLaVA-OneVision-1.5-8B-Instruct", "LLaVA-OneVision-1.5-8B-Instruct"),
|
| 104 |
("LLaVA-OneVision-1.5-4B-Instruct", "LLaVA-OneVision-1.5-4B-Instruct"),
|
|
@@ -111,12 +121,14 @@ def build_demo() -> gr.Blocks:
|
|
| 111 |
multimodal=True,
|
| 112 |
chatbot=chatbot,
|
| 113 |
textbox=textbox,
|
| 114 |
-
title="
|
| 115 |
-
description=
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
additional_inputs=[model_selector],
|
| 119 |
-
additional_inputs_accordion=gr.Accordion("
|
| 120 |
).queue(default_concurrency_limit=8)
|
| 121 |
|
| 122 |
|
|
@@ -125,5 +137,4 @@ def main():
|
|
| 125 |
|
| 126 |
|
| 127 |
if __name__ == "__main__":
|
| 128 |
-
main()
|
| 129 |
-
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
from openai import OpenAI
|
| 9 |
|
| 10 |
+
# Modelo por defecto
|
| 11 |
+
DEFAULT_MODEL = "LLaVA-OneVision-1.5-8B-Instruct"
|
| 12 |
|
| 13 |
+
# Cliente OpenAI-compatible (usa el endpoint de Hugging Face o el tuyo)
|
| 14 |
_client = OpenAI(
|
| 15 |
base_url=os.getenv("BASE_URL", ""),
|
| 16 |
api_key=os.getenv("API_KEY", ""),
|
|
|
|
| 39 |
def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
|
| 40 |
files = message.get("files") or []
|
| 41 |
text = (message.get("text") or "").strip()
|
| 42 |
+
|
| 43 |
+
# 🔹 Si no hay texto, añadimos un prompt nutricional por defecto
|
| 44 |
+
if not text:
|
| 45 |
+
text = (
|
| 46 |
+
"Analiza la imagen del plato de comida y describe los alimentos que contiene. "
|
| 47 |
+
"Indica una estimación de calorías, proteínas, carbohidratos y grasas. "
|
| 48 |
+
"Responde en formato breve y estructurado."
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
content: List[Dict[str, Any]] = [_image_content(p) for p in files]
|
| 52 |
if text:
|
| 53 |
content.append(_text_content(text))
|
|
|
|
| 64 |
if isinstance(content, str):
|
| 65 |
user_content.append(_text_content(content))
|
| 66 |
elif isinstance(content, tuple):
|
| 67 |
+
user_content.extend(_image_content(path) for path in content if path)
|
|
|
|
| 68 |
elif role == "assistant":
|
| 69 |
msgs.append(_message("user", user_content.copy()))
|
| 70 |
user_content.clear()
|
|
|
|
| 79 |
stream = _client.chat.completions.create(
|
| 80 |
model=model_name,
|
| 81 |
messages=messages,
|
| 82 |
+
temperature=0.1,
|
| 83 |
top_p=1,
|
| 84 |
extra_body={
|
| 85 |
"repetition_penalty": 1.05,
|
|
|
|
| 95 |
partial += delta
|
| 96 |
yield partial
|
| 97 |
except Exception as e:
|
| 98 |
+
yield f"⚠️ Error al obtener respuesta: {e}"
|
| 99 |
|
| 100 |
|
| 101 |
def build_demo() -> gr.Blocks:
|
| 102 |
chatbot = gr.Chatbot(type="messages", allow_tags=["think"])
|
| 103 |
textbox = gr.MultimodalTextbox(
|
| 104 |
show_label=False,
|
| 105 |
+
placeholder="Subí una foto de tu comida para analizarla...",
|
| 106 |
file_types=["image"],
|
| 107 |
file_count="single",
|
| 108 |
max_plain_text_length=32768
|
| 109 |
)
|
| 110 |
model_selector = gr.Dropdown(
|
| 111 |
+
label="Modelo",
|
| 112 |
choices=[
|
| 113 |
("LLaVA-OneVision-1.5-8B-Instruct", "LLaVA-OneVision-1.5-8B-Instruct"),
|
| 114 |
("LLaVA-OneVision-1.5-4B-Instruct", "LLaVA-OneVision-1.5-4B-Instruct"),
|
|
|
|
| 121 |
multimodal=True,
|
| 122 |
chatbot=chatbot,
|
| 123 |
textbox=textbox,
|
| 124 |
+
title="🍽️ NasFit Vision AI",
|
| 125 |
+
description=(
|
| 126 |
+
"Subí una foto de tu comida y NasFit IA estimará su contenido nutricional. "
|
| 127 |
+
"Basado en **LLaVA-OneVision-1.5**, modelo multimodal open source con análisis visual avanzado. "
|
| 128 |
+
"Ideal para tracking nutricional inteligente."
|
| 129 |
+
),
|
| 130 |
additional_inputs=[model_selector],
|
| 131 |
+
additional_inputs_accordion=gr.Accordion("Opciones avanzadas", open=False),
|
| 132 |
).queue(default_concurrency_limit=8)
|
| 133 |
|
| 134 |
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
if __name__ == "__main__":
|
| 140 |
+
main()
|
|
|