Adanbalf committed on
Commit
c113217
·
verified ·
1 Parent(s): 5168dc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -7,8 +7,10 @@ from typing import Any, Dict, List
7
  import gradio as gr
8
  from openai import OpenAI
9
 
10
- DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "LLaVA-OneVision-1.5-8B-Instruct")
 
11
 
 
12
  _client = OpenAI(
13
  base_url=os.getenv("BASE_URL", ""),
14
  api_key=os.getenv("API_KEY", ""),
@@ -37,6 +39,15 @@ def _message(role: str, content: Any) -> Dict[str, Any]:
37
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
38
  files = message.get("files") or []
39
  text = (message.get("text") or "").strip()
 
 
 
 
 
 
 
 
 
40
  content: List[Dict[str, Any]] = [_image_content(p) for p in files]
41
  if text:
42
  content.append(_text_content(text))
@@ -53,8 +64,7 @@ def _convert_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
53
  if isinstance(content, str):
54
  user_content.append(_text_content(content))
55
  elif isinstance(content, tuple):
56
- user_content.extend(_image_content(path)
57
- for path in content if path)
58
  elif role == "assistant":
59
  msgs.append(_message("user", user_content.copy()))
60
  user_content.clear()
@@ -69,7 +79,7 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
69
  stream = _client.chat.completions.create(
70
  model=model_name,
71
  messages=messages,
72
- temperature=0.000001,
73
  top_p=1,
74
  extra_body={
75
  "repetition_penalty": 1.05,
@@ -85,20 +95,20 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
85
  partial += delta
86
  yield partial
87
  except Exception as e:
88
- yield f"Failed to get response: {e}"
89
 
90
 
91
  def build_demo() -> gr.Blocks:
92
  chatbot = gr.Chatbot(type="messages", allow_tags=["think"])
93
  textbox = gr.MultimodalTextbox(
94
  show_label=False,
95
- placeholder="Enter text, or upload one or more images...",
96
  file_types=["image"],
97
  file_count="single",
98
  max_plain_text_length=32768
99
  )
100
  model_selector = gr.Dropdown(
101
- label="Model",
102
  choices=[
103
  ("LLaVA-OneVision-1.5-8B-Instruct", "LLaVA-OneVision-1.5-8B-Instruct"),
104
  ("LLaVA-OneVision-1.5-4B-Instruct", "LLaVA-OneVision-1.5-4B-Instruct"),
@@ -111,12 +121,14 @@ def build_demo() -> gr.Blocks:
111
  multimodal=True,
112
  chatbot=chatbot,
113
  textbox=textbox,
114
- title="LLaVA-OneVision-1.5: Fully Open Framework for Democratized Multimodal Training",
115
- description="""**LLaVA-OneVision1.5** introduces a novel family of fully open-source Large Multimodal Models (LMMs) that achieves state-of-the-art performance with substantially lower cost through training on native resolution images.
116
-
117
- 🔗 **Links**: [GitHub](https://github.com/EvolvingLMMs-Lab/LLaVA-OneVision-1.5) | [HuggingFace](https://huggingface.co/lmms-lab)""",
 
 
118
  additional_inputs=[model_selector],
119
- additional_inputs_accordion=gr.Accordion("Options", open=True),
120
  ).queue(default_concurrency_limit=8)
121
 
122
 
@@ -125,5 +137,4 @@ def main():
125
 
126
 
127
  if __name__ == "__main__":
128
- main()
129
-
 
7
  import gradio as gr
8
  from openai import OpenAI
9
 
10
+ # Modelo por defecto
11
+ DEFAULT_MODEL = "LLaVA-OneVision-1.5-8B-Instruct"
12
 
13
+ # Cliente OpenAI-compatible (usa el endpoint de Hugging Face o el tuyo)
14
  _client = OpenAI(
15
  base_url=os.getenv("BASE_URL", ""),
16
  api_key=os.getenv("API_KEY", ""),
 
39
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
40
  files = message.get("files") or []
41
  text = (message.get("text") or "").strip()
42
+
43
+ # 🔹 Si no hay texto, añadimos un prompt nutricional por defecto
44
+ if not text:
45
+ text = (
46
+ "Analiza la imagen del plato de comida y describe los alimentos que contiene. "
47
+ "Indica una estimación de calorías, proteínas, carbohidratos y grasas. "
48
+ "Responde en formato breve y estructurado."
49
+ )
50
+
51
  content: List[Dict[str, Any]] = [_image_content(p) for p in files]
52
  if text:
53
  content.append(_text_content(text))
 
64
  if isinstance(content, str):
65
  user_content.append(_text_content(content))
66
  elif isinstance(content, tuple):
67
+ user_content.extend(_image_content(path) for path in content if path)
 
68
  elif role == "assistant":
69
  msgs.append(_message("user", user_content.copy()))
70
  user_content.clear()
 
79
  stream = _client.chat.completions.create(
80
  model=model_name,
81
  messages=messages,
82
+ temperature=0.1,
83
  top_p=1,
84
  extra_body={
85
  "repetition_penalty": 1.05,
 
95
  partial += delta
96
  yield partial
97
  except Exception as e:
98
+ yield f"⚠️ Error al obtener respuesta: {e}"
99
 
100
 
101
  def build_demo() -> gr.Blocks:
102
  chatbot = gr.Chatbot(type="messages", allow_tags=["think"])
103
  textbox = gr.MultimodalTextbox(
104
  show_label=False,
105
+ placeholder="Subí una foto de tu comida para analizarla...",
106
  file_types=["image"],
107
  file_count="single",
108
  max_plain_text_length=32768
109
  )
110
  model_selector = gr.Dropdown(
111
+ label="Modelo",
112
  choices=[
113
  ("LLaVA-OneVision-1.5-8B-Instruct", "LLaVA-OneVision-1.5-8B-Instruct"),
114
  ("LLaVA-OneVision-1.5-4B-Instruct", "LLaVA-OneVision-1.5-4B-Instruct"),
 
121
  multimodal=True,
122
  chatbot=chatbot,
123
  textbox=textbox,
124
+ title="🍽️ NasFit Vision AI",
125
+ description=(
126
+ "Subí una foto de tu comida y NasFit IA estimará su contenido nutricional. "
127
+ "Basado en **LLaVA-OneVision-1.5**, modelo multimodal open source con análisis visual avanzado. "
128
+ "Ideal para tracking nutricional inteligente."
129
+ ),
130
  additional_inputs=[model_selector],
131
+ additional_inputs_accordion=gr.Accordion("Opciones avanzadas", open=False),
132
  ).queue(default_concurrency_limit=8)
133
 
134
 
 
137
 
138
 
139
  if __name__ == "__main__":
140
+ main()