Juju519 committed on
Commit
fbebd6d
·
unverified ·
1 Parent(s): 73caa05

Refactor local model handling and Gradio UI settings

Browse files

Updated local model configuration and improved fallback handling. Adjusted Gradio UI settings and ensured tokenizer initialization.

Files changed (1) hide show
  1. app.py +39 -21
app.py CHANGED
@@ -1,8 +1,9 @@
1
- # app.py (CS3 compliant, simplified local model to avoid DynamicCache issues)
2
  # - Prometheus metrics on :8000
3
  # - Gradio UI on :7860 (0.0.0.0 for Docker)
4
  # - Local vs API selection via checkbox + env
5
  # - Falls back to local model if no API creds
 
6
 
7
  import os
8
  import json
@@ -21,8 +22,8 @@ print("[CS3] STARTUP")
21
 
22
  PRODUCT_KIND = os.getenv("PRODUCT_KIND", "unknown") # "local" | "api" | "unknown"
23
 
24
- # Local model – use Phi-3-mini like your last working version
25
- LOCAL_MODEL = os.getenv("LOCAL_MODEL", "microsoft/Phi-3-mini-4k-instruct").strip()
26
 
27
  # OpenAI-compatible provider (OpenRouter / Together / OpenAI)
28
  OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "").strip()
@@ -128,7 +129,8 @@ def _build_hf_chat_prompt(msgs: list[dict[str, str]]) -> str:
128
 
129
  # ========== State for local model ==========
130
 
131
- pipe = None # simple pipeline object, like in your old code
 
132
 
133
 
134
  # ========== Core chat handler ==========
@@ -150,9 +152,9 @@ def respond(
150
  - Else:
151
  - If OPENAI_API_KEY set β†’ OpenAI-compatible API path.
152
  - Elif HF_TOKEN set β†’ Hugging Face Router /v1/completions.
153
- - Else β†’ fall back to local model (no πŸ” error).
154
  """
155
- global pipe
156
 
157
  start_time = time.time()
158
  token_estimate = 0
@@ -171,13 +173,27 @@ def respond(
171
 
172
  # ---------- LOCAL MODEL PATH ----------
173
  if effective_use_local:
174
- print(f"[CS3] MODE=local (model={LOCAL_MODEL})")
175
  try:
176
- from transformers import pipeline
177
-
178
- if pipe is None:
179
- # Simple pipeline, just like your last working version
180
- pipe = pipeline("text-generation", model=LOCAL_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  local_msgs = (
183
  [{"role": "system", "content": system_message}]
@@ -185,17 +201,17 @@ def respond(
185
  + [{"role": "user", "content": user_with_fact}]
186
  )
187
  prompt = _build_local_prompt(local_msgs)
188
-
189
  outputs = pipe(
190
  prompt,
191
  max_new_tokens=int(max_tokens),
192
  do_sample=True,
193
  temperature=float(temperature),
194
  top_p=float(top_p),
 
 
195
  )
196
-
197
- full_text = outputs[0]["generated_text"]
198
- assistant = full_text[len(prompt):].strip()
199
  if "Assistant:" in assistant:
200
  assistant = assistant.split("Assistant:", 1)[-1].strip()
201
 
@@ -251,6 +267,7 @@ def respond(
251
  return
252
 
253
  # ---------- API PATH (HF ROUTER) ----------
 
254
  if HF_TOKEN:
255
  print("[CS3] MODE=api (HF Router)")
256
  url = f"{HF_BASE_URL.rstrip('/')}/v1/completions"
@@ -315,7 +332,8 @@ def create_demo(enable_oauth: bool = False):
315
  with gr.Blocks(css=fancy_css) as demo:
316
  with gr.Row():
317
  gr.Markdown("<h1 id='title'>🐐 Chat with Gompei</h1>")
318
- token_input = gr.State(value=None) # dummy to match fn signature
 
319
 
320
  gr.ChatInterface(
321
  fn=respond,
@@ -348,8 +366,8 @@ def create_demo(enable_oauth: bool = False):
348
  step=0.05,
349
  label="Top-p (nucleus sampling)",
350
  ),
351
- gr.Checkbox(label="Use Local Model", value=True), # default ON now
352
- token_input,
353
  ],
354
  type="messages",
355
  examples=[
@@ -362,7 +380,7 @@ def create_demo(enable_oauth: bool = False):
362
  128,
363
  0.7,
364
  0.95,
365
- True,
366
  None,
367
  ],
368
  [
@@ -374,7 +392,7 @@ def create_demo(enable_oauth: bool = False):
374
  128,
375
  0.7,
376
  0.95,
377
- True,
378
  None,
379
  ],
380
  ],
 
1
+ # app.py (CS3 compliant, with safe fallbacks)
2
  # - Prometheus metrics on :8000
3
  # - Gradio UI on :7860 (0.0.0.0 for Docker)
4
  # - Local vs API selection via checkbox + env
5
  # - Falls back to local model if no API creds
6
+ # - Avoids double-counting RESP_LATENCY
7
 
8
  import os
9
  import json
 
22
 
23
  PRODUCT_KIND = os.getenv("PRODUCT_KIND", "unknown") # "local" | "api" | "unknown"
24
 
25
+ # Local model
26
+ LOCAL_MODEL = os.getenv("LOCAL_MODEL", "sshleifer/tiny-gpt2").strip()
27
 
28
  # OpenAI-compatible provider (OpenRouter / Together / OpenAI)
29
  OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "").strip()
 
129
 
130
  # ========== State for local model ==========
131
 
132
+ pipe = None
133
+ tokenizer = None
134
 
135
 
136
  # ========== Core chat handler ==========
 
152
  - Else:
153
  - If OPENAI_API_KEY set β†’ OpenAI-compatible API path.
154
  - Elif HF_TOKEN set β†’ Hugging Face Router /v1/completions.
155
+ - Else β†’ fall back to local model (no more πŸ” error).
156
  """
157
+ global pipe, tokenizer
158
 
159
  start_time = time.time()
160
  token_estimate = 0
 
173
 
174
  # ---------- LOCAL MODEL PATH ----------
175
  if effective_use_local:
176
+ print("[CS3] MODE=local")
177
  try:
178
+ from transformers import pipeline, AutoTokenizer
179
+ try:
180
+ import torch
181
+ torch.set_num_threads(2)
182
+ except Exception:
183
+ pass
184
+
185
+ if pipe is None or tokenizer is None:
186
+ tokenizer = AutoTokenizer.from_pretrained(
187
+ LOCAL_MODEL,
188
+ trust_remote_code=True,
189
+ )
190
+ pipe = pipeline(
191
+ "text-generation",
192
+ model=LOCAL_MODEL,
193
+ tokenizer=tokenizer,
194
+ device_map="auto",
195
+ trust_remote_code=True,
196
+ )
197
 
198
  local_msgs = (
199
  [{"role": "system", "content": system_message}]
 
201
  + [{"role": "user", "content": user_with_fact}]
202
  )
203
  prompt = _build_local_prompt(local_msgs)
 
204
  outputs = pipe(
205
  prompt,
206
  max_new_tokens=int(max_tokens),
207
  do_sample=True,
208
  temperature=float(temperature),
209
  top_p=float(top_p),
210
+ pad_token_id=getattr(tokenizer, "eos_token_id", None),
211
+ eos_token_id=getattr(tokenizer, "eos_token_id", None),
212
  )
213
+ full = outputs[0]["generated_text"]
214
+ assistant = full[len(prompt):].strip()
 
215
  if "Assistant:" in assistant:
216
  assistant = assistant.split("Assistant:", 1)[-1].strip()
217
 
 
267
  return
268
 
269
  # ---------- API PATH (HF ROUTER) ----------
270
+ # We know OPENAI_API_KEY is empty here. Use HF_TOKEN if available.
271
  if HF_TOKEN:
272
  print("[CS3] MODE=api (HF Router)")
273
  url = f"{HF_BASE_URL.rstrip('/')}/v1/completions"
 
332
  with gr.Blocks(css=fancy_css) as demo:
333
  with gr.Row():
334
  gr.Markdown("<h1 id='title'>🐐 Chat with Gompei</h1>")
335
+ # CS3: OAuth disabled; keep a dummy state to match fn signature
336
+ token_input = gr.State(value=None)
337
 
338
  gr.ChatInterface(
339
  fn=respond,
 
366
  step=0.05,
367
  label="Top-p (nucleus sampling)",
368
  ),
369
+ gr.Checkbox(label="Use Local Model", value=False),
370
+ token_input, # placeholder for _unused_login
371
  ],
372
  type="messages",
373
  examples=[
 
380
  128,
381
  0.7,
382
  0.95,
383
+ False,
384
  None,
385
  ],
386
  [
 
392
  128,
393
  0.7,
394
  0.95,
395
+ False,
396
  None,
397
  ],
398
  ],