Juju519
committed on
Refactor local model handling and Gradio UI settings
Updated local model configuration and improved fallback handling. Adjusted Gradio UI settings and ensured tokenizer initialization.
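For orientation, the fallback order this commit hardens is the one spelled out in respond()'s docstring below: an explicit "Use Local Model" checkbox wins, then API credentials, then the local model as a last resort. A minimal sketch of that selection logic, using the env var names from the diff (the choose_mode helper itself is illustrative, not part of the file):

import os

def choose_mode(use_local: bool) -> str:
    # Checkbox (or env) forces the local path outright.
    if use_local:
        return "local"
    # Prefer an OpenAI-compatible provider when a key is present.
    if os.getenv("OPENAI_API_KEY", "").strip():
        return "api-openai"
    # Otherwise try the Hugging Face Router.
    if os.getenv("HF_TOKEN", "").strip():
        return "api-hf-router"
    # No credentials at all: fall back to the local model instead of erroring.
    return "local"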
app.py
CHANGED
@@ -1,8 +1,9 @@
-# app.py (CS3 compliant,
+# app.py (CS3 compliant, with safe fallbacks)
 # - Prometheus metrics on :8000
 # - Gradio UI on :7860 (0.0.0.0 for Docker)
 # - Local vs API selection via checkbox + env
 # - Falls back to local model if no API creds
+# - Avoids double-counting RESP_LATENCY
 
 import os
 import json
@@ -21,8 +22,8 @@ print("[CS3] STARTUP")
 
 PRODUCT_KIND = os.getenv("PRODUCT_KIND", "unknown")  # "local" | "api" | "unknown"
 
-# Local model
-LOCAL_MODEL = os.getenv("LOCAL_MODEL", "
+# Local model
+LOCAL_MODEL = os.getenv("LOCAL_MODEL", "sshleifer/tiny-gpt2").strip()
 
 # OpenAI-compatible provider (OpenRouter / Together / OpenAI)
 OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "").strip()
@@ -128,7 +129,8 @@ def _build_hf_chat_prompt(msgs: list[dict[str, str]]) -> str:
 
 # ========== State for local model ==========
 
-pipe = None
+pipe = None
+tokenizer = None
 
 
 # ========== Core chat handler ==========
@@ -150,9 +152,9 @@ def respond(
     - Else:
         - If OPENAI_API_KEY set → OpenAI-compatible API path.
         - Elif HF_TOKEN set → Hugging Face Router /v1/completions.
-        - Else → fall back to local model (no error).
+        - Else → fall back to local model (no more error).
     """
-    global pipe
+    global pipe, tokenizer
 
     start_time = time.time()
     token_estimate = 0
@@ -171,13 +173,27 @@ def respond(
 
     # ---------- LOCAL MODEL PATH ----------
     if effective_use_local:
-        print(
+        print("[CS3] MODE=local")
         try:
-            from transformers import pipeline
+            from transformers import pipeline, AutoTokenizer
+            try:
+                import torch
+                torch.set_num_threads(2)
+            except Exception:
+                pass
+
+            if pipe is None or tokenizer is None:
+                tokenizer = AutoTokenizer.from_pretrained(
+                    LOCAL_MODEL,
+                    trust_remote_code=True,
+                )
+                pipe = pipeline(
+                    "text-generation",
+                    model=LOCAL_MODEL,
+                    tokenizer=tokenizer,
+                    device_map="auto",
+                    trust_remote_code=True,
+                )
 
             local_msgs = (
                 [{"role": "system", "content": system_message}]
@@ -185,17 +201,17 @@ def respond(
                 + [{"role": "user", "content": user_with_fact}]
             )
             prompt = _build_local_prompt(local_msgs)
-
             outputs = pipe(
                 prompt,
                 max_new_tokens=int(max_tokens),
                 do_sample=True,
                 temperature=float(temperature),
                 top_p=float(top_p),
+                pad_token_id=getattr(tokenizer, "eos_token_id", None),
+                eos_token_id=getattr(tokenizer, "eos_token_id", None),
             )
-            full_text = outputs[0]["generated_text"]
-            assistant = full_text[len(prompt):].strip()
+            full = outputs[0]["generated_text"]
+            assistant = full[len(prompt):].strip()
             if "Assistant:" in assistant:
                 assistant = assistant.split("Assistant:", 1)[-1].strip()
 
@@ -251,6 +267,7 @@ def respond(
             return
 
     # ---------- API PATH (HF ROUTER) ----------
+    # We know OPENAI_API_KEY is empty here. Use HF_TOKEN if available.
     if HF_TOKEN:
         print("[CS3] MODE=api (HF Router)")
         url = f"{HF_BASE_URL.rstrip('/')}/v1/completions"
@@ -315,7 +332,8 @@ def create_demo(enable_oauth: bool = False):
     with gr.Blocks(css=fancy_css) as demo:
         with gr.Row():
             gr.Markdown("<h1 id='title'>🐐 Chat with Gompei</h1>")
-
+        # CS3: OAuth disabled; keep a dummy state to match fn signature
+        token_input = gr.State(value=None)
 
         gr.ChatInterface(
             fn=respond,
@@ -348,8 +366,8 @@ def create_demo(enable_oauth: bool = False):
                     step=0.05,
                     label="Top-p (nucleus sampling)",
                 ),
-                gr.Checkbox(label="Use Local Model", value=
-                token_input,
+                gr.Checkbox(label="Use Local Model", value=False),
+                token_input,  # placeholder for _unused_login
             ],
            type="messages",
            examples=[
@@ -362,7 +380,7 @@ def create_demo(enable_oauth: bool = False):
                    128,
                    0.7,
                    0.95,
-
+                   False,
                    None,
                ],
                [
@@ -374,7 +392,7 @@ def create_demo(enable_oauth: bool = False):
                    128,
                    0.7,
                    0.95,
-
+                   False,
                    None,
                ],
            ],
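Taken out of the diff context, the local-path pattern this commit adds is: initialize the tokenizer and pipeline once, reuse them across calls, and slice the prompt off the generated text. A runnable sketch under the same assumptions as the commit (sshleifer/tiny-gpt2 default, transformers plus accelerate installed); generate_local is an illustrative wrapper, not the app's API:

import os
from transformers import AutoTokenizer, pipeline

LOCAL_MODEL = os.getenv("LOCAL_MODEL", "sshleifer/tiny-gpt2").strip()
pipe = None
tokenizer = None

def generate_local(prompt: str, max_new_tokens: int = 64) -> str:
    global pipe, tokenizer
    # Lazy init: load once on first use, then reuse on every call.
    if pipe is None or tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL, trust_remote_code=True)
        pipe = pipeline(
            "text-generation",
            model=LOCAL_MODEL,
            tokenizer=tokenizer,
            device_map="auto",
            trust_remote_code=True,
        )
    outputs = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        # Tiny GPT-2 has no pad token; reuse EOS to silence warnings.
        pad_token_id=getattr(tokenizer, "eos_token_id", None),
    )
    full = outputs[0]["generated_text"]  # includes the prompt verbatim
    return full[len(prompt):].strip()

print(generate_local("User: hi\nAssistant:"))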
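On the UI side, the commit keeps respond()'s signature stable by feeding a dummy gr.State through additional_inputs next to the new checkbox. A self-contained sketch of that wiring, assuming a recent Gradio (4.44+) and an illustrative echo handler in place of the real respond:

import gradio as gr

def respond(message, history, use_local, _unused_login):
    # Stand-in handler; the real app routes to local or API backends here.
    return f"(use_local={use_local}) {message}"

with gr.Blocks() as demo:
    # Dummy state occupies the slot an OAuth token would otherwise fill.
    token_input = gr.State(value=None)
    gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Checkbox(label="Use Local Model", value=False),
            token_input,  # fills the _unused_login parameter
        ],
        type="messages",
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)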