# app.py ── run with DISABLE_INFERENCE=1 to skip model calls
import os
# ──────────────────── 1. Optional Gradio UI ──────────────────────────
try:
    import gradio as gr
    USE_GRADIO = True
except ImportError:
    USE_GRADIO = False
    print("Gradio not installed → UI disabled.")
# ──────────────────── 2. Inference toggle ────────────────────────────
DISABLE_INFERENCE = os.getenv("DISABLE_INFERENCE", "0") == "1"
if not DISABLE_INFERENCE:
    try:
        from huggingface_hub import InferenceClient
    except ImportError as err:
        raise RuntimeError(
            "huggingface_hub missing -- install or set DISABLE_INFERENCE=1"
        ) from err
# ──────────────────── 3. Config ──────────────────────────────────────
MODEL_ID = "Dushyant4342/ft-llama3-8b-credit-analyst"
CONTEXT_WINDOW = 4096
RESERVED_TOKENS = 512   # head-room kept free for the prompt itself
MAX_HISTORY = 10        # turns of prior conversation to keep
DEFAULT_SYSTEM = (
    "You are an expert credit analyst. Summarise key positive and negative "
    "changes in a customer's credit profile."
)
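# Worked example of the budget arithmetic used in respond() below (values
# illustrative): with the defaults, CONTEXT_WINDOW - RESERVED_TOKENS
# = 4096 - 512 = 3584, so a request for max_tokens=512 passes through
# unchanged while a request for 8000 would be clamped to 3584.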
_client: "InferenceClient | None" = None  # lazily created singleton
def _get_client():
    """Lazy-init the InferenceClient unless inference is disabled."""
    global _client
    if _client is None:
        # `model` (not `repo_id`) is the InferenceClient parameter; the token
        # is picked up from the HF_TOKEN env var or a cached CLI login.
        _client = InferenceClient(model=MODEL_ID)
    return _client
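# A minimal smoke-test sketch, assuming HF_TOKEN is set and the endpoint is
# reachable (the prompt text is illustrative, not from the app):
#     client = _get_client()
#     out = client.chat_completion(
#         messages=[{"role": "user", "content": "Two new delinquencies, utilisation down 10%."}],
#         max_tokens=64,
#     )
#     print(out.choices[0].message.content)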
# ──────────────────── 4. Chat handler ────────────────────────────────
def respond(user_msg, history, system_msg, max_tokens, temperature, top_p):
    """Gradio streaming callback -- transparently stubs when inference is off."""
    # ╭─ Build the prompt
    sys = system_msg.strip() or DEFAULT_SYSTEM
    msgs = [{"role": "system", "content": sys}]
    # With ChatInterface(type="messages") the history arrives as a list of
    # {"role", "content"} dicts, one entry per message, so keep the last
    # MAX_HISTORY turns (two messages per turn).
    for msg in history[-MAX_HISTORY * 2:]:
        content = (msg.get("content") or "").strip()
        if content:
            msgs.append({"role": msg["role"], "content": content})
    if user_msg.strip():
        msgs.append({"role": "user", "content": user_msg.strip()})
    # ╭─ Token budget guard
    budget = min(max_tokens, CONTEXT_WINDOW - RESERVED_TOKENS)
    if budget <= 0:
        yield "[Error] token budget exhausted."
        return
    # ╭─ 4a Stub path (no inference)
    if DISABLE_INFERENCE:
        yield f"(stub) echo: {user_msg}"
        return
    # ╭─ 4b Live inference path
    client = _get_client()  # already pinned to MODEL_ID
    response = ""
    try:
        for chunk in client.chat_completion(
            messages=msgs,
            max_tokens=budget,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            # The stream delta is a dataclass, not a dict; `content` may be None.
            delta = chunk.choices[0].delta.content or ""
            if delta:
                response += delta
                yield response  # ChatInterface expects the accumulated text
    except Exception as err:  # broad on purpose: connection + auth errors
        yield f"[Error] inference failed: {err}"
# ──────────────────── 5. Optional Gradio UI ──────────────────────────
if USE_GRADIO:
    demo = gr.ChatInterface(
        fn=respond,
        title="Credit Analyst Bot (stub-ready)",
        description=(
            "Set `DISABLE_INFERENCE=1` to work offline. "
            "Otherwise the app will call the hosted model."
        ),
        additional_inputs=[
            gr.Textbox(value=DEFAULT_SYSTEM, label="System message"),
            gr.Slider(1, CONTEXT_WINDOW, value=512, step=1, label="Max new tokens"),
            gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
        ],
        type="messages",
    )
    if __name__ == "__main__":
        demo.launch(show_error=True)
else:
    if __name__ == "__main__":
        print("Gradio UI disabled.")
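# Running the app (a sketch; the token value is a placeholder):
#     DISABLE_INFERENCE=1 python app.py   # offline: Gradio UI with stubbed replies
#     HF_TOKEN=hf_xxx python app.py       # live: streams from the hosted model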