"""Gradio chat UI for a fine-tuned credit-analyst model.

Set DISABLE_INFERENCE=1 to run fully offline: the app then skips the
huggingface_hub import and answers with a stubbed echo instead.
"""

import os

try:
    import gradio as gr

    USE_GRADIO = True
except ImportError:
    USE_GRADIO = False
    print("Gradio not installed - UI disabled.")

DISABLE_INFERENCE = os.getenv("DISABLE_INFERENCE", "0") == "1"

if not DISABLE_INFERENCE:
    try:
        from huggingface_hub import InferenceClient
    except ImportError as err:
        raise RuntimeError(
            "huggingface_hub missing -- install or set DISABLE_INFERENCE=1"
        ) from err

MODEL_ID = "Dushyant4342/ft-llama3-8b-credit-analyst"
CONTEXT_WINDOW = 4096  # total tokens the model's context can hold
RESERVED_TOKENS = 512  # head-room kept free for the prompt itself
MAX_HISTORY = 10  # most recent history entries forwarded to the model
DEFAULT_SYSTEM = (
    "You are an expert credit analyst. Summarise key positive and negative "
    "changes in a customer's credit profile."
)

_client: "InferenceClient | None" = None


def _get_client():
    """Lazy-init the InferenceClient; only called when inference is enabled."""
    global _client
    if _client is None:
        # InferenceClient takes the target model via `model`; it has no
        # `repo_id` parameter.
        _client = InferenceClient(model=MODEL_ID)
    return _client
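
# respond() returns a stub before ever touching _get_client() when
# DISABLE_INFERENCE=1, so the guarded InferenceClient import above has always
# succeeded by the time a client is created.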


def respond(user_msg, history, system_msg, max_tokens, temperature, top_p):
    """Gradio streaming callback -- transparently stubs when inference is off."""
    sys_prompt = system_msg.strip() or DEFAULT_SYSTEM
    msgs = [{"role": "system", "content": sys_prompt}]
    # With type="messages", history arrives as {"role": ..., "content": ...}
    # dicts (not (user, assistant) tuples), so forward each entry as-is.
    for msg in history[-MAX_HISTORY:]:
        content = (msg.get("content") or "").strip()
        if content:
            msgs.append({"role": msg["role"], "content": content})
    if user_msg.strip():
        msgs.append({"role": "user", "content": user_msg.strip()})

    # Cap generation so the completion fits inside the context window.
    budget = min(max_tokens, CONTEXT_WINDOW - RESERVED_TOKENS)
    if budget <= 0:
        yield "[Error] token budget exhausted."
        return

    if DISABLE_INFERENCE:
        yield f"(stub) echo: {user_msg}"
        return

    client = _get_client()
    response = ""
    try:
        for chunk in client.chat_completion(
            model=MODEL_ID,
            messages=msgs,
            max_tokens=budget,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            # Stream chunks expose the delta as an attribute, not a dict.
            delta = chunk.choices[0].delta.content or ""
            if delta:
                response += delta
                # ChatInterface replaces the pending message with each yielded
                # value, so yield the accumulated text, not the bare delta.
                yield response
    # Surface failures in-chat instead of raising, so the UI session survives.
    except Exception as err:
        yield f"[Error] inference failed: {err}"


if USE_GRADIO:
    demo = gr.ChatInterface(
        fn=respond,
        title="Credit Analyst Bot (stub-ready)",
        description=(
            "Set <code>DISABLE_INFERENCE=1</code> to work offline. "
            "Otherwise the app will call the hosted model."
        ),
        additional_inputs=[
            gr.Textbox(value=DEFAULT_SYSTEM, label="System message"),
            gr.Slider(1, CONTEXT_WINDOW, value=512, step=1, label="Max new tokens"),
            gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
        ],
        type="messages",
    )

    if __name__ == "__main__":
        demo.launch(show_error=True)
else:
    if __name__ == "__main__":
        print("Gradio UI disabled.")
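
# Local usage (a sketch; the filename `app.py` is an assumption, not taken
# from this file):
#   DISABLE_INFERENCE=1 python app.py   # offline stub mode, no remote calls
#   python app.py                       # streams from the hosted model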