"""PRECISE Assistant: a password-protected Gradio chat UI backed by an OpenAI-compatible API.

All configuration (system prompt, API keys, user list, expiry date and the
long-form blueprint sections served through tool calls) is read from
environment variables / Space secrets.
"""

import hmac
import json
import os
import re  # noqa: F401  (kept: unused in the code visible here)
from datetime import date, datetime
from typing import Optional

import gradio as gr
from openai import OpenAI


# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: Optional[str] = None) -> str:
    """Return the environment variable *key*, or *fallback* when it is unset.

    Raises:
        RuntimeError: if the variable is unset and no fallback was given.
    """
    value = os.getenv(key)
    if value is not None:
        return value
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")


# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users() -> dict:
    """Load the username -> password mapping from the ``CHAT_USERS`` secret.

    The secret is expected to hold a JSON object. Loading is best-effort: a
    missing, malformed or non-object value yields an empty mapping (so nobody
    can log in) instead of crashing at import time, and a warning is printed
    so the misconfiguration is visible in the logs.
    """
    raw = _secret("CHAT_USERS", "{}")
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        # Only the parser's message is printed, never the secret itself.
        print(f"WARNING: CHAT_USERS is not valid JSON ({exc.msg}); no users loaded.")
        return {}
    if not isinstance(parsed, dict):
        print("WARNING: CHAT_USERS must be a JSON object of username/password pairs; no users loaded.")
        return {}
    return dict(parsed)


# Load users once at import time.
VALID_USERS = load_users()


def authenticate_user(username, password):
    """Return True when *username* exists and *password* matches its stored password.

    The comparison uses ``hmac.compare_digest`` so that it does not leak, via
    timing, how many leading characters of a guess were correct. Non-string
    values on either side never authenticate.
    """
    expected = VALID_USERS.get(username)
    if not isinstance(expected, str) or not isinstance(password, str):
        return False
    # Compare bytes: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(expected.encode("utf-8"), password.encode("utf-8"))


# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Available models with their respective API configurations
MODELS = {
    "Gpt-oss-20b (base)": {
        "provider": "openrouter",
        "model_name": "@preset/precise-chat-2",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no",
    },
    "Gpt-oss-120b": {
        "provider": "openrouter",
        "model_name": "@preset/precise-chat",
        "api_url": "https://openrouter.ai/api/v1",
        "translate": "no",
    },
}

# Get model display names for dropdown
MODEL_NAMES = list(MODELS.keys())

# Short summaries are used as tool DESCRIPTIONS; the full documents are only
# returned as tool OUTPUT when the model actually calls the tool.
_CORE_FEATURES_SUMMARY = (
    "This section outlines the key features used in the KNN model, including promotion grade, "
    "sales percentiles, and ratios like PVR, PTR, and PCR-I, along with their weights, "
    "calculations, and binning processes to prepare data for prediction. "
    "It also details core model parameters such as decay factors, weights, and LOESS settings, "
    "plus formulas for core concepts like weighted PVR handling overlaps and business logic "
    "for slow-moving items."
)
_PSEUDOCODE_SUMMARY = (
    "This section describes the complete prediction pipeline, starting from feature engineering "
    "and slow-moving classification, through dynamic scaling, KNN neighbor selection with time "
    "decay, initial predictions, and bounding using pre-computed artifacts. "
    "It continues with LOESS smoothing for branch-level trends, branch adjustments for incentives "
    "and manual overrides, and final output enrichment with explainability metadata, supported "
    "by detailed pseudocode implementations."
)

_TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_core_features",
            "description": _CORE_FEATURES_SUMMARY,
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_pseudocode",
            "description": _PSEUDOCODE_SUMMARY,
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
]

# Text appended to the system prompt so the model knows when to call which tool.
_TOOL_INSTRUCTIONS = (
    "\n\nYou have access to the following tools for retrieving detailed information from the PRECISE blueprint:\n"
    "- get_core_features: Use this tool to retrieve details on core features, pre-processing, parameters, and formulas in Section 4 when the query relates to features, scaling, or business logic.\n"
    "- get_pseudocode: Use this tool to retrieve the end-to-end workflow, steps, and pseudocode implementations in Section 5 when the query involves prediction pipelines, KNN, LOESS, or adjustments.\n"
    "\n\n **Additional notes and instructions:** \n"
    "- Call tools only when necessary to answer accurately based on the blueprint; otherwise, respond directly using your knowledge of other sections.\n"
    "- You can only do a **maximum of one tool call per turn**. If the tool result doesn't answer user's query, try your best to answer while noting the limitation of your knowledge and ask if the user wants to try again.\n"
    "- **IMPORTANT:** It is not explicitly stated in section 4, but remember that Weighted PVR is calculated **per promotion code**. The overlapped promotion calculation is done each time only for the promotion period of A SINGLE PROMOTION CODE."
)

# Returned by a known tool when its backing environment variable is empty.
_TOOL_FALLBACKS = {
    "get_core_features": "Full Section 4 documentation not available.",
    "get_pseudocode": "Full Section 5 documentation not available.",
}

# Shown when the model returns no text at all (e.g. it answered with only a tool call).
_EMPTY_REPLY = "Model tidak mengembalikan jawaban. Silakan coba lagi."


# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
def _run_tool(func_name, tool_docs):
    """Return the output for tool *func_name*, using *tool_docs* (tool name -> full text)."""
    if func_name not in _TOOL_FALLBACKS:
        print(f"DEBUG: Unknown tool {func_name}")
        return "Tool tidak dikenali."
    return tool_docs.get(func_name) or _TOOL_FALLBACKS[func_name]


def _complete(client, model_name, messages, max_tokens):
    """Issue one non-streaming chat completion with the PRECISE tools enabled."""
    return client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        stream=False,  # Disabled for reliable tool handling
        tools=_TOOLS,
        tool_choice="auto",  # Let model decide if/when to call tools
        temperature=0.3,
    )


def _message_text(reply) -> str:
    """Return the stripped text of a completion message; empty string when content is None."""
    return (reply.content or "").strip()


def respond(
    message,
    history: list[dict[str, str]],
    selected_model=MODEL_NAMES[0],
    system_message=_secret('system_prompt'),
    max_tokens=38096,
):
    """Answer one chat turn with the selected model, resolving tool calls internally.

    The model may call ``get_core_features`` / ``get_pseudocode``; their output
    is read from the ``core_features`` / ``pseudocode`` environment variables
    and fed back to the model in a second request. Tool traffic is never shown
    in the chat history: only the final text is yielded.

    Args:
        message: The new user message.
        history: Previous turns as dicts with ``role`` and ``content``.
        selected_model: Key into ``MODELS``.
        system_message: Base system prompt (the default is read from the
            ``system_prompt`` secret once, when the module is imported).
        max_tokens: Completion token limit passed to the API.

    Yields:
        A single string: the model's answer, an expiry notice once the
        ``END_DATE`` secret has been reached, or ``"Error: ..."`` when anything
        fails (missing secret, unknown model, API error).
    """
    try:
        # Parsed inside the try so a missing/invalid END_DATE is reported in
        # the chat like every other configuration error.
        end_date = datetime.strptime(_secret("END_DATE"), "%Y-%m-%d").date()
        if date.today() >= end_date:
            yield "Chatbot sudah expired."
            return

        # Get model configuration and the API key for its provider
        model_config = MODELS[selected_model]
        if model_config["provider"] == "huggingface":
            api_key = _secret("HF_TOKEN")
        else:  # openrouter
            api_key = _secret("OPENROUTER_KEY")

        client = OpenAI(
            base_url=model_config["api_url"],
            api_key=api_key,
        )

        # Load FULL documentation from environment variables for tool OUTPUTS
        full_core_features = os.getenv("core_features", "")
        full_pseudocode = os.getenv("pseudocode", "")
        print(f"DEBUG: Loaded full_core_features length: {len(full_core_features)} chars")
        print(f"DEBUG: Loaded full_pseudocode length: {len(full_pseudocode)} chars")
        tool_docs = {
            "get_core_features": full_core_features,
            "get_pseudocode": full_pseudocode,
        }

        # Prepare messages: system prompt + prior turns + the new user message.
        # Only role/content are forwarded; Gradio history entries may carry
        # extra keys that an OpenAI-compatible endpoint can reject.
        messages = [{"role": "system", "content": system_message + _TOOL_INSTRUCTIONS}]
        messages.extend(
            {"role": turn["role"], "content": turn["content"]} for turn in history or []
        )
        messages.append({"role": "user", "content": message})
        print(f"DEBUG: User query: {message}")

        # First API call: the model either answers directly or requests tools
        initial_response = _complete(client, model_config["model_name"], messages, max_tokens)
        initial_message = initial_response.choices[0].message
        messages.append(initial_message)
        tool_calls = initial_message.tool_calls or []
        print(f"DEBUG: Initial response content: {initial_message.content or 'No content (tool call only)'}")
        print(f"DEBUG: Tool calls detected: {len(tool_calls)}")

        if not tool_calls:
            # No tools needed; yield initial response content
            initial_content = _message_text(initial_message)
            print(f"DEBUG: No tools; yielding initial response preview: {initial_content[:200]}...")
            yield initial_content or _EMPTY_REPLY
            return

        called_tools = []
        for tool_call in tool_calls:
            func_name = tool_call.function.name
            called_tools.append(func_name)
            print(f"DEBUG: Executing tool: {func_name}")
            tool_output = _run_tool(func_name, tool_docs)
            print(f"DEBUG: Tool output length for {func_name}: {len(tool_output)} chars (preview: {tool_output[:100]}...)")
            # Append tool result internally (not shown in Gradio history)
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "name": func_name,
                "content": tool_output,
            })
        print(f"DEBUG: Called tools summary: {called_tools}")

        # Second API call: let the model answer with the tool outputs in context.
        # Its content can be None if it asks for yet another tool; that case is
        # not followed up and falls back to _EMPTY_REPLY.
        final_response = _complete(client, model_config["model_name"], messages, max_tokens)
        final_content = _message_text(final_response.choices[0].message)
        print(f"DEBUG: Final response preview: {final_content[:200]}...")
        yield final_content or _EMPTY_REPLY  # Yield only final content to Gradio

    except Exception as e:
        # Top-level boundary: surface the failure in the chat instead of crashing the UI.
        error_msg = f"Error: {e}"
        print(f"DEBUG: Exception in respond: {error_msg}")
        yield error_msg


# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Custom authentication function for Gradio; delegates to ``authenticate_user``."""
    return authenticate_user(username, password)


# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section
tips_md = """
"""

# Footer
footer_md = """
Dipilih providers dengan ZDR (Zero Data Retention) policy. Expired 1 April 2026 - (YOI/251029).
"""

# Create the chat interface
with gr.Blocks(
    title="PRECISE Chat",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# PRECISE Assistant")
    gr.Markdown("Promotion Recommendation Inference System")

    with gr.Sidebar():
        gr.Markdown("### Configuration")

        # Model selection
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Pilih LLM yang digunakan."
        )

    # Main chat interface
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            selected_model,
        ],
        examples=[
            ["Siapa Anda dan pertanyaan apa saja yang bisa Anda jawab?"],
            # Disabled examples, kept for reference:
            # ["Apa saja yang termasuk dan tidak termasuk dalam scope PRECISE?"],
            # ["Siapa yang bertanggung jawab atas data dan parameter?"],
            # ["Apa saja fitur utama dalam PRECISE?"],
            # ["Bagaimana data diproses di PRECISE?"],
            ["Apa status PRECISE saat ini dan bagaimana rencana ke depan?"],
        ],
        cache_examples=False,
    )

    # Tips and footer
    gr.Markdown(tips_md)
    gr.Markdown(footer_md)

# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    demo.launch(
        auth=gradio_auth,  # Use our custom auth function
        auth_message="Please login to access the chat interface",
        server_name="0.0.0.0",
        ssr_mode=False,
        server_port=7860,
        show_error=True
    )