Commit · b9d3df6
Parent(s): 9725c81
Admin: Add package definition (__init__.py) and Continuous Integration (CI) pipeline.
- .github/workflows/main_ci.yml +36 -0
- .github/workflows/scheduled_opt.yml +38 -0
- .gitignore +8 -0
- app/__init__.py +0 -0
- app/core_logic.py +161 -0
- app/gradio_interface.py +144 -0
- data/feedback_log.json +23 -0
- data/master_prompt.json +6 -0
- requirements.txt +4 -0
- scripts/optimize_prompt.py +157 -0
.github/workflows/main_ci.yml
ADDED
@@ -0,0 +1,36 @@
name: Application CI/CD Pipeline

on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main

jobs:
  build_and_test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository code
        uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        # Install the dependencies needed for the application
        run: pip install -r requirements.txt

      - name: Run Smoke Test on Core Logic
        # This checks if the core_logic file can run without errors
        # Note: API call might fail if key is not available in CI, but file paths will be checked
        run: |
          # Create a dummy .env file for the CI runner to avoid errors
          echo "OPENROUTER_API_KEY=dummy_key_for_ci_test" > .env

          # Run the test block inside core_logic.py
          python app/core_logic.py
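A note on what this smoke test actually verifies (a reading of the code in this commit, not a guarantee): rewrite_and_generate wraps its API call in try/except and returns an error string rather than raising, so with the dummy key the step still exits 0 and only the imports, file paths, and control flow are exercised. A minimal sketch of the effective check:

# Minimal sketch of what the CI smoke test exercises, assuming the dummy
# OPENROUTER_API_KEY from the workflow: the API call fails, but
# rewrite_and_generate() catches the exception and returns an error tuple,
# so `python app/core_logic.py` still exits with status 0.
from app.core_logic import rewrite_and_generate

opt_prompt, response = rewrite_and_generate("smoke test input")
assert isinstance(opt_prompt, str) and isinstance(response, str)
print("Smoke test output:", opt_prompt[:80])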
.github/workflows/scheduled_opt.yml
ADDED
@@ -0,0 +1,38 @@
name: Scheduled Prompt Optimization

on:
  # Run every day at midnight UTC
  schedule:
    - cron: '0 0 * * *'
  # Allows manual triggering from the GitHub Actions tab
  workflow_dispatch:

jobs:
  optimize_and_commit:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run Prompt Optimization Script
        env:
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        run: python scripts/optimize_prompt.py

      - name: Commit and Push changes
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: 'MLOps: Auto-updated master_prompt.json based on user feedback.'
          file_pattern: |
            data/master_prompt.json
            data/feedback_log.json
.gitignore
ADDED
@@ -0,0 +1,8 @@
# Virtual Environment
venv/
# Secret Key
.env
# Python Caching
__pycache__/
# Gradio cache
.gradio/
app/__init__.py
ADDED
File without changes
app/core_logic.py
ADDED
@@ -0,0 +1,161 @@
import os
import json
import datetime
# Import Path for robust file handling
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv

# --- Configuration & Initialization ---
# Load environment variables from .env file
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# OpenRouter client setup
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# --- CORRECTED PATH HANDLING ---
# The root of the project is two levels up from 'app/core_logic.py'
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Use Path objects for reliable access
MASTER_PROMPT_PATH = PROJECT_ROOT / "data" / "master_prompt.json"
FEEDBACK_LOG_PATH = PROJECT_ROOT / "data" / "feedback_log.json"

# --- CORRECTED LLM Model IDs ---
# Swapping to a confirmed free model ID on OpenRouter
TASK_LLM_MODEL = "x-ai/grok-4-fast"
# Keeping the Meta-LLM for later use
META_LLM_MODEL = "x-ai/grok-4-fast"


def load_master_prompt():
    """Loads the current system message from the master configuration file."""
    try:
        # Using Path.read_text() is clean and safe
        return json.loads(MASTER_PROMPT_PATH.read_text())
    except FileNotFoundError:
        # Create the file with initial data if it doesn't exist
        initial_data = {
            "system_message": "Error: Master prompt file not found.",
            "model_name": TASK_LLM_MODEL,
            "version": "0.0.0",
            "last_updated": datetime.datetime.now().isoformat()
        }
        with open(MASTER_PROMPT_PATH, 'w') as f:
            json.dump(initial_data, f, indent=4)
        return initial_data
    except Exception as e:
        print(f"Error loading master prompt: {e}")
        return {"system_message": f"CRITICAL ERROR: {e}"}


def rewrite_and_generate(user_input: str) -> tuple[str, str]:
    """
    1. Loads the system prompt.
    2. Sends the combined prompt to the Task-LLM via OpenRouter.
    3. Returns the rewritten prompt (for user visibility) and the final response.
    """
    if not user_input:
        return "Please enter a question.", "---"

    config = load_master_prompt()
    system_message = config.get("system_message", "")

    # The Meta-Prompt for the LLM to structure its own output
    rewrite_instruction = (
        "TASK: First, rewrite the following VAGUE_INPUT into a highly specific and professional instruction (the 'OPTIMIZED PROMPT'). "
        "The optimized prompt MUST include a clear Persona, the desired Output Format, and clear Constraints. "
        "Second, provide the final answer based on the OPTIMIZED PROMPT. "
        "Format your output strictly as: \n\n<OPTIMIZED_PROMPT>\n\n<FINAL_RESPONSE>"
    )

    full_prompt = f"VAGUE_INPUT: {user_input}"

    messages = [
        {"role": "system", "content": system_message + " " + rewrite_instruction},
        {"role": "user", "content": full_prompt}
    ]

    try:
        response = client.chat.completions.create(
            model=TASK_LLM_MODEL,
            messages=messages,
            temperature=0.7,
            max_tokens=2048,
        )

        raw_output = response.choices[0].message.content.strip()

        if "\n\n" in raw_output:
            # We use .split("\n\n", 1) to ensure we only split on the first occurrence
            optimized_prompt, final_response = raw_output.split("\n\n", 1)
        else:
            optimized_prompt = "--- (Parsing Error: Could not separate prompt and response. The LLM output might not follow the required format.)"
            final_response = raw_output

        return optimized_prompt.strip(), final_response.strip()

    except Exception as e:
        return (
            f"ERROR: Failed to connect to OpenRouter API. Check key/internet. Details: {e}",
            "---"
        )


def log_feedback(
    original_prompt: str,
    optimized_prompt: str,
    final_response: str,
    rating: int
):
    """Logs the interaction data and user feedback to the JSON log file."""

    new_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "original_prompt": original_prompt,
        "optimized_prompt": optimized_prompt,
        "final_response": final_response,
        "rating": rating
    }

    try:
        # 1. Load existing data
        if FEEDBACK_LOG_PATH.exists():
            with open(FEEDBACK_LOG_PATH, 'r') as f:
                # Handle case where file is empty (e.g., just '[]')
                content = f.read().strip()
                data = json.loads(content) if content else []
        else:
            data = []

        # 2. Append new entry and write back
        data.append(new_entry)

        with open(FEEDBACK_LOG_PATH, 'w') as f:
            json.dump(data, f, indent=4)

        return "Feedback Logged successfully! Thank you."

    except Exception as e:
        return f"ERROR: Could not log feedback to JSON file. Details: {e}"


# If you run this file directly, it runs a quick test:
if __name__ == "__main__":
    print("--- Testing Core Logic ---")

    # Check loading
    config = load_master_prompt()
    print(f"Current System Message: {config.get('system_message', 'N/A')}")

    # Test generation
    opt_prompt, response = rewrite_and_generate("Tell me about the biggest planet in our solar system.")
    print("\n[Optimized Prompt]:\n", opt_prompt)
    print("\n[Final Response]:\n", response)

    # Test logging
    log_status = log_feedback("Vague Test", opt_prompt, response, 1)
    print("\n[Log Status]:", log_status)
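One observation on the output parsing: the feedback log further below shows the model emitting literal <OPTIMIZED_PROMPT> / <FINAL_RESPONSE> tags, which the first-"\n\n" split leaves embedded inside the logged fields. A minimal alternative parser (a sketch, not part of this commit) would extract by tag when the tags are present:

# Sketch: tag-based parsing of the Task-LLM output. Falls back to the raw
# text when no tags are found, mirroring the current code's else branch.
import re

def parse_tagged_output(raw: str) -> tuple[str, str]:
    opt = re.search(r"<OPTIMIZED_PROMPT>\s*(.*?)\s*</OPTIMIZED_PROMPT>", raw, re.S)
    fin = re.search(r"<FINAL_RESPONSE>\s*(.*?)\s*</FINAL_RESPONSE>", raw, re.S)
    optimized = opt.group(1) if opt else ""
    final = fin.group(1) if fin else raw
    return optimized, final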
app/gradio_interface.py
ADDED
@@ -0,0 +1,144 @@
import gradio as gr
from app.core_logic import rewrite_and_generate, log_feedback

# --- State Variables ---
# Gradio needs a way to temporarily store the original prompt and response
# to be used by the separate feedback button.

# These will hold the current interaction data
current_original_prompt = gr.State("")
current_optimized_prompt = gr.State("")
current_final_response = gr.State("")


# --- Helper Functions for Gradio UI ---

def process_submission(user_input):
    """Handles the full workflow: calling the LLM and storing results."""

    # 1. Call the core logic function
    optimized_prompt, final_response = rewrite_and_generate(user_input)

    # 2. Return new UI values and update internal state variables
    # The return order must match the output components in the 'Submit' button click event.
    return (
        optimized_prompt,
        final_response,
        user_input,        # New state for original_prompt
        optimized_prompt,  # New state for optimized_prompt
        final_response     # New state for final_response
    )


def handle_feedback(rating_value, orig_prompt, opt_prompt, final_resp):
    """Handles the user clicking the feedback button."""
    if not orig_prompt:
        # Return the unchanged state values too, so the number of return
        # values matches the four output components wired to this handler.
        return "Error: Please run a query first before providing feedback.", orig_prompt, opt_prompt, final_resp

    # 1. Map the string rating to an integer (1 for thumbs up, 0 for thumbs down)
    rating_int = 1 if rating_value == "👍 Excellent" else 0

    # 2. Call the core logic function to log data
    log_message = log_feedback(orig_prompt, opt_prompt, final_resp, rating_int)

    # 3. Clear the state variables after successful logging
    return log_message, "", "", ""


# --- Gradio Interface Layout ---

with gr.Blocks(title="IPO-Meta: Prompt Optimizer") as demo:
    gr.Markdown("# ✨ IPO-Meta: Intelligent Prompt Optimizer")
    gr.Markdown(
        "Enter a question, and our system will automatically rewrite it into a **high-quality, structured prompt** (powered by LLMs) "
        "before generating the final answer. Provide feedback to improve the system's core instruction over time! (The MLOps loop)"
    )

    # --- INPUT SECTION ---
    with gr.Row():
        user_input = gr.Textbox(
            label="1. Your Vague Question/Input:",
            placeholder="e.g., Tell me about quantum computing.",
            lines=3
        )

    # --- SUBMIT BUTTON ---
    submit_btn = gr.Button("🚀 Optimize Prompt & Generate Response")

    # --- OUTPUT SECTION ---
    with gr.Row():
        optimized_output = gr.Textbox(
            label="2. Optimized Prompt (The System's Suggestion):",
            lines=4,
            interactive=False,
            elem_id="optimized_prompt"
        )

    final_response_output = gr.Markdown(
        "**3. Final AI Response:**\n\n---",
        elem_id="final_response_area"
    )

    # --- FEEDBACK SECTION ---
    gr.Markdown("---")
    gr.Markdown("### 4. Continuous Improvement Feedback (The MLOps Data)")

    with gr.Row():
        feedback_status = gr.Textbox(
            label="Feedback Status",
            value="Waiting for feedback...",
            interactive=False,
            scale=2
        )

        # Radio buttons for clear feedback
        feedback_radio = gr.Radio(
            ["👍 Excellent", "👎 Needs Work"],
            label="Did the Optimized Prompt result in a good response?",
            value=None,  # No default selection
            scale=1
        )

    feedback_btn = gr.Button("💾 Log Feedback")

    # --- STATE MANAGEMENT (Hidden) ---
    # These hidden components store the data needed by the feedback handler
    current_original_prompt.render()
    current_optimized_prompt.render()
    current_final_response.render()

    # --- Event Handling ---

    # When 'Submit' is clicked:
    submit_btn.click(
        fn=process_submission,
        inputs=[user_input],
        outputs=[
            optimized_output,
            final_response_output,
            current_original_prompt,
            current_optimized_prompt,
            current_final_response
        ]
    )

    # When 'Log Feedback' is clicked:
    feedback_btn.click(
        fn=handle_feedback,
        inputs=[
            feedback_radio,
            current_original_prompt,
            current_optimized_prompt,
            current_final_response
        ],
        outputs=[
            feedback_status,
            current_original_prompt,   # Clear state on successful log
            current_optimized_prompt,  # Clear state on successful log
            current_final_response     # Clear state on successful log
        ]
    )

# Launch the Gradio app
if __name__ == "__main__":
    # Note: To run locally, your OpenRouter API key must be set in the .env file.
    demo.launch()
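Worth noting for local runs (an assumption about usage, not stated in the commit): because the interface uses the absolute import `from app.core_logic import ...`, it should be launched from the project root so the `app` package resolves. A minimal sketch:

# Sketch: launch the UI from the project root; equivalent to running
# `python -m app.gradio_interface` on the command line.
import runpy

runpy.run_module("app.gradio_interface", run_name="__main__")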
data/feedback_log.json
ADDED
@@ -0,0 +1,23 @@
[
    {
        "timestamp": "2025-09-28T16:15:04.042900",
        "original_prompt": "Vague Test",
        "optimized_prompt": "ERROR: Failed to connect to OpenRouter API. Check key/internet. Details: Error code: 400 - {'error': {'message': 'deepseek/deepseek-v3-0324:free is not a valid model ID', 'code': 400}, 'user_id': 'user_32wobDRyaUhuLgKrre5COzxeNDH'}",
        "final_response": "---",
        "rating": 1
    },
    {
        "timestamp": "2025-09-28T16:16:27.080462",
        "original_prompt": "Vague Test",
        "optimized_prompt": "<OPTIMIZED_PROMPT>\nYou are an expert astronomer and science communicator with a passion for making complex topics accessible to everyone. Your role is to educate users on celestial bodies in an engaging, accurate, and straightforward manner.",
        "final_response": "Task: Provide a comprehensive yet concise overview of the largest planet in our solar system, including its key characteristics, size comparison to other planets, and notable features.\n\nOutput Format: Start with a brief introductory paragraph explaining what the planet is and why it's the largest. Follow with 5-7 bullet points highlighting essential facts such as diameter, mass, composition, moons, and exploration history. End with a short concluding sentence on its significance.\n\nConstraints: Base all information on verified astronomical data from sources like NASA. Keep the language simple, engaging, and suitable for a general audience (ages 12+). Limit the total response to under 300 words. Do not include unrelated topics or speculation; focus solely on factual details. Ensure the response is positive and inspiring about space exploration.\n</OPTIMIZED_PROMPT>\n\n<FINAL_RESPONSE>\nJupiter is the largest planet in our solar system, a gas giant that dwarfs all others in size and mass, making it a fascinating subject for astronomers and space enthusiasts alike.\n\n- **Size and Scale**: Jupiter has a diameter of about 88,846 miles (142,984 kilometers) at its equator, roughly 11 times that of Earth\u2014over 1,300 Earths could fit inside it.\n- **Mass**: It is more than twice as massive as all the other planets combined, with a mass of 1.898 \u00d7 10^27 kilograms, contributing significantly to the solar system's overall gravity.\n- **Composition**: Primarily made of hydrogen (90%) and helium (10%), similar to the Sun, with swirling clouds of ammonia and water vapor creating its colorful bands.\n- **Great Red Spot**: This iconic storm, larger than Earth, has raged for at least 400 years and is a hallmark of Jupiter's turbulent atmosphere.\n- **Moons**: Jupiter boasts 95 known moons, including the four large Galilean moons\u2014Io, Europa, Ganymede, and Callisto\u2014discovered by Galileo in 1610.\n- **Rings**: It has a faint ring system made of dust particles, discovered by Voyager 1 in 1979.\n- **Exploration**: Pioneered by missions like Pioneer 10 (1973), Voyager (1979), and Juno (2016-present), which revealed its magnetic field and interior structure.\n\nJupiter's immense presence not only shapes our solar system but also inspires ongoing quests to uncover its secrets through future missions.\n</FINAL_RESPONSE>",
        "rating": 1
    },
    {
        "timestamp": "2025-09-28T16:24:58.358824",
        "original_prompt": "Vague Test",
        "optimized_prompt": "<OPTIMIZED_PROMPT>\nYou are an expert astronomer with a passion for making complex space topics accessible to everyone. Respond to the query about the largest planet in our solar system by providing an informative overview that includes its name, size compared to Earth, key physical characteristics, notable features like atmosphere and moons, and its role in the solar system. Structure the output as a concise paragraph (under 150 words) followed by 3-5 bullet points highlighting fun facts. Use simple, engaging language suitable for a general audience, ensuring all information is accurate and based on established astronomical knowledge. Do not include unrelated topics or speculation.\n</OPTIMIZED_PROMPT>",
        "final_response": "<FINAL_RESPONSE>\nJupiter is the largest planet in our solar system, a gas giant that's about 11 times wider than Earth and has a mass more than twice that of all other planets combined. It's mostly made of hydrogen and helium, with swirling clouds of ammonia and water ice creating its famous colorful bands and massive storms, including the Great Red Spot\u2014a storm bigger than Earth that's raged for centuries. Jupiter orbits the Sun every 12 Earth years and plays a key role in protecting inner planets by gravitationally pulling in asteroids and comets.\n\n- Jupiter has at least 95 moons, with the four largest (Io, Europa, Ganymede, and Callisto) discovered by Galileo in 1610.\n- Its strong magnetic field creates auroras and intense radiation belts.\n- The planet's diameter is about 88,850 miles (143,000 km), making it visible to the naked eye from Earth.\n- NASA's Juno mission is currently studying Jupiter's atmosphere and interior up close.\n</FINAL_RESPONSE>",
        "rating": 1
    }
]
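A quick consequence of this seed data: all three entries carry rating == 1, so the scheduled optimizer's gate (at least MIN_NEGATIVE_FEEDBACK = 3 entries with rating == 0 in scripts/optimize_prompt.py) is not met, and the first scheduled run will skip the rewrite until real negative feedback accumulates. A sketch of the check:

# Sketch: count the negative entries the optimizer would aggregate.
import json
from pathlib import Path

entries = json.loads(Path("data/feedback_log.json").read_text())
negatives = [e for e in entries if e.get("rating") == 0]
print(f"{len(negatives)} negative entries")  # 0 for the seed data above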
data/master_prompt.json
ADDED
@@ -0,0 +1,6 @@
{
    "system_message": "You are a friendly, helpful content assistant. Your task is to briefly answer the user's question, keeping the response concise and easy to understand for a general audience.",
    "model_name": "google/gemma-7b-it:free",
    "version": "1.0.0",
    "last_updated": "2025-09-28T16:00:00Z"
}
requirements.txt
ADDED
@@ -0,0 +1,4 @@
# Core application dependencies
gradio
openai  # Used to communicate with OpenRouter API
python-dotenv  # Used to load API key from .env file
scripts/optimize_prompt.py
ADDED
@@ -0,0 +1,157 @@
import os
import json
import datetime
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv

# --- Configuration & Initialization ---
load_dotenv()

# The script runs from the 'scripts/' directory, so its root is one level up.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# File Paths
MASTER_PROMPT_PATH = PROJECT_ROOT / "data" / "master_prompt.json"
FEEDBACK_LOG_PATH = PROJECT_ROOT / "data" / "feedback_log.json"

# LLM Model ID for Rewriting (Meta-LLM)
META_LLM_MODEL = "x-ai/grok-4-fast"
# The minimum number of negative feedback entries required to trigger an update
MIN_NEGATIVE_FEEDBACK = 3


def load_data(path: Path):
    """Safely loads JSON data, handling empty file content."""
    try:
        if path.exists():
            content = path.read_text().strip()
            return json.loads(content) if content else []
        return []
    except Exception as e:
        print(f"Error loading {path}: {e}")
        return []


def aggregate_negative_feedback(feedback_data: list) -> str:
    """
    Analyzes the feedback log to summarize only the negative (rating=0) feedback.
    """
    negative_feedback = [entry for entry in feedback_data if entry.get("rating") == 0]

    if len(negative_feedback) < MIN_NEGATIVE_FEEDBACK:
        print(f"INFO: Only {len(negative_feedback)} negative entries found. Skipping optimization.")
        return None

    # Summarize the negative prompts that led to user dislike
    summary = []
    for entry in negative_feedback:
        summary.append(
            f"User disliked the response (Rating 0) after input: '{entry['original_prompt']}' "
            f"The resulting OPTIMIZED PROMPT was: '{entry['optimized_prompt']}'"
        )

    print(f"INFO: Aggregated {len(negative_feedback)} negative feedback entries.")
    return "\n---\n".join(summary)


def optimize_system_prompt(current_system_message: str, feedback_summary: str) -> str:
    """
    Calls the Meta-LLM to rewrite the system message based on negative feedback.
    """

    # We define a strict Meta-Prompt for the Grok model to follow
    meta_prompt = (
        "You are the **System Prompt Optimizing Agent**. Your goal is to analyze the 'FAILED FEEDBACK' and rewrite the 'CURRENT SYSTEM MESSAGE' "
        "to address the problems identified. The new system message must aim to improve the quality of future responses, making them more accurate, "
        "detailed, or strictly adherent to formatting rules, based on the failure patterns. "
        "You must output **ONLY** the new system message text, nothing else."
    )

    # The user message feeds the prompt and the negative data to the agent
    user_message = f"""
CURRENT SYSTEM MESSAGE:
---
{current_system_message}
---

FAILED FEEDBACK (You must incorporate lessons from this data):
---
{feedback_summary}
---

Based ONLY on the above, rewrite the CURRENT SYSTEM MESSAGE to improve it.
New System Message:
"""

    try:
        # Call OpenRouter API with the Meta-LLM
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=os.getenv("OPENROUTER_API_KEY"),
        )

        response = client.chat.completions.create(
            model=META_LLM_MODEL,
            messages=[
                {"role": "system", "content": meta_prompt},
                {"role": "user", "content": user_message}
            ],
            temperature=0.1,  # Low temperature for reliable instruction following
            max_tokens=512,
        )

        new_prompt = response.choices[0].message.content.strip()
        print("SUCCESS: New System Prompt generated by Meta-LLM.")
        return new_prompt

    except Exception as e:
        print(f"CRITICAL ERROR: Meta-LLM API call failed: {e}")
        return current_system_message  # Return original prompt on failure


def run_optimization():
    """Main function for the MLOps pipeline script."""
    print(f"--- Running Prompt Optimization Pipeline at {datetime.datetime.now()} ---")

    # 1. Load Data
    current_config = load_data(MASTER_PROMPT_PATH)
    feedback_data = load_data(FEEDBACK_LOG_PATH)
    current_system_message = current_config.get("system_message", "")

    if not feedback_data:
        print("INFO: Feedback log is empty. Exiting optimization.")
        return

    # 2. Aggregate Feedback
    feedback_summary = aggregate_negative_feedback(feedback_data)

    if feedback_summary is None:
        return

    # 3. Optimize Prompt
    new_system_message = optimize_system_prompt(current_system_message, feedback_summary)

    # 4. Check if prompt actually changed before committing
    if new_system_message != current_system_message:
        print("\n*** PROMPT UPDATED ***")

        # 5. Update Master Prompt File
        current_config["system_message"] = new_system_message
        # The stored version may be a semantic string like "1.0.0", which
        # float() cannot parse; fall back to 1.0 in that case.
        try:
            version_num = float(current_config.get("version", "1.0"))
        except ValueError:
            version_num = 1.0
        current_config["version"] = str(round(version_num + 0.01, 2))
        current_config["last_updated"] = datetime.datetime.now().isoformat()

        with open(MASTER_PROMPT_PATH, 'w') as f:
            json.dump(current_config, f, indent=4)
        print(f"Successfully wrote new prompt version {current_config['version']} to master_prompt.json")

        # 6. Clear Feedback Log (Ready for next cycle)
        with open(FEEDBACK_LOG_PATH, 'w') as f:
            json.dump([], f)
        print("Feedback log cleared.")

    else:
        print("\nINFO: No significant change or API error. Master prompt remains the same.")


if __name__ == "__main__":
    run_optimization()
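A minimal local dry-run sketch (an assumption about usage; the key placeholder is hypothetical): with the seed feedback log committed above, the script prints the "Skipping optimization" info line and exits without touching master_prompt.json, which makes it safe to test before the first scheduled run.

# Sketch: invoke the pipeline directly from the project root.
import os
import subprocess

os.environ.setdefault("OPENROUTER_API_KEY", "sk-or-...")  # hypothetical placeholder
subprocess.run(["python", "scripts/optimize_prompt.py"], check=True)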