my-gradio-app / scripts /auto_finetune.py
Nguyen Trong Lap
Recreate history without binary blobs
eeb0f9c
#!/usr/bin/env python3
"""
Automated fine-tuning for all agents
Uploads training data and creates fine-tuning jobs
"""
import os
import json
import time
from pathlib import Path
from openai import OpenAI
# Initialize OpenAI client
# Both settings are read from the environment (OPENAI_API_KEY / OPENAI_BASE_URL)
# and fall back to the literals below when the variable is unset.
# SECURITY NOTE(review): the fallback API key below is a credential committed
# to source control. It should be rotated and the default removed so the key
# is supplied only through the environment. The code is left unchanged here
# because dropping the fallback changes import-time behaviour for anyone who
# currently relies on it.
client = OpenAI(
api_key=os.getenv('OPENAI_API_KEY', 'sk--PC8FIAvV01G7aUyZsJD7Q'),
base_url=os.getenv('OPENAI_BASE_URL', 'https://aiportalapi.stu-platform.live/jpe')
)
# Agent names processed by fine_tune_all_agents(); for each one it looks for a
# "<agent>_training.jsonl" file in the training-data directory.
AGENTS = ['nutrition', 'symptom', 'exercise', 'mental_health']
def upload_training_file(file_path):
    """Send one training file to the API with purpose ``fine-tune``.

    Args:
        file_path: Path-like object for the training file; its ``.name`` is
            shown in the progress message.

    Returns:
        The ID of the uploaded file, or ``None`` when the upload raised
        (the error is printed, not propagated).
    """
    print(f"πŸ“€ Uploading {file_path.name}...")
    try:
        with open(file_path, "rb") as handle:
            uploaded = client.files.create(file=handle, purpose="fine-tune")
        uploaded_id = uploaded.id
        print(f"βœ… Uploaded: {uploaded_id}")
    except Exception as exc:
        # Best-effort: report the failure and let the caller skip this agent.
        print(f"❌ Upload failed: {exc}")
        return None
    return uploaded_id
def create_fine_tuning_job(file_id, agent_name):
    """Start a fine-tuning job for one agent from an uploaded training file.

    Args:
        file_id: ID of the previously uploaded training file.
        agent_name: Agent label; used in messages and to build the model
            suffix ``"<agent_name>-v1"``.

    Returns:
        The ID of the created job, or ``None`` when job creation raised
        (the error is printed, not propagated).
    """
    print(f"πŸš€ Creating fine-tuning job for {agent_name}...")
    job_params = {
        "training_file": file_id,
        "model": "gpt-4o-mini-2024-07-18",
        "suffix": f"{agent_name}-v1",
    }
    try:
        job = client.fine_tuning.jobs.create(**job_params)
        new_job_id = job.id
        print(f"βœ… Job created: {new_job_id}")
    except Exception as exc:
        # Best-effort: report the failure and let the caller skip this agent.
        print(f"❌ Job creation failed: {exc}")
        return None
    return new_job_id
def wait_for_job(job_id, agent_name, poll_interval=30, timeout=None):
    """Poll a fine-tuning job until it reaches a terminal state.

    The job is considered finished when its status is ``'succeeded'``,
    ``'failed'`` or ``'cancelled'``; any other status keeps the loop polling.

    Args:
        job_id: ID of the fine-tuning job to watch.
        agent_name: Agent label, used only in progress messages.
        poll_interval: Seconds to sleep between status checks. Defaults to
            30, the value that was previously hard-coded.
        timeout: Optional upper bound, in seconds, on the total wait.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The fine-tuned model ID when the job succeeds. ``None`` when the job
        fails or is cancelled, when ``timeout`` elapses, or when a status
        check raises (the error is printed, not propagated).
    """
    print(f"⏳ Waiting for {agent_name} fine-tuning to complete...")
    print(" This may take 10-30 minutes...")
    # Monotonic clock so wall-clock adjustments cannot shorten/extend the wait.
    deadline = None if timeout is None else time.monotonic() + timeout
    try:
        while True:
            job = client.fine_tuning.jobs.retrieve(job_id)
            status = job.status
            if status == 'succeeded':
                model_id = job.fine_tuned_model
                print("βœ… Fine-tuning completed!")
                print(f" Model: {model_id}")
                return model_id
            if status in ('failed', 'cancelled'):
                print(f"❌ Fine-tuning {status}")
                return None
            # The line ends with '\r' (no newline), so it must be flushed
            # explicitly or a line-buffered terminal never shows it.
            print(f" Status: {status}...", end='\r', flush=True)
            if deadline is not None and time.monotonic() >= deadline:
                print(f"\n❌ Timed out after {timeout}s waiting for {agent_name} fine-tuning")
                return None
            time.sleep(poll_interval)
    except Exception as e:
        print(f"❌ Error checking status: {e}")
        return None
def save_model_config(agent_models):
    """Write the agent -> fine-tuned model ID mapping to a JSON file.

    The file is ``fine_tuning/fine_tuned_models.json`` relative to the
    current working directory; its parent directory is created if missing.

    Args:
        agent_models: JSON-serializable mapping of agent name to model ID.
    """
    config_file = Path("fine_tuning/fine_tuned_models.json")
    config_file.parent.mkdir(parents=True, exist_ok=True)
    with open(config_file, 'w', encoding='utf-8') as f:
        # json.dump writes to the file object. The previous json.dumps call
        # only builds a string and rejects a second positional argument, so
        # it raised TypeError and nothing was ever written.
        json.dump(agent_models, f, indent=2)
    print(f"\nβœ… Model config saved to: {config_file}")
def update_agent_configs(agent_models):
    """Generate ``config/fine_tuned_models.py`` from the fine-tuned model IDs.

    The generated module defines a single dict, ``FINE_TUNED_MODELS``, mapping
    agent name to model ID. Entries whose model ID is falsy are omitted. The
    path is relative to the current working directory.

    Args:
        agent_models: Mapping of agent name to fine-tuned model ID.
    """
    print("\nπŸ“ Updating agent configurations...")
    header = (
        "# Fine-tuned Models Configuration\n"
        "# Generated automatically by auto_finetune.py\n"
        "FINE_TUNED_MODELS = {\n"
    )
    # repr() emits a correctly quoted/escaped Python string literal, so a
    # quote or backslash in a value can no longer produce an invalid module.
    entries = [
        f" {str(agent)!r}: {str(model_id)!r},\n"
        for agent, model_id in agent_models.items()
        if model_id
    ]
    config_content = header + "".join(entries) + "}\n"
    # Save to config file
    config_file = Path("config/fine_tuned_models.py")
    # Create the directory first (as save_model_config does); otherwise the
    # write fails with FileNotFoundError when config/ does not exist yet.
    config_file.parent.mkdir(parents=True, exist_ok=True)
    with open(config_file, 'w', encoding='utf-8') as f:
        f.write(config_content)
    print(f"βœ… Configuration saved to: {config_file}")
    print("\nπŸ“Œ To use fine-tuned models, update config/settings.py:")
    print(" from config.fine_tuned_models import FINE_TUNED_MODELS")
    print(" MODEL = FINE_TUNED_MODELS.get('nutrition', 'gpt-4o-mini')")
# Smallest number of training conversations for which a job is attempted.
# NOTE(review): presumably mirrors the API's minimum example count - confirm.
_MIN_CONVERSATIONS = 10


def _count_conversations(training_file):
    """Return the number of non-blank lines (one JSONL record each) in the file."""
    with open(training_file, 'r', encoding='utf-8') as f:
        # Skip blank lines so a trailing newline or spacer line is not
        # counted as a conversation.
        return sum(1 for line in f if line.strip())


def _fine_tune_agent(agent, training_dir):
    """Validate, upload and fine-tune one agent; return its model ID or None."""
    # Find training file
    training_file = training_dir / f"{agent}_training.jsonl"
    if not training_file.exists():
        print(f"⚠️ Training file not found: {training_file}")
        return None
    # Check file size
    file_size = training_file.stat().st_size
    print(f"πŸ“Š Training file size: {file_size:,} bytes")
    # Count conversations
    conv_count = _count_conversations(training_file)
    print(f"πŸ“Š Conversations: {conv_count}")
    if conv_count < _MIN_CONVERSATIONS:
        print(f"⚠️ Too few conversations ({conv_count}), skipping...")
        return None
    # Upload training file
    file_id = upload_training_file(training_file)
    if not file_id:
        return None
    # Create fine-tuning job
    job_id = create_fine_tuning_job(file_id, agent)
    if not job_id:
        return None
    # Wait for completion
    model_id = wait_for_job(job_id, agent)
    print()
    return model_id


def fine_tune_all_agents():
    """Fine-tune every agent in ``AGENTS`` and write the resulting config.

    For each agent the training file ``<agent>_training.jsonl`` is looked up
    in ``fine_tuning/training_data`` (relative to the current working
    directory), uploaded, and fine-tuned, one agent after another. Agents
    whose file is missing or too small, or whose upload/job fails, are
    skipped. When at least one model was produced the mapping is passed to
    ``update_agent_configs``; otherwise a warning is printed.

    Returns:
        None. Progress and results are reported on stdout.
    """
    print("🎯 Starting automated fine-tuning for all agents...")
    print()
    training_dir = Path("fine_tuning/training_data")
    if not training_dir.exists():
        print("❌ Training data not found!")
        print(" Run: python scripts/generate_training_data.py")
        return
    agent_models = {}
    for agent in AGENTS:
        print(f"\n{'='*60}")
        print(f"πŸ€– Processing {agent}_agent")
        print(f"{'='*60}\n")
        model_id = _fine_tune_agent(agent, training_dir)
        if model_id:
            agent_models[agent] = model_id
    # Save results
    if not agent_models:
        print("\n⚠️ No models were fine-tuned")
        print(" Check the errors above and try again")
        return
    print(f"\n{'='*60}")
    print("πŸŽ‰ Fine-tuning Complete!")
    print(f"{'='*60}\n")
    print("πŸ“Š Fine-tuned models:")
    for agent, model_id in agent_models.items():
        print(f" {agent}: {model_id}")
    # Update configurations
    update_agent_configs(agent_models)
    print("\nβœ… All done! Your agents are now fine-tuned!")
    print("\nπŸ“Œ Next steps:")
    print(" 1. Review fine_tuned_models.py")
    print(" 2. Update your agent code to use fine-tuned models")
    print(" 3. Test the improved agents!")
# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    fine_tune_all_agents()