jacob-valdez committed on
Commit
729f088
·
verified ·
1 Parent(s): 2faaffe

Upload huggingface_pipeline/inference_api/app.py with huggingface_hub

Browse files
huggingface_pipeline/inference_api/app.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI inference server for BuilderBrain.
3
+
4
+ Provides REST API endpoints for model inference, grammar validation,
5
+ and real-time monitoring data.
6
+ """
7
+
8
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
9
+ from fastapi.responses import JSONResponse
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ import uvicorn
12
+ import json
13
+ import time
14
+ import psutil
15
+ import asyncio
16
+ from typing import Dict, List, Any, Optional
17
+ from datetime import datetime
18
+ from pydantic import BaseModel
19
+ import sys
20
+ import os
21
+
22
# Add parent directory to path for BuilderBrain imports.
# NOTE(review): three '..' levels climb one directory ABOVE the repository
# root for a file located at huggingface_pipeline/inference_api/app.py —
# confirm whether two levels ('..', '..') was intended.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
24
+
25
+ # Pydantic models for request/response
26
class InferenceRequest(BaseModel):
    """Request body for POST /inference/generate."""
    prompt: str                  # text to condition generation on
    model_scale: str = "small"   # see GET /models/scales for known values
    grammar_strict: bool = True  # enforce grammar-constrained output
    max_tokens: int = 100        # generation cap; currently unused by the mock handler
31
+
32
class GrammarPreviewRequest(BaseModel):
    """Request body for POST /grammar/preview."""
    text: str                   # text to check against the grammar
    grammar_type: str = "json"  # see GET /grammar/constraints for known types
35
+
36
class PlanValidationRequest(BaseModel):
    """Request body for POST /plans/validate and POST /plans/preview."""
    nodes: List[Dict[str, Any]]  # plan DAG nodes
    edges: List[Dict[str, Any]]  # plan DAG edges
39
+
40
class ModelExportRequest(BaseModel):
    """Request body for POST /models/export."""
    scale: str          # model scale to export
    format: str = "hf"  # export format identifier ("hf" presumably Hugging Face — confirm)
43
+
44
class ModelScaleRequest(BaseModel):
    """Request body for POST /models/scale."""
    scale: str  # target model scale
46
+
47
# Global state for mock responses (in production, this would connect to
# actual BuilderBrain).
class MockBuilderBrainState:
    """In-memory mock of BuilderBrain runtime state.

    Holds the active model scale, feature toggles, and a rolling window of
    fake training metrics that the endpoints read and the background
    simulator (``simulate_training``) mutates.
    """

    def __init__(self):
        self.current_scale = "small"          # active model scale
        self.grammar_enabled = True           # grammar-constrained decoding toggle
        self.plan_validation_enabled = True   # plan DAG validation toggle
        self.training_active = False          # flipped by /training/start and /training/stop
        self.current_step = 1500              # simulated training step counter
        self.total_loss = 2.34                # most recent simulated total loss

        # Mock training history: each series holds the 10 most recent points
        # (the simulator pops the oldest entry before appending a new one).
        self.training_history = {
            'total_loss': [5.0, 4.5, 3.8, 3.2, 2.8, 2.5, 2.3, 2.34, 2.32, 2.31],
            'task_loss': [4.8, 4.2, 3.5, 2.9, 2.5, 2.2, 2.0, 2.1, 2.05, 2.03],
            'constraint_losses': {
                'grammar': [0.2, 0.18, 0.15, 0.12, 0.1, 0.08, 0.06, 0.05, 0.04, 0.035],
                'graph2graph': [0.15, 0.12, 0.1, 0.08, 0.06, 0.05, 0.04, 0.03, 0.025, 0.02],
                'reuse': [0.05, 0.04, 0.035, 0.03, 0.025, 0.02, 0.015, 0.01, 0.008, 0.006]
            },
            'dual_variables': {
                'grammar': [1.5, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.75, 0.7, 0.65],
                'graph2graph': [1.2, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35],
                'reuse': [0.8, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2]
            }
        }
72
+
73
# Initialize the module-level singleton state shared by every endpoint
# and by the background training simulator.
brain_state = MockBuilderBrainState()
75
+
76
# Create FastAPI app
app = FastAPI(
    title="BuilderBrain Inference API",
    description="REST API for BuilderBrain model inference and monitoring",
    version="1.0.0"
)

# Add CORS middleware.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers under the CORS spec — in production, list explicit
# allowed origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify allowed origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
91
+
92
+ @app.get("/health")
93
+ async def health_check():
94
+ """Health check endpoint."""
95
+ return {"status": "healthy", "timestamp": datetime.now().isoformat()}
96
+
97
+ @app.get("/model/status")
98
+ async def get_model_status():
99
+ """Get current model status."""
100
+ return {
101
+ "model_scale": brain_state.current_scale,
102
+ "status": "ready",
103
+ "grammar_enabled": brain_state.grammar_enabled,
104
+ "plan_validation_enabled": brain_state.plan_validation_enabled,
105
+ "last_training": "2024-01-15T10:30:00Z"
106
+ }
107
+
108
+ @app.post("/inference/generate")
109
+ async def run_inference(request: InferenceRequest):
110
+ """Run inference with the specified model."""
111
+ # Simulate processing time
112
+ await asyncio.sleep(0.1)
113
+
114
+ # Mock response generation
115
+ prompt_words = len(request.prompt.split())
116
+ response_text = f"Mock response to: {request.prompt[:50]}..."
117
+
118
+ if request.grammar_strict:
119
+ response_text = '{"response": "Properly formatted JSON response"}'
120
+
121
+ return {
122
+ "prompt": request.prompt,
123
+ "response": response_text,
124
+ "model_scale": request.model_scale,
125
+ "grammar_strict": request.grammar_strict,
126
+ "tokens_generated": prompt_words + 20,
127
+ "processing_time": 0.1,
128
+ "grammar_violations": 0 if request.grammar_strict else 2,
129
+ "timestamp": datetime.now().isoformat()
130
+ }
131
+
132
+ @app.get("/grammar/constraints")
133
+ async def get_grammar_constraints():
134
+ """Get available grammar constraints."""
135
+ return {
136
+ "available_grammars": ["json", "api", "robot_dsl", "phone_flow"],
137
+ "strict_modes": ["json", "api", "robot_dsl"],
138
+ "flexible_modes": ["phone_flow"]
139
+ }
140
+
141
+ @app.post("/grammar/preview")
142
+ async def get_grammar_preview(request: GrammarPreviewRequest):
143
+ """Preview how text would be constrained by grammar."""
144
+ await asyncio.sleep(0.02) # Simulate processing
145
+
146
+ return {
147
+ "original_text": request.text,
148
+ "constrained_text": request.text, # Mock constraint
149
+ "violations": [],
150
+ "suggestions": ["Consider using proper JSON formatting"]
151
+ }
152
+
153
+ @app.post("/plans/validate")
154
+ async def validate_plan(request: PlanValidationRequest):
155
+ """Validate a plan DAG against current schema."""
156
+ await asyncio.sleep(0.05) # Simulate validation time
157
+
158
+ return {
159
+ "valid": True,
160
+ "validation_time": 0.05,
161
+ "errors": [],
162
+ "warnings": ["Consider adding more preconditions for safety"]
163
+ }
164
+
165
+ @app.post("/plans/preview")
166
+ async def get_plan_execution_preview(request: PlanValidationRequest):
167
+ """Preview plan execution without actually running it."""
168
+ await asyncio.sleep(0.03)
169
+
170
+ return {
171
+ "estimated_execution_time": 2.5,
172
+ "resource_requirements": {"cpu": 0.3, "memory": 0.2},
173
+ "risk_assessment": "low",
174
+ "optimization_suggestions": ["Consider parallelizing independent steps"]
175
+ }
176
+
177
+ @app.get("/training/metrics")
178
+ async def get_training_metrics():
179
+ """Get current training metrics from active trainer."""
180
+ return {
181
+ "current_step": brain_state.current_step,
182
+ "total_loss": brain_state.total_loss,
183
+ "task_loss": brain_state.training_history['task_loss'][-1],
184
+ "constraint_losses": {
185
+ k: v[-1] for k, v in brain_state.training_history['constraint_losses'].items()
186
+ },
187
+ "dual_variables": {
188
+ k: v[-1] for k, v in brain_state.training_history['dual_variables'].items()
189
+ },
190
+ "timestamp": datetime.now().isoformat()
191
+ }
192
+
193
+ @app.get("/constraints/metrics")
194
+ async def get_constraint_metrics():
195
+ """Get constraint satisfaction metrics."""
196
+ return {
197
+ "grammar_compliance_rate": 0.95,
198
+ "plan_execution_success_rate": 0.88,
199
+ "constraint_violation_rate": 0.02,
200
+ "safety_energy": 0.05,
201
+ "timestamp": datetime.now().isoformat()
202
+ }
203
+
204
+ @app.get("/system/metrics")
205
+ async def get_system_metrics():
206
+ """Get system performance metrics."""
207
+ cpu_percent = psutil.cpu_percent(interval=1)
208
+ memory = psutil.virtual_memory()
209
+ disk = psutil.disk_usage('/')
210
+
211
+ return {
212
+ "cpu_percent": cpu_percent,
213
+ "memory_percent": memory.percent,
214
+ "memory_used_gb": memory.used / (1024**3),
215
+ "memory_available_gb": memory.available / (1024**3),
216
+ "disk_percent": disk.percent,
217
+ "disk_used_gb": disk.used / (1024**3),
218
+ "disk_free_gb": disk.free / (1024**3),
219
+ "active_processes": len(psutil.pids()),
220
+ "timestamp": datetime.now().isoformat()
221
+ }
222
+
223
+ @app.get("/models/scales")
224
+ async def get_model_scales():
225
+ """Get available model scales."""
226
+ return {"scales": ["tiny", "small", "production"]}
227
+
228
+ @app.post("/models/scale")
229
+ async def set_model_scale(request: ModelScaleRequest):
230
+ """Set the active model scale."""
231
+ brain_state.current_scale = request.scale
232
+ return {"status": "success", "scale": request.scale}
233
+
234
+ @app.post("/models/export")
235
+ async def export_model(request: ModelExportRequest):
236
+ """Export model in specified format."""
237
+ await asyncio.sleep(2.0) # Simulate export time
238
+
239
+ return {
240
+ "export_id": f"export_{request.scale}_{int(time.time())}",
241
+ "status": "completed",
242
+ "download_url": f"/mock/download/{request.scale}",
243
+ "file_size": "1.2GB"
244
+ }
245
+
246
+ @app.get("/exports/{export_id}")
247
+ async def get_export_status(export_id: str):
248
+ """Check status of model export."""
249
+ return {
250
+ "export_id": export_id,
251
+ "status": "completed",
252
+ "progress": 100,
253
+ "download_url": f"/mock/download/{export_id}"
254
+ }
255
+
256
# Background task for simulating training.
async def simulate_training():
    """Background loop that fakes ongoing training.

    Once per second, while training is active, advance the step counter,
    decay the losses, and append a new point to every metric series while
    keeping each series capped at its 10 most recent points.
    """
    decay = 0.999  # per-step multiplicative loss improvement
    while True:
        if brain_state.training_active:
            hist = brain_state.training_history
            brain_state.current_step += 1
            brain_state.total_loss *= decay

            # Drop the oldest point of every series once the cap is reached.
            if len(hist['total_loss']) >= 10:
                hist['total_loss'].pop(0)
                hist['task_loss'].pop(0)
                for series in hist['constraint_losses'].values():
                    series.pop(0)
                for series in hist['dual_variables'].values():
                    series.pop(0)

            hist['total_loss'].append(brain_state.total_loss)
            hist['task_loss'].append(brain_state.total_loss * 0.85)

            for series in hist['constraint_losses'].values():
                series.append(series[-1] * decay)
            for series in hist['dual_variables'].values():
                series.append(series[-1] * 0.995)  # duals relax slightly faster

        await asyncio.sleep(1.0)  # update cadence: once per second
290
+
291
+ @app.on_event("startup")
292
+ async def startup_event():
293
+ """Initialize background tasks on startup."""
294
+ asyncio.create_task(simulate_training())
295
+
296
+ @app.post("/training/start")
297
+ async def start_training():
298
+ """Start training simulation."""
299
+ brain_state.training_active = True
300
+ return {"status": "training_started"}
301
+
302
+ @app.post("/training/stop")
303
+ async def stop_training():
304
+ """Stop training simulation."""
305
+ brain_state.training_active = False
306
+ return {"status": "training_stopped"}
307
+
308
+ @app.get("/training/status")
309
+ async def get_training_status():
310
+ """Get current training status."""
311
+ return {
312
+ "active": brain_state.training_active,
313
+ "current_step": brain_state.current_step,
314
+ "total_loss": brain_state.total_loss,
315
+ "timestamp": datetime.now().isoformat()
316
+ }
317
+
318
+ if __name__ == "__main__":
319
+ uvicorn.run(
320
+ "app:app",
321
+ host="0.0.0.0",
322
+ port=8000,
323
+ reload=True,
324
+ log_level="info"
325
+ )