"""OpenManus FastAPI service.

Exposes a small HTTP API (health check, GPU details, an example text
endpoint) and serves a static frontend from the ``static`` directory.
A CUDA availability report is printed once when the module is imported.
"""

import os
from typing import Optional

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# GPU Verification on startup
print("=" * 50)
print("🚀 OpenManus FastAPI - GPU Verification")
print("=" * 50)
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"PyTorch version: {torch.__version__}")
else:
    print("⚠️ WARNING: CUDA not available - running on CPU")
print("=" * 50)

app = FastAPI(
    title="OpenManus FastAPI",
    description="High-performance FastAPI service with NVIDIA A10G GPU support",
    version="1.0.0",
)

# CORS middleware
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive combination; consider listing explicit origins if cookies or
# auth headers are actually needed cross-origin. Left unchanged here so
# existing clients keep working.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files
app.mount("/static", StaticFiles(directory="static"), name="static")


# Request models
class TextRequest(BaseModel):
    """Request body for ``POST /process``."""

    # Text to be processed.
    text: str
    # Echoed back in the response; not used to truncate the text.
    max_length: int = 100


class HealthResponse(BaseModel):
    """Response body for ``GET /health``."""

    status: str
    gpu_available: bool
    cuda_devices: int
    # Optional because health_check passes None when no CUDA device exists;
    # a plain ``str`` annotation rejects an explicit None under Pydantic v2.
    device_name: Optional[str] = None


@app.get("/")
async def serve_frontend():
    """Serve the frontend HTML page from ``static/index.html``."""
    return FileResponse("static/index.html")


@app.get("/api", response_model=dict)
async def root() -> dict:
    """Return the service name, version and a map of selected endpoints."""
    return {
        "message": "OpenManus FastAPI Service",
        "version": "1.0.0",
        "endpoints": {"health": "/health", "gpu_info": "/gpu-info", "docs": "/docs"},
    }


@app.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
    """Report service health together with basic GPU status.

    Returns:
        A ``HealthResponse`` whose ``device_name`` is the name of CUDA
        device 0, or ``None`` when CUDA is unavailable or reports no devices.
    """
    gpu_available = torch.cuda.is_available()
    cuda_devices = torch.cuda.device_count() if gpu_available else 0
    device_name = (
        torch.cuda.get_device_name(0) if gpu_available and cuda_devices > 0 else None
    )
    return HealthResponse(
        status="healthy",
        gpu_available=gpu_available,
        cuda_devices=cuda_devices,
        device_name=device_name,
    )


def _device_summary(index: int) -> dict:
    """Build the ``/gpu-info`` entry for the CUDA device at *index*."""
    device_props = torch.cuda.get_device_properties(index)
    return {
        "id": index,
        "name": torch.cuda.get_device_name(index),
        # total_memory is divided by 1024**3 and rounded to two decimals.
        "total_memory_gb": round(device_props.total_memory / 1024**3, 2),
        "major": device_props.major,
        "minor": device_props.minor,
        "multi_processor_count": device_props.multi_processor_count,
    }


@app.get("/gpu-info")
async def gpu_info():
    """Return detailed information for every visible CUDA device.

    Returns:
        ``{"error": "CUDA not available"}`` when CUDA is unavailable;
        otherwise a dict with ``cuda_available``, ``device_count`` and a
        ``devices`` list holding one summary dict per device.
    """
    if not torch.cuda.is_available():
        return {"error": "CUDA not available"}

    # Query the count once so the reported count and the list always agree.
    device_count = torch.cuda.device_count()
    return {
        "cuda_available": True,
        "device_count": device_count,
        "devices": [_device_summary(i) for i in range(device_count)],
    }


@app.post("/process")
async def process_text(request: TextRequest):
    """Example endpoint for text processing.

    Args:
        request: The text to process plus a ``max_length`` value that is
            echoed back unchanged.

    Returns:
        A dict with the input text, its length, ``max_length``, the
        upper-cased text and whether CUDA is available.

    Raises:
        HTTPException: With status 500 if building the result fails.
    """
    try:
        # Example processing logic
        return {
            "input": request.text,
            "length": len(request.text),
            "max_length": request.max_length,
            "processed": request.text.upper(),  # Simple transformation
            "gpu_used": torch.cuda.is_available(),
        }
    except Exception as e:
        # Keep the original cause attached for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    # PORT may be supplied by the hosting environment; default to 7860.
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False, workers=1)