# Install required dependencies
import subprocess
import sys

def install_packages():
    # Map pip package names to their importable module names
    # (the "protobuf" package installs the "google.protobuf" module,
    # so probing __import__("protobuf") would always fail).
    packages = {
        "sentencepiece": "sentencepiece",
        "protobuf": "google.protobuf",
        "transformers": "transformers",
        "torch": "torch",
        "accelerate": "accelerate",
    }
    for package, module in packages.items():
        try:
            __import__(module)
        except ImportError:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install_packages()

import time

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Initialize FastAPI app
app = FastAPI(
    title="YAH Tech AI API",
    description="AI Assistant API for testing",
    version="1.0.0"
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class YAHBot:
    def __init__(self):
        self.repo_id = "Adedoyinjames/brain-ai"
        self.tokenizer = None
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the model from the Hugging Face repo."""
        try:
            print(f"🔄 Loading AI model from {self.repo_id}...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.repo_id,
                trust_remote_code=True  # Required for phi-3
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.repo_id,
                trust_remote_code=True,  # Required for phi-3
                torch_dtype=torch.float16,
                device_map="auto"
            )
            # Phi-3 tokenizers may not define a pad token; fall back to EOS
            # so that padding in __call__ and generate() works.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            print("✅ AI model loaded successfully from HF repo!")
        except Exception as e:
            print(f"❌ Failed to load AI model from repo: {e}")
            self.model = None
            self.tokenizer = None

    def generate_response(self, user_input):
        """Generate a response using the causal language model."""
        if self.model and self.tokenizer:
            try:
                # Format the prompt with phi-3 chat markers (causal LM)
                prompt = f"<|user|>\n{user_input}<|end|>\n<|assistant|>\n"

                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    max_length=512,
                    truncation=True,
                    padding=True
                )

                # Move inputs to the same device as the model. Note that
                # `inputs` becomes a plain dict here, so values must be
                # accessed with [] rather than attribute access.
                device = next(self.model.parameters()).device
                inputs = {k: v.to(device) for k, v in inputs.items()}

                with torch.no_grad():
                    outputs = self.model.generate(
                        inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        max_new_tokens=150,
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True,
                        pad_token_id=self.tokenizer.eos_token_id,  # Use EOS token for padding
                        eos_token_id=self.tokenizer.eos_token_id,
                    )

                # Decode only the newly generated tokens. Searching for the
                # raw prompt string would miss, because skip_special_tokens
                # strips the <|user|>/<|assistant|> markers from the output.
                new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
                response = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
                return response
            except Exception as e:
                print(f"Model error: {str(e)}")
                return "I apologize, but I'm having trouble processing your question right now."
        return "AI model is not available."
# Initialize the bot globally
yah_bot = YAHBot()

# Request/Response models
class ChatRequest(BaseModel):
    message: str

class ChatResponse(BaseModel):
    response: str
    status: str
    timestamp: float

class HealthResponse(BaseModel):
    status: str
    service: str
    timestamp: float

# API Endpoints
@app.get("/")
async def root():
    return {
        "message": "YAH Tech AI API is running",
        "status": "active",
        "model_repo": yah_bot.repo_id,
        "model_type": "causal_lm",
        "endpoints": {
            "chat": "POST /api/chat",
            "health": "GET /api/health"
        }
    }

@app.post("/api/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Main chat endpoint - send a message and get an AI response."""
    try:
        response = yah_bot.generate_response(request.message)
        return ChatResponse(
            response=response,
            status="success",
            timestamp=time.time()
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")

@app.get("/api/health", response_model=HealthResponse)
async def health_check():
    return HealthResponse(
        status="healthy",
        service="YAH Tech AI API",
        timestamp=time.time()
    )

# For Hugging Face Spaces
def get_app():
    return app

if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        log_level="info"
    )
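
# --- Usage sketch ---
# A minimal example of exercising the chat endpoint with the `requests`
# library, assuming the server above is running locally on port 7860.
# The URL, timeout, and example message are illustrative assumptions,
# not part of the API itself; the `message` field matches ChatRequest
# and the `response` field matches ChatResponse.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/chat",
#       json={"message": "Hello, what can you do?"},
#       timeout=120,  # generation can be slow on first request
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])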