| """Local LLM integration for the reasoning system.""" | |
| import os | |
| from typing import Dict, Any, Optional | |
| from datetime import datetime | |
| import logging | |
| from llama_cpp import Llama | |
| import huggingface_hub | |
| from .base import ReasoningStrategy | |


class LocalLLMStrategy(ReasoningStrategy):
    """Implements reasoning using a local LLM loaded via llama-cpp-python."""

    def __init__(self):
        """Initialize the local LLM strategy."""
        self.repo_id = "tensorblock/Llama-3.2-3B-Overthinker-GGUF"
        self.filename = "Llama-3.2-3B-Overthinker-Q8_0.gguf"
        self.model_dir = "models"
        self.logger = logging.getLogger(__name__)
        self.model = None
    async def initialize(self):
        """Download the model if needed and load it, preferring the GPU."""
        try:
            # Create the models directory if it doesn't exist
            os.makedirs(self.model_dir, exist_ok=True)
            model_path = os.path.join(self.model_dir, self.filename)

            # Download the model file if it isn't already cached locally
            if not os.path.exists(model_path):
                self.logger.info(f"Downloading model to {model_path}...")
                model_path = huggingface_hub.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.filename,
                    repo_type="model",
                    local_dir=self.model_dir,
                )
                self.logger.info("Model downloaded successfully!")
            else:
                self.logger.info("Using existing model file...")

            # Try to load with GPU offload, fall back to CPU if not available
            try:
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=4096,
                    n_batch=512,
                    n_threads=8,
                    n_gpu_layers=35,
                )
                self.logger.info("Model loaded with GPU acceleration!")
            except Exception as e:
                self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...")
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=2048,
                    n_batch=512,
                    n_threads=4,
                    n_gpu_layers=0,
                )
                self.logger.info("Model loaded in CPU-only mode")
        except Exception as e:
            self.logger.error(f"Error initializing model: {e}")
            raise

    async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a reasoning response using the local LLM."""
        try:
            if not self.model:
                await self.initialize()

            # Format prompt with context
            prompt = self._format_prompt(query, context)

            # Generate the completion; allow longer outputs when the larger
            # context window is available. Note Llama.n_ctx() is a method,
            # not an attribute.
            response = self.model(
                prompt,
                max_tokens=1024 if self.model.n_ctx() >= 4096 else 512,
                temperature=0.7,
                top_p=0.95,
                repeat_penalty=1.1,
                echo=False,
            )

            # Extract and structure the response
            result = self._parse_response(response['choices'][0]['text'])

            return {
                'success': True,
                'answer': result['answer'],
                'reasoning': result['reasoning'],
                'confidence': result['confidence'],
                'timestamp': datetime.now(),
                'metadata': {
                    'model': self.repo_id,
                    'strategy': 'local_llm',
                    'context_length': len(prompt),
                    'response_length': len(response['choices'][0]['text']),
                },
            }
        except Exception as e:
            self.logger.error(f"Error in reasoning: {e}")
            return {
                'success': False,
                'error': str(e),
                'timestamp': datetime.now(),
            }

    def _format_prompt(self, query: str, context: Dict[str, Any]) -> str:
        """Format the prompt with the query and relevant context."""
        # Include only the context keys relevant to reasoning
        context_str = "\n".join([
            f"{k}: {v}" for k, v in context.items()
            if k in ['objective', 'constraints', 'background']
        ])

        return f"""Let's solve this problem step by step.

Context:
{context_str}

Question: {query}

Let me break this down:
1."""

    def _parse_response(self, text: str) -> Dict[str, Any]:
        """Parse the raw completion into structured output."""
        # Simple parsing for now: treat the last line as the answer and
        # everything before it as the reasoning trace
        lines = text.strip().split('\n')
        return {
            'answer': lines[-1] if lines else '',
            'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '',
            'confidence': 0.8,  # Default confidence
        }
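

if __name__ == "__main__":
    # Usage sketch (not part of the original module): a minimal smoke test
    # that runs one query through the strategy. Because of the relative
    # import above, run it as a module (python -m <package>.<module>), and
    # note that the first run downloads the GGUF file. The query and
    # context values below are illustrative only.
    import asyncio

    async def _demo():
        strategy = LocalLLMStrategy()
        result = await strategy.reason(
            query="What is the capital of France?",
            context={
                "objective": "Answer a simple factual question",
                "constraints": "One-sentence answer",
            },
        )
        if result["success"]:
            print("Answer:", result["answer"])
            print("Confidence:", result["confidence"])
        else:
            print("Error:", result["error"])

    asyncio.run(_demo())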