| """Local LLM integration for the reasoning system.""" | |
| import os | |
| from typing import Dict, Any, Optional | |
| from datetime import datetime | |
| import logging | |
| from llama_cpp import Llama | |
| import huggingface_hub | |
| from .base import ReasoningStrategy | |


class LocalLLMStrategy(ReasoningStrategy):
    """Implements reasoning using a local LLM loaded via llama-cpp-python."""

    def __init__(self):
        """Initialize the local LLM strategy."""
        self.repo_id = "tensorblock/Llama-3.2-3B-Overthinker-GGUF"
        self.filename = "Llama-3.2-3B-Overthinker-Q8_0.gguf"
        self.model_dir = "models"
        self.logger = logging.getLogger(__name__)
        self.model = None
    async def initialize(self):
        """Download the model if needed and load it, preferring the GPU."""
        try:
            # Create the models directory if it doesn't exist
            os.makedirs(self.model_dir, exist_ok=True)
            model_path = os.path.join(self.model_dir, self.filename)

            # Download the model file if it isn't already cached locally
            if not os.path.exists(model_path):
                self.logger.info(f"Downloading model to {model_path}...")
                model_path = huggingface_hub.hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.filename,
                    repo_type="model",
                    local_dir=self.model_dir,
                )
                self.logger.info("Model downloaded successfully!")
            else:
                self.logger.info("Using existing model file...")

            # Try to load with GPU offload, fall back to CPU if not available
            try:
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=4096,
                    n_batch=512,
                    n_threads=8,
                    n_gpu_layers=35,
                )
                self.logger.info("Model loaded with GPU acceleration!")
            except Exception as e:
                self.logger.warning(f"GPU loading failed: {e}, falling back to CPU...")
                self.model = Llama(
                    model_path=model_path,
                    n_ctx=2048,
                    n_batch=512,
                    n_threads=4,
                    n_gpu_layers=0,
                )
                self.logger.info("Model loaded in CPU-only mode")
        except Exception as e:
            self.logger.error(f"Error initializing model: {e}")
            raise

    async def reason(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a reasoning response using the local LLM."""
        try:
            if not self.model:
                await self.initialize()

            # Format prompt with context
            prompt = self._format_prompt(query, context)

            # Generate the completion; allow longer outputs when the larger
            # context window is available. Note Llama.n_ctx() is a method,
            # not an attribute.
            response = self.model(
                prompt,
                max_tokens=1024 if self.model.n_ctx() >= 4096 else 512,
                temperature=0.7,
                top_p=0.95,
                repeat_penalty=1.1,
                echo=False,
            )

            # Extract and structure the response
            result = self._parse_response(response['choices'][0]['text'])

            return {
                'success': True,
                'answer': result['answer'],
                'reasoning': result['reasoning'],
                'confidence': result['confidence'],
                'timestamp': datetime.now(),
                'metadata': {
                    'model': self.repo_id,
                    'strategy': 'local_llm',
                    'context_length': len(prompt),
                    'response_length': len(response['choices'][0]['text']),
                },
            }
        except Exception as e:
            self.logger.error(f"Error in reasoning: {e}")
            return {
                'success': False,
                'error': str(e),
                'timestamp': datetime.now(),
            }

    def _format_prompt(self, query: str, context: Dict[str, Any]) -> str:
        """Format the prompt with the query and relevant context."""
        # Include only the context keys relevant to reasoning
        context_str = "\n".join([
            f"{k}: {v}" for k, v in context.items()
            if k in ['objective', 'constraints', 'background']
        ])

        return f"""Let's solve this problem step by step.

Context:
{context_str}

Question: {query}

Let me break this down:
1."""

    def _parse_response(self, text: str) -> Dict[str, Any]:
        """Parse the raw completion into structured output."""
        # Simple parsing for now: treat the last line as the answer and
        # everything before it as the reasoning trace
        lines = text.strip().split('\n')
        return {
            'answer': lines[-1] if lines else '',
            'reasoning': '\n'.join(lines[:-1]) if len(lines) > 1 else '',
            'confidence': 0.8,  # Default confidence
        }
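

if __name__ == "__main__":
    # Usage sketch (not part of the original module): a minimal smoke test
    # that runs one query through the strategy. Because of the relative
    # import above, run it as a module (python -m <package>.<module>), and
    # note that the first run downloads the GGUF file. The query and
    # context values below are illustrative only.
    import asyncio

    async def _demo():
        strategy = LocalLLMStrategy()
        result = await strategy.reason(
            query="What is the capital of France?",
            context={
                "objective": "Answer a simple factual question",
                "constraints": "One-sentence answer",
            },
        )
        if result["success"]:
            print("Answer:", result["answer"])
            print("Confidence:", result["confidence"])
        else:
            print("Error:", result["error"])

    asyncio.run(_demo())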