import json  # NOTE(review): unused in this chunk — kept in case it is used elsewhere in the file

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider


class ModelRunner:
    """Runs a CPU-only llama.cpp model behind a LlamaCppAgent chat interface.

    Downloads a GGUF model from the Hugging Face Hub on construction and
    exposes a single helper that asks the model to restructure pasted
    codegen output into a filename -> contents JSON mapping.
    """

    def __init__(self, repo_id: str, filename: str):
        """Download the model file and build the provider/agent stack.

        Args:
            repo_id: Hugging Face Hub repository id containing the model.
            filename: Name of the GGUF model file inside that repository.
        """
        # hf_hub_download caches the file under ./models and returns the
        # local filesystem path to pass to llama.cpp.
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            local_dir="./models",
        )
        self.llm = Llama(
            model_path=model_path,
            n_gpu_layers=0,  # CPU-only: offload no layers to a GPU
            n_batch=512,     # safe default batch size for CPU
            n_ctx=4096,      # reduced context window for CPU environments
        )
        self.provider = LlamaCppPythonProvider(self.llm)
        self.agent = LlamaCppAgent(
            self.provider,
            system_prompt="You are an AI developer. Given a codegen output, return a JSON mapping of filenames to file contents.",
            predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
            debug_output=False,
        )

    def interpret_code_description(self, pasted_text: str) -> str:
        """Ask the agent to convert pasted multi-file output into JSON text.

        Args:
            pasted_text: Raw AI/codegen output that may contain several
                code files.

        Returns:
            The model's complete response text. It is expected (but not
            validated here) to be a JSON object mapping filenames to file
            contents.
        """
        user_prompt = (
            "Here is an AI output that includes multiple code files. "
            "Please convert it into a JSON object like {\"file1.py\": \"...code...\", \"file2.txt\": \"...\"}\n\n"
            f"{pasted_text}"
        )
        # Collect streamed chunks and join once at the end rather than
        # concatenating strings inside the loop.
        chunks = []
        for chunk in self.agent.get_chat_response(
            user_prompt, returns_streaming_generator=True
        ):
            chunks.append(chunk)
        return "".join(chunks)