MegaTronX committed on
Commit fe0e244 · verified · 1 Parent(s): e17fb08

Update model_runner.py

Files changed (1)
  1. model_runner.py +36 -18
model_runner.py CHANGED
@@ -1,22 +1,40 @@
-import os
-from llama_cpp import Llama # assuming llama-cpp-python is used to run GGUF
+import json
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+from llama_cpp_agent.providers import LlamaCppPythonProvider
 
 class ModelRunner:
-    def __init__(self, model_path: str):
-        # Initialize the model once
-        self.llm = Llama(model_path=model_path)
+    def __init__(self, repo_id: str, filename: str):
+        model_path = hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            local_dir="./models"
+        )
+        self.llm = Llama(
+            model_path=model_path,
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        self.provider = LlamaCppPythonProvider(self.llm)
+        self.agent = LlamaCppAgent(
+            self.provider,
+            system_prompt="You are an AI developer. Given a codegen output, return a JSON mapping of filenames to file contents.",
+            predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
+            debug_output=False
+        )
 
-    def interpret_code_description(self, prompt: str) -> str:
-        """
-        Given the pasted AI response (prompt), return the model’s parsing output
-        (e.g. JSON or a DSL).
-        """
-        # You may want to wrap the prompt with instructions
-        wrapped = (
-            "You are a code parser. The following is a description of multiple files with code. "
-            "Produce as output a JSON object mapping filenames to file contents. "
-            "If something is ambiguous, try your best.\n\n"
-            + prompt
+    def interpret_code_description(self, pasted_text: str) -> str:
+        user_prompt = (
+            "Here is an AI output that includes multiple code files. "
+            "Please convert it into a JSON object like {\"file1.py\": \"...code...\", \"file2.txt\": \"...\"}\n\n"
+            f"{pasted_text}"
         )
-        resp = self.llm(prompt=wrapped, max_tokens=2048)
-        return resp["choices"][0]["text"]
+
+        output = ""
+        for chunk in self.agent.get_chat_response(user_prompt, returns_streaming_generator=True):
+            output += chunk
+
+        return output
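
For context, a minimal usage sketch of the updated class. This is not part of the commit: the repo and file names below are placeholder assumptions (any GGUF chat model should work, though the GEMMA_2 formatter suggests a Gemma-2-style model), and the pasted text is made up. It also shows one way a caller could use the json import, which the new model_runner.py itself never calls, to parse the result.

import json

from model_runner import ModelRunner

# Placeholder model coordinates: assumptions for illustration, not from the commit.
runner = ModelRunner(
    repo_id="bartowski/gemma-2-9b-it-GGUF",
    filename="gemma-2-9b-it-Q4_K_M.gguf",
)

# Sample codegen output to convert (made up for illustration).
pasted = 'file1.py:\nprint("hello")\n'

raw = runner.interpret_code_description(pasted)

# The system prompt asks for a JSON mapping of filenames to contents,
# but the model may still emit non-JSON text, so parse defensively.
try:
    files = json.loads(raw)
    for name, contents in files.items():
        print(f"{name}: {len(contents)} chars")
except json.JSONDecodeError:
    print("Model returned non-JSON output:")
    print(raw)

Accumulating the streaming generator into a single string mirrors what interpret_code_description does internally; a UI could instead surface the chunks as they arrive.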