import os
import re
import json
import tempfile
import zipfile
import gradio as gr
from huggingface_hub import hf_hub_download
# ---- LLM: llama.cpp via llama_cpp_agent ----
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
# ----------------------
# Model configuration
# ----------------------
# You can change these defaults in the UI
DEFAULT_REPO_ID = "tHottie/NeuralDaredevil-8B-abliterated-Q4_K_M-GGUF"
DEFAULT_FILENAME = "neuraldaredevil-8b-abliterated-q4_k_m-imat.gguf"
MODELS_DIR = "models"
os.makedirs(MODELS_DIR, exist_ok=True)
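# Any llama.cpp-compatible GGUF pair should work here; for example (hypothetical,
# not tested in this Space):
#   repo_id  = "bartowski/Meta-Llama-3-8B-Instruct-GGUF"
#   filename = "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"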
def ensure_model(repo_id: str, filename: str) -> str:
    """Download the GGUF file into MODELS_DIR once; reuse the cached copy afterwards."""
    local_path = os.path.join(MODELS_DIR, filename)
    if not os.path.exists(local_path):
        local_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=MODELS_DIR)
    return local_path
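# Example: ensure_model(DEFAULT_REPO_ID, DEFAULT_FILENAME) downloads the roughly
# 5 GB quantized weights on the first call, then returns the cached local path
# "models/neuraldaredevil-8b-abliterated-q4_k_m-imat.gguf" on every later call.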
def build_agent(model_path: str, n_ctx=8192, n_gpu_layers=81, n_batch=1024, flash_attn=True):
    llm = Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_gpu_layers=n_gpu_layers,  # offloads all layers on a GPU build; CPU-only builds ignore this
        n_batch=n_batch,
        flash_attn=flash_attn,
    )
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=(
            "You are an expert code-packager and software project compiler. "
            "Given an AI-generated project description containing code blocks and hints "
            "about filenames or structure, extract each file with its most likely filename "
            "and exact code content. Return ONLY a strict JSON array named manifest, "
            "where each element is an object with keys 'filename' and 'content'. "
            "Do not add commentary outside JSON. "
            "Ensure filenames include directories if implied (e.g., 'src/main.py'). "
            "Preserve code exactly as provided inside code fences."
        ),
        # The default model (NeuralDaredevil-8B) is a Llama-3 derivative, so use
        # the matching Llama-3 chat template.
        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
        debug_output=False,
    )
    return agent, provider
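# Sketch of typical use: the agent owns the system prompt and chat template, while
# the provider exposes the sampling settings, so callers keep both as a pair:
#   agent, provider = build_agent(ensure_model(DEFAULT_REPO_ID, DEFAULT_FILENAME))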
JSON_FALLBACK_NAME = "project.txt"
def call_llm_manifest(agent, provider, text, temperature=0.2, top_p=0.9, top_k=40, repeat_penalty=1.1, max_tokens=2048):
    # Instruction prompt. The model must respond with STRICT JSON.
    prompt = f"""
Read the following AI project description and return ONLY JSON.
Output schema (strict):
[{{"filename": "server.js", "content": "// code..."}}]
AI project description:
{text}
"""
    settings = provider.get_provider_default_settings()
    settings.temperature = float(temperature)
    settings.top_p = float(top_p)
    settings.top_k = int(top_k)
    settings.repeat_penalty = float(repeat_penalty)
    settings.max_tokens = int(max_tokens)
    settings.stream = False
    out = agent.get_chat_response(prompt, llm_sampling_settings=settings, print_output=False)
    # Try to extract a JSON array from the output robustly: take the widest
    # bracketed [...] slice and parse it.
    try:
        start = out.find('[')
        end = out.rfind(']')
        if start != -1 and end > start:
            manifest = json.loads(out[start:end + 1])
        else:
            raise ValueError("No JSON array found")
    except Exception:
        # Fallback: package the raw output as a single file for transparency.
        manifest = [{"filename": JSON_FALLBACK_NAME, "content": out}]
    return manifest
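# Shape of a successful manifest (illustrative values, not real output):
#   [
#       {"filename": "src/main.py", "content": "print('hello')\n"},
#       {"filename": "requirements.txt", "content": "gradio\n"},
#   ]
# On any parse failure the entire raw reply is wrapped as a single project.txt
# entry, which package_with_llm below treats as a cue to try the regex heuristic.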
def naive_regex_merge(text):
    """
    Heuristic backup that maps code fences to probable filenames by scanning nearby lines.
    Runs only when the model output is a single fallback file OR the user ticks
    'Force Heuristic Merge'.
    """
    blocks = []
    # Find all triple-backtick code blocks.
    code_pattern = re.compile(r"```([a-zA-Z0-9]*)\n(.*?)```", re.DOTALL)
    # Collect filename candidates from individual lines, such as
    # '### STEP: server.js' or '`server.js`'.
    candidates = []
    for line in text.splitlines():
        m = re.search(r"([A-Za-z0-9_\-./]+?\.[A-Za-z0-9]+)", line)
        if m:
            candidates.append(m.group(1))
    # Pair fences with candidates in order; fall back to a numbered name per fence.
    for idx, m in enumerate(code_pattern.finditer(text)):
        lang = m.group(1) or "txt"
        code = m.group(2)
        filename = candidates[idx] if idx < len(candidates) else f"file_{idx + 1}.{lang}"
        blocks.append({"filename": filename, "content": code})
    return blocks
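# Illustrative behavior on a hypothetical input:
#   naive_regex_merge("Create `app.js`:\n```js\nconsole.log('hi');\n```")
#   -> [{"filename": "app.js", "content": "console.log('hi');\n"}]
# Candidates are paired with fences purely by order of appearance, so dotted
# tokens inside code blocks (e.g. 'console.log' above) can shift the pairing
# when there is more than one fence.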
def create_zip_from_manifest(manifest):
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, "project.zip")
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
        for item in manifest:
            fname = item.get("filename", JSON_FALLBACK_NAME).lstrip("/")
            content = item.get("content", "")
            fpath = os.path.join(temp_dir, fname)
            # Skip names that would escape the staging directory (e.g. '../evil').
            if not os.path.realpath(fpath).startswith(os.path.realpath(temp_dir) + os.sep):
                continue
            os.makedirs(os.path.dirname(fpath), exist_ok=True)
            with open(fpath, "w", encoding="utf-8") as f:
                f.write(content)
            z.write(fpath, arcname=fname)
    return zip_path
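# Design note: an in-memory z.writestr(fname, content) would avoid the staging
# files, but materializing the tree on disk keeps it inspectable and lets the
# realpath check above validate against real filesystem paths.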
def package_with_llm(ai_text, repo_id, filename, temperature, top_p, top_k, repeat_penalty, max_tokens, force_heuristic):
    model_path = ensure_model(repo_id, filename)
    # Note: the model is reloaded on every click; caching the agent across calls
    # would be a cheap speedup.
    agent, provider = build_agent(model_path=model_path)
    manifest = call_llm_manifest(
        agent, provider, ai_text,
        temperature=temperature, top_p=top_p, top_k=top_k,
        repeat_penalty=repeat_penalty, max_tokens=max_tokens,
    )
    # If the model failed to emit valid JSON (single fallback file) or the user
    # forces it, fall back to the heuristic merge.
    if force_heuristic or (len(manifest) == 1 and manifest[0]["filename"] == JSON_FALLBACK_NAME):
        heuristic = naive_regex_merge(ai_text)
        if heuristic:
            manifest = heuristic
    zip_path = create_zip_from_manifest(manifest)
    return zip_path
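# End-to-end pipeline: ensure_model -> build_agent -> call_llm_manifest
# -> (optional) naive_regex_merge -> create_zip_from_manifest.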
with gr.Blocks(title="AI Project Packager (GGUF, llama.cpp)") as demo:
    gr.Markdown("# AI Project Packager (GGUF, llama.cpp)")
    gr.Markdown(
        "Paste an AI-generated multi-file project description. A local GGUF model "
        "will infer filenames and contents, then return a downloadable ZIP."
    )
    with gr.Row():
        ai_text = gr.Textbox(lines=24, label="Paste AI response here")
    with gr.Accordion("LLM Settings", open=False):
        repo_id = gr.Textbox(value=DEFAULT_REPO_ID, label="Model repo_id")
        filename = gr.Textbox(value=DEFAULT_FILENAME, label="Model filename (*.gguf)")
        with gr.Row():
            temperature = gr.Slider(0.0, 2.0, value=0.2, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            top_k = gr.Slider(0, 100, value=40, step=1, label="Top-k")
        with gr.Row():
            repeat_penalty = gr.Slider(0.8, 2.0, value=1.1, step=0.05, label="Repeat penalty")
            max_tokens = gr.Slider(256, 4096, value=2048, step=32, label="Max tokens")
    force_heuristic = gr.Checkbox(value=False, label="Force heuristic filename/code merge if JSON parse fails")
    out_zip = gr.File(label="Download packaged ZIP")
    run_btn = gr.Button("Package Project", variant="primary")
    run_btn.click(
        fn=package_with_llm,
        inputs=[ai_text, repo_id, filename, temperature, top_p, top_k, repeat_penalty, max_tokens, force_heuristic],
        outputs=[out_zip],
    )
if __name__ == "__main__":
    demo.launch()