import os
import requests
from llama_cpp import Llama

HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
MODEL_PATH = f"./models/{MODEL_FILENAME}"

# Download the GGUF file manually from the Hub if it is not already cached locally
if not os.path.exists(MODEL_PATH):
    print("📦 Downloading GGUF model manually from Hugging Face...")
    url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{MODEL_FILENAME}"
    # Only send an Authorization header if a token is actually set
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    os.makedirs("./models", exist_ok=True)
    with requests.get(url, headers=headers, stream=True) as r:
        r.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"✅ Model downloaded to {MODEL_PATH}")

# Load the model with llama-cpp
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=128,        # Limit context for smaller RAM/CPU
    n_batch=32,
    n_threads=2,      # Use 2 threads (you can try 1 if needed)
    n_gpu_layers=0,   # CPU-only
    chat_format="llama-2",
)

def generate_structure(prompt: str) -> str:
    """Run a plain completion against the local TinyLlama model and return the text."""
    output = llm.create_completion(
        prompt=prompt,
        temperature=0.7,
        max_tokens=512,
    )
    return output["choices"][0]["text"].strip()
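

# Illustrative usage sketch (not part of the original script): the sample prompt
# below is hypothetical and only shows how generate_structure() might be called
# when this file is run directly.
if __name__ == "__main__":
    sample_prompt = "List a simple folder structure for a small Python project."
    print(generate_structure(sample_prompt))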