# run as a module using: python3 -m scripts.finetune
# Using: https://huggingface.co/blog/mlabonne/sft-llama3

import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from data.fine_tune_dataset import load_data


def finetune(model="unsloth/Meta-Llama-3.1-8B-bnb-4bit", dataset="mlabonne/FineTome-100k"):
    hf_token = ""

    # Loading the model and restricting the context window
    max_seq_length = 2048
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        dtype=None,
    )

    # Loading the prepared dataset
    dataset = load_data(dataset, tokenizer)
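    # Note: load_data (data/fine_tune_dataset.py, not shown here) is expected to return a dataset with a
    # "text" column, since SFTTrainer below is configured with dataset_text_field="text". Following the
    # linked blog post, a rough (hypothetical) sketch of such a loader, using the get_chat_template and
    # load_dataset imports above, would be:
    #   tokenizer = get_chat_template(tokenizer, chat_template="chatml",
    #                                 mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"})
    #   dataset = load_dataset(dataset_name, split="train")
    #   dataset = dataset.map(lambda batch: {"text": [tokenizer.apply_chat_template(conv, tokenize=False)
    #                                                 for conv in batch["conversations"]]}, batched=True)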

    # Wrapping the model for LoRA (Low-Rank Adaptation) fine-tuning - the base model stays frozen and only
    # small adapter matrices are trained, roughly 42 million of the 8 billion parameters
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,  # rank of the LoRA matrices; higher ranks increase memory and compute cost
        lora_alpha=16,  # scaling factor for the LoRA updates
        lora_dropout=0,  # dropout disabled for a speedup
        target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],  # layers LoRA targets
        use_rslora=True,  # rank-stabilised LoRA
        use_gradient_checkpointing="unsloth",
    )
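
    # Optional sanity check (not in the original script): confirm how many parameters LoRA leaves trainable.
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"Trainable parameters: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")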

    # Saving the untrained model; save_method can be "lora" to only save adapters, or merged (16 or 4 bit)
    model.save_pretrained_merged("models/PreFineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save to the models directory locally
    model.push_to_hub_merged("thebigoed/PreFineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=True,
        args=TrainingArguments(
            learning_rate=3e-4,  # too low = slow and prone to local minima, too high = unstable
            lr_scheduler_type="linear",  # adjusts the learning rate over training (linear and cosine are the most popular)
            per_device_train_batch_size=8,
            gradient_accumulation_steps=2,
            num_train_epochs=1,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            warmup_steps=10,
            output_dir="output",
            seed=0,
        ),
    )

    trainer.train()
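
    # Optional (not in the original script): report peak GPU memory used during training,
    # assuming a single CUDA device.
    if torch.cuda.is_available():
        peak_gb = torch.cuda.max_memory_reserved() / 1024 ** 3
        print(f"Peak reserved GPU memory: {peak_gb:.2f} GB")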

    # Saving the model; save_method can be "lora" to only save adapters, or merged (16 or 4 bit)
    model.save_pretrained_merged("models/FineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save to the models directory locally
    model.push_to_hub_merged("thebigoed/FineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")

    # Use to save in GGUF quantised format:
    # quant_methods = ["q2_k", "q3_k_m", "q4_k_m", "q5_k_m", "q6_k", "q8_0"]
    # for quant in quant_methods:
    #     model.push_to_hub_gguf("", tokenizer, quant)
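
    # Optional smoke test (a minimal sketch, not in the original script): stream a short completion from the
    # fine-tuned model, assuming a GPU is available and using unsloth's faster inference mode. This also
    # exercises the TextStreamer import above; the prompt is just an illustration.
    FastLanguageModel.for_inference(model)
    inputs = tokenizer("What is fine-tuning?", return_tensors="pt").to(model.device)
    model.generate(**inputs, streamer=TextStreamer(tokenizer), max_new_tokens=64)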

    return


if __name__ == "__main__":
    finetune()