# Space status: Paused
"""Fine-tune the Orpheus 3B pretrained checkpoint with the Hugging Face Trainer.

Running this script installs the Orpheus TTS packages, loads the dataset and
the pretrained model/tokenizer, trains with the arguments below, and writes
the resulting model and tokenizer to ``FINAL_MODEL_DIR``.
"""

import os
import subprocess
import sys

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_ID = "canopylabs/orpheus-3b-0.1-pretrained"
# NOTE(review): this id has no namespace and contains parentheses, so it looks
# like a placeholder rather than a resolvable dataset id -- confirm the real
# dataset repository or local path before running.
DATASET_ID = "Emotional_Speech_Dataset_(ESD)"
CHECKPOINT_DIR = "./orpheus-finetuned"
FINAL_MODEL_DIR = "./orpheus-finetuned-model"


def _pip_install(*requirements):
    """Install *requirements* with pip for the running interpreter.

    The command is passed as an argument list (no shell), and
    ``sys.executable -m pip`` is used so the packages land in the same
    environment that executes this script.

    Returns:
        The pip process exit code. A non-zero code is reported on stderr
        instead of raising, so the script keeps going as it did before.
    """
    command = [sys.executable, "-m", "pip", "install", *requirements]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        print(
            f"warning: {' '.join(command)!r} exited with code "
            f"{completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode


# Install required packages
_pip_install("git+https://github.com/canopyai/Orpheus-TTS.git")
_pip_install("orpheus-speech", "vllm==0.7.3")

# Load the dataset.
# Without ``split`` load_dataset returns a DatasetDict (a mapping of split
# name -> Dataset), which Trainer cannot consume as ``train_dataset``.
# Assumes the dataset exposes a "train" split -- TODO confirm.
dataset = load_dataset(DATASET_ID, split="train")

# Get the model
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Setup training arguments
training_args = TrainingArguments(
    output_dir=CHECKPOINT_DIR,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch of 2 * 4 = 8 per device
    learning_rate=5e-5,
    num_train_epochs=3,
    save_strategy="steps",
    save_steps=500,
)

# Start training
# NOTE(review): the dataset is handed to Trainer as loaded -- no tokenization,
# label construction, or data collator is applied in this script. Verify that
# its columns already match what the causal-LM forward pass expects.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Save the model
model.save_pretrained(FINAL_MODEL_DIR)
tokenizer.save_pretrained(FINAL_MODEL_DIR)