Upload train.py with huggingface_hub
Browse files
train.py
CHANGED
|
@@ -61,7 +61,7 @@ model_name = "Qwen/Qwen3-30B-A3B" # You can change this to any model you want t
|
|
| 61 |
# model_name = "Qwen/Qwen3-0.6B"
|
| 62 |
|
| 63 |
# Training configuration
|
| 64 |
-
output_dir = "./
|
| 65 |
num_train_epochs = 1
|
| 66 |
per_device_train_batch_size = 1
|
| 67 |
gradient_accumulation_steps = 1
|
|
@@ -122,7 +122,6 @@ training_args = SFTConfig(
|
|
| 122 |
fp16=False,
|
| 123 |
max_steps=1000,
|
| 124 |
report_to="wandb", # Log training metrics to Weights & Biases (use report_to="none" to disable reporting)
|
| 125 |
-
output_dir="./tmp/sft-model",
|
| 126 |
)
|
| 127 |
|
| 128 |
"""## Initialize and run the SFT Trainer"""
|
|
|
|
| 61 |
# model_name = "Qwen/Qwen3-0.6B"
|
| 62 |
|
| 63 |
# Training configuration
|
| 64 |
+
output_dir = "./tmp/sft-model"
|
| 65 |
num_train_epochs = 1
|
| 66 |
per_device_train_batch_size = 1
|
| 67 |
gradient_accumulation_steps = 1
|
|
|
|
| 122 |
fp16=False,
|
| 123 |
max_steps=1000,
|
| 124 |
report_to="wandb", # Log training metrics to Weights & Biases (use report_to="none" to disable reporting)
|
|
|
|
| 125 |
)
|
| 126 |
|
| 127 |
"""## Initialize and run the SFT Trainer"""
|