---
# Fine-tuning run configuration, split into four sections: experiment
# tracking, base model, LoRA adapter, and trainer hyperparameters.
#
# NOTE(review): the section nesting below was reconstructed from the key
# order (each bare `key:` line is treated as a section header) — confirm
# it matches what the consuming script expects.

# Experiment-tracking identifiers (project and run name).
wandb_config:
  wandb_project: "llm_dialog_summarizer_faster"
  run_name: "SmolLM2-360M-Instruct-large-R"

# Base model identifier and loading options.
model_config:
  model_id: "HuggingFaceTB/SmolLM2-360M-Instruct"
  load_in_4bit: false
  max_seq_length: 8192

# LoRA adapter settings.
# NOTE(review): presumably forwarded to a PEFT-style LoRA config, where
# `use_rslora` switches the adapter scaling from alpha/r to alpha/sqrt(r)
# — verify against the training code.
lora_config:
  r: 64
  lora_alpha: 64
  use_rslora: true

# Trainer hyperparameters.
sft_config:
  # Keep plain decimal notation: some YAML 1.1 loaders read a bare `3e-4`
  # as a string rather than a float.
  learning_rate: 0.0003
  epochs: 2
  optimizer: "adamw_8bit"
  warmup_steps: 100
  weight_decay: 0.01
  lr_scheduler_type: "linear"
  seed: 90201
  dataset_text_field: "text"