```yaml
model_args:
  attn_implementation: flash_attention_2
  bnb_4bit_quant_type: nf4
  load_in_4bit: false
  load_in_8bit: false
  lora_alpha: 32
  lora_dropout: 0.05
  lora_modules_to_save: null
  lora_r: 16
  lora_target_modules: null
  lora_task_type: CAUSAL_LM
  model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
  model_revision: main
  torch_dtype: bfloat16
  trust_remote_code: false
  use_bnb_nested_quant: false
  use_dora: false
  use_peft: false
  use_rslora: false
```
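These model settings load the base model for full-parameter training: PEFT is off (`use_peft: false`, so the `lora_*` fields are inert defaults), 4-/8-bit quantization is disabled, and attention runs through FlashAttention-2 in bfloat16. A minimal sketch of the equivalent `transformers` load call, assuming a standard `AutoModelForCausalLM` path (the exact loader used by the training script is not shown here):

```python
import torch
from transformers import AutoModelForCausalLM

# Sketch: how model_args above would typically translate into a load call.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Coder-7B-Instruct",        # model_name_or_path
    revision="main",                          # model_revision
    torch_dtype=torch.bfloat16,               # torch_dtype
    attn_implementation="flash_attention_2",  # attn_implementation
    trust_remote_code=False,
    use_cache=False,  # KV cache off, as required with gradient checkpointing
)
```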
```yaml
script_args:
  cosine_max_len: 1000
  cosine_max_value_correct: 1.0
  cosine_max_value_wrong: -0.5
  cosine_min_value_correct: 0.5
  cosine_min_value_wrong: 0.0
  dataset_config: null
  dataset_name: simone-papicchio/bird
  dataset_test_split: test
  dataset_train_split: train
  gradient_checkpointing_use_reentrant: false
  ignore_bias_buffers: false
  reward_funcs:
  - qatch_metrics
  - format
  - tag_count
```
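Training runs on the BIRD text-to-SQL corpus (`simone-papicchio/bird`) with three active reward functions: `qatch_metrics` (presumably the QATCH text-to-SQL evaluation metrics), `format`, and `tag_count`. The `cosine_*` fields parameterize a length-aware cosine reward, but no cosine reward appears in `reward_funcs`, so they are most likely unused defaults here. For reference, a minimal sketch of the open-r1-style cosine scaling these values would drive if enabled (the exact formulation in the script is an assumption):

```python
import math

def cosine_scaled_reward(is_correct: bool, gen_len: int, max_len: int = 1000,
                         min_correct: float = 0.5, max_correct: float = 1.0,
                         min_wrong: float = 0.0, max_wrong: float = -0.5) -> float:
    """Length-aware reward: cosine decays from 1 to -1 as length approaches max_len."""
    progress = min(gen_len, max_len) / max_len
    cosine = math.cos(progress * math.pi)
    if is_correct:
        lo, hi = min_correct, max_correct  # short correct -> 1.0, long correct -> 0.5
    else:
        lo, hi = max_wrong, min_wrong      # short wrong -> 0.0, long wrong -> -0.5
    return lo + 0.5 * (hi - lo) * (1.0 + cosine)
```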
```yaml
training_args:
  _n_gpu: 1
  accelerator_config:
    dispatch_batches: null
    even_batches: true
    gradient_accumulation_kwargs: null
    non_blocking: false
    split_batches: false
    use_configured_state: false
    use_seedable_sampler: true
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  add_system_prompt: true
  add_validation: false
  auto_find_batch_size: false
  average_tokens_across_devices: false
  base_db_path: data/bird_train/train_databases
  batch_eval_metrics: false
  benchmarks: []
  beta: 0.04
  bf16: true
  bf16_full_eval: false
  cache_implementation: null
  cached_file_path: /workspaces/deep_thinking/cache_target_sql2execution_BIRD_train.pkl
  callbacks: {}
  chat_template: null
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 0
  dataloader_persistent_workers: false
  dataloader_pin_memory: true
  dataloader_prefetch_factor: null
  dataset_test_split_name: validation
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: null
  ddp_timeout: 1800
  debug: []
  deepspeed: null
  disable_tqdm: false
  do_eval: false
  do_predict: false
  do_train: false
  ds3_gather_for_generation: true
  epsilon: 0.2
  epsilon_high: null
  eval_accumulation_steps: null
  eval_delay: 0
  eval_do_concat_batches: true
  eval_on_start: false
  eval_steps: null
  eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - 'no'
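  # Tags like !!python/object/apply:... (here and below) are PyYAML artifacts
  # from dumping enum values; each resolves to the plain string on the next line.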
  eval_use_gather_object: false
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
    xla_fsdp_v2: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  greater_is_better: false
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO
  hub_model_revision: main
  hub_private_repo: null
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_for_metrics: []
  include_inputs_for_metrics: false
  include_num_input_tokens_seen: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 1.0e-06
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_completions: true
  log_level: info
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: ./.tensorboard_logging/f5655cd2/
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 5
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_kwargs: {}
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - constant_with_warmup
  max_completion_length: 4096
  max_grad_norm: 0.2
  max_prompt_length: 2048
  max_steps: -1
  metric_for_best_model: loss
  min_p: null
  model_init_kwargs: '{''revision'': ''main'', ''trust_remote_code'': False, ''attn_implementation'':
    ''flash_attention_2'', ''torch_dtype'': torch.bfloat16, ''use_cache'': False}'
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_completions_to_print: 1
  num_generations: 16
  num_iterations: 1
  num_train_epochs: 1.0
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_8bit
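  # adamw_8bit is bitsandbytes' 8-bit AdamW, which keeps optimizer state in
  # 8-bit, cutting its memory footprint roughly 4x versus fp32 AdamW.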
  optim_args: null
  optim_target_modules: null
  output_dir: base_models/grpo/Qwen/Qwen2.5-Coder-7B-Instruct/bs_256_ml_4096_gen_16_f5655cd2_RL
  overwrite_hub_revision: false
  overwrite_output_dir: false
  past_index: -1
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  prompt_name: text2sql_model_grpo
  push_to_hub: false
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_revision: false
  push_to_hub_token: null
  ray_scope: last
  ref_model_mixup_alpha: 0.6
  ref_model_sync_steps: 512
  remove_unused_columns: false
  repetition_penalty: 1.0
  report_to:
  - tensorboard
  - wandb
  restore_callback_states_from_checkpoint: false
  resume_from_checkpoint: 'True'
  reward_weights:
  - 0.85
  - 0.1
  - 0.05
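  # These weights pair positionally with script_args.reward_funcs above:
  # qatch_metrics 0.85, format 0.1, tag_count 0.05.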
  run_name: exp-9-7B-QATCH
  save_on_each_node: false
  save_only_model: false
  save_safetensors: true
  save_steps: 0.1
  save_strategy: !!python/object/apply:transformers.trainer_utils.SaveStrategy
  - steps
  save_total_limit: 3
  scale_rewards: true
  seed: 42
  skip_memory_metrics: true
  stratified_by_complexity: false
  sync_ref_model: false
  temperature: 0.7
  tf32: null
  top_k: 50
  top_p: 1.0
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torch_empty_cache_steps: null
  torchdynamo: null
  tp_size: 0
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_liger_kernel: false
  use_liger_loss: false
  use_mps_device: false
  use_vllm: true
  validation_split: 0.2
  vllm_device: auto
  vllm_dtype: bfloat16
  vllm_enable_prefix_caching: null
  vllm_gpu_memory_utilization: 0.7
  vllm_guided_decoding_regex: null
  vllm_max_model_len: null
  vllm_server_host: 127.0.0.1
  vllm_server_port: 24879
  vllm_server_timeout: 120.0
  wandb_log_unique_prompts: true
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.0
```
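Putting the three sections together: the effective batch is `per_device_train_batch_size` 8 × `gradient_accumulation_steps` 16 = 128 completions per device per optimizer step, and the `bs_256` tag in `output_dir` suggests two training processes (256 in total); with `num_generations: 16`, each update scores 16 completions per prompt, i.e. 16 unique prompts per step. Generation is offloaded to a vLLM server (`use_vllm: true`, 127.0.0.1:24879) capped at 70% GPU memory utilization. Below is a minimal sketch of how such a run would be wired up with TRL's `GRPOTrainer`; the reward functions are placeholders standing in for the script's `qatch_metrics`, `format`, and `tag_count` (assumptions, not the actual training code):

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

# Placeholder rewards: each returns one score per completion. The real
# qatch_metrics / format / tag_count implementations live in the training script.
def qatch_metrics(completions, **kwargs):
    return [0.0 for _ in completions]

def format_reward(completions, **kwargs):
    return [0.0 for _ in completions]

def tag_count_reward(completions, **kwargs):
    return [0.0 for _ in completions]

train_dataset = load_dataset("simone-papicchio/bird", split="train")

config = GRPOConfig(
    output_dir="grpo-qwen2.5-coder-7b-bird",  # illustrative path
    learning_rate=1e-6,
    lr_scheduler_type="constant_with_warmup",
    warmup_ratio=0.1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=16,
    num_generations=16,            # completions sampled per prompt
    max_prompt_length=2048,
    max_completion_length=4096,
    beta=0.04,                     # KL penalty toward the reference model
    temperature=0.7,
    top_p=1.0,
    max_grad_norm=0.2,
    bf16=True,
    gradient_checkpointing=True,
    use_vllm=True,                 # generation served by a separate vLLM worker
    reward_weights=[0.85, 0.1, 0.05],
    report_to=["tensorboard", "wandb"],
)

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-Coder-7B-Instruct",
    reward_funcs=[qatch_metrics, format_reward, tag_count_reward],
    args=config,
    train_dataset=train_dataset,
)
trainer.train()
```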