Spaces:
Runtime error
Runtime error
| config: conf/train_asr.yaml | |
| print_config: false | |
| log_level: INFO | |
| dry_run: false | |
| iterator_type: sequence | |
| output_dir: exp/asr_conformer_mono16k_warmup800_lr2e-4_accum2 | |
| ngpu: 1 | |
| seed: 0 | |
| num_workers: 1 | |
| num_att_plot: 3 | |
| dist_backend: nccl | |
| dist_init_method: env:// | |
| dist_world_size: null | |
| dist_rank: null | |
| local_rank: 0 | |
| dist_master_addr: null | |
| dist_master_port: null | |
| dist_launcher: null | |
| multiprocessing_distributed: false | |
| unused_parameters: false | |
| sharded_ddp: false | |
| cudnn_enabled: true | |
| cudnn_benchmark: false | |
| cudnn_deterministic: true | |
| collect_stats: false | |
| write_collected_feats: false | |
| max_epoch: 250 | |
| patience: null | |
| val_scheduler_criterion: | |
| - valid | |
| - loss | |
| early_stopping_criterion: | |
| - valid | |
| - loss | |
| - min | |
| best_model_criterion: | |
| - - valid | |
| - acc | |
| - max | |
| keep_nbest_models: 10 | |
| nbest_averaging_interval: 0 | |
| grad_clip: 5.0 | |
| grad_clip_type: 2.0 | |
| grad_noise: false | |
| accum_grad: 2 | |
| no_forward_run: false | |
| resume: true | |
| train_dtype: float32 | |
| use_amp: false | |
| log_interval: null | |
| use_tensorboard: true | |
| use_wandb: false | |
| wandb_project: null | |
| wandb_id: null | |
| wandb_entity: null | |
| wandb_name: null | |
| wandb_model_log_interval: -1 | |
| detect_anomaly: false | |
| pretrain_path: null | |
| init_param: [] | |
| ignore_init_mismatch: false | |
| freeze_param: [] | |
| num_iters_per_epoch: null | |
| batch_size: 20 | |
| valid_batch_size: null | |
| batch_bins: 2000000 | |
| valid_batch_bins: null | |
| train_shape_file: | |
| - exp/asr_stats_raw_word_sp/train/speech_shape | |
| - exp/asr_stats_raw_word_sp/train/text_shape.word | |
| valid_shape_file: | |
| - exp/asr_stats_raw_word_sp/valid/speech_shape | |
| - exp/asr_stats_raw_word_sp/valid/text_shape.word | |
| batch_type: numel | |
| valid_batch_type: null | |
| fold_length: | |
| - 80000 | |
| - 150 | |
| sort_in_batch: descending | |
| sort_batch: descending | |
| multiple_iterator: false | |
| chunk_length: 500 | |
| chunk_shift_ratio: 0.5 | |
| num_cache_chunks: 1024 | |
| train_data_path_and_name_and_type: | |
| - - dump/raw/train_sp/wav.scp | |
| - speech | |
| - sound | |
| - - dump/raw/train_sp/text | |
| - text | |
| - text | |
| valid_data_path_and_name_and_type: | |
| - - dump/raw/dev/wav.scp | |
| - speech | |
| - sound | |
| - - dump/raw/dev/text | |
| - text | |
| - text | |
| allow_variable_data_keys: false | |
| max_cache_size: 0.0 | |
| max_cache_fd: 32 | |
| valid_max_cache_size: null | |
| optim: adam | |
| optim_conf: | |
| lr: 0.0002 | |
| scheduler: warmuplr | |
| scheduler_conf: | |
| warmup_steps: 800 | |
| token_list: | |
| - <blank> | |
| - <unk> | |
| - <move_rel-throttle="slow"-distance="little"-direction="backward"-/> | |
| - <move_rel-throttle="slow"-distance="normal"-direction="backward"-/> | |
| - <move_rel-throttle="slow"-distance="alot"-direction="backward"-/> | |
| - <move_abs-throttle="fast"-pos_x="centerx"-pos_y="centery"-/> | |
| - <move_abs-throttle="fast"-pos_x="left"-pos_y="up"-/> | |
| - <move_abs-throttle="fast"-pos_x="right"-pos_y="down"-/> | |
| - <move_abs-throttle="slow"-pos_x="centerx"-pos_y="centery"-/> | |
| - <move_abs-throttle="slow"-pos_x="left"-pos_y="up"-/> | |
| - <move_abs-throttle="slow"-pos_x="right"-pos_y="down"-/> | |
| - <turn_rel-throttle="slow"-angle="south"-/> | |
| - <move_rel-throttle="fast"-distance="little"-direction="forward"-/> | |
| - <turn_rel-throttle="slow"-angle="east"-/> | |
| - <turn_rel-throttle="slow"-angle="west"-/> | |
| - <turn_rel-throttle="fast"-angle="south"-/> | |
| - <turn_rel-throttle="fast"-angle="east"-/> | |
| - <turn_rel-throttle="fast"-angle="west"-/> | |
| - <turn_abs-angle="west"-/> | |
| - <turn_abs-angle="east"-/> | |
| - <turn_abs-angle="north"-/> | |
| - <turn_abs-angle="south"-/> | |
| - <lift-position="up"-/> | |
| - <move_rel-throttle="fast"-distance="normal"-direction="forward"-/> | |
| - <lift-position="down"-/> | |
| - <approach-throttle="fast"-/> | |
| - <approach-throttle="slow"-/> | |
| - <grab-grabber="close"-/> | |
| - <grab-grabber="open"-/> | |
| - <pointer-state="off"-/> | |
| - <pointer-state="on"-/> | |
| - <move_rel-throttle="fast"-distance="alot"-direction="forward"-/> | |
| - <move_rel-throttle="slow"-distance="little"-direction="forward"-/> | |
| - <move_rel-throttle="slow"-distance="normal"-direction="forward"-/> | |
| - <move_rel-throttle="slow"-distance="alot"-direction="forward"-/> | |
| - <move_rel-throttle="fast"-distance="little"-direction="backward"-/> | |
| - <move_rel-throttle="fast"-distance="normal"-direction="backward"-/> | |
| - <move_rel-throttle="fast"-distance="alot"-direction="backward"-/> | |
| - <sos/eos> | |
| init: null | |
| input_size: null | |
| ctc_conf: | |
| dropout_rate: 0.0 | |
| ctc_type: builtin | |
| reduce: true | |
| ignore_nan_grad: true | |
| model_conf: | |
| ctc_weight: 0.0 | |
| lsm_weight: 0.0 | |
| length_normalized_loss: false | |
| use_preprocessor: true | |
| token_type: word | |
| bpemodel: null | |
| non_linguistic_symbols: null | |
| cleaner: null | |
| g2p: null | |
| speech_volume_normalize: null | |
| rir_scp: null | |
| rir_apply_prob: 1.0 | |
| noise_scp: null | |
| noise_apply_prob: 1.0 | |
| noise_db_range: '13_15' | |
| frontend: default | |
| frontend_conf: | |
| fs: 16000 | |
| specaug: specaug | |
| specaug_conf: | |
| apply_time_warp: true | |
| time_warp_window: 5 | |
| time_warp_mode: bicubic | |
| apply_freq_mask: true | |
| freq_mask_width_range: | |
| - 0 | |
| - 30 | |
| num_freq_mask: 2 | |
| apply_time_mask: true | |
| time_mask_width_range: | |
| - 0 | |
| - 40 | |
| num_time_mask: 2 | |
| normalize: global_mvn | |
| normalize_conf: | |
| stats_file: grabo/feats_stats.npz | |
| preencoder: null | |
| preencoder_conf: {} | |
| encoder: conformer | |
| encoder_conf: | |
| output_size: 256 | |
| attention_heads: 4 | |
| linear_units: 2048 | |
| num_blocks: 12 | |
| dropout_rate: 0.1 | |
| positional_dropout_rate: 0.1 | |
| attention_dropout_rate: 0.0 | |
| input_layer: conv2d | |
| normalize_before: true | |
| macaron_style: true | |
| rel_pos_type: legacy | |
| pos_enc_layer_type: rel_pos | |
| selfattention_layer_type: rel_selfattn | |
| activation_type: swish | |
| use_cnn_module: true | |
| cnn_module_kernel: 15 | |
| postencoder: null | |
| postencoder_conf: {} | |
| decoder: transformer | |
| decoder_conf: | |
| attention_heads: 4 | |
| linear_units: 2048 | |
| num_blocks: 6 | |
| dropout_rate: 0.1 | |
| positional_dropout_rate: 0.1 | |
| self_attention_dropout_rate: 0.0 | |
| src_attention_dropout_rate: 0.0 | |
| required: | |
| - output_dir | |
| - token_list | |
| version: 0.10.5a1 | |
| distributed: false | |