# config.yaml
  1. # This is an example that demonstrates how to configure a model file.
  2. # You can modify the configuration according to your own requirements.
  3. # to print the register_table:
  4. # from funasr.utils.register import registry_tables
  5. # registry_tables.print()
  6. # network architecture
  7. model: SeacoParaformer
  8. model_conf:
  9. ctc_weight: 0.0
  10. lsm_weight: 0.1
  11. length_normalized_loss: true
  12. predictor_weight: 1.0
  13. predictor_bias: 1
  14. sampling_ratio: 0.75
  15. inner_dim: 512
  16. bias_encoder_type: lstm
  17. bias_encoder_bid: false
  18. seaco_lsm_weight: 0.1
  19. seaco_length_normal: true
  20. train_decoder: false
  21. NO_BIAS: 8377
  22. # encoder
  23. encoder: SANMEncoder
  24. encoder_conf:
  25. output_size: 512
  26. attention_heads: 4
  27. linear_units: 2048
  28. num_blocks: 50
  29. dropout_rate: 0.1
  30. positional_dropout_rate: 0.1
  31. attention_dropout_rate: 0.1
  32. input_layer: pe
  33. pos_enc_class: SinusoidalPositionEncoder
  34. normalize_before: true
  35. kernel_size: 11
  36. sanm_shfit: 0
  37. selfattention_layer_type: sanm
  38. # decoder
  39. decoder: ParaformerSANMDecoder
  40. decoder_conf:
  41. attention_heads: 4
  42. linear_units: 2048
  43. num_blocks: 16
  44. dropout_rate: 0.1
  45. positional_dropout_rate: 0.1
  46. self_attention_dropout_rate: 0.1
  47. src_attention_dropout_rate: 0.1
  48. att_layer_num: 16
  49. kernel_size: 11
  50. sanm_shfit: 0
  51. # seaco decoder
  52. seaco_decoder: ParaformerSANMDecoder
  53. seaco_decoder_conf:
  54. attention_heads: 4
  55. linear_units: 1024
  56. num_blocks: 4
  57. dropout_rate: 0.1
  58. positional_dropout_rate: 0.1
  59. self_attention_dropout_rate: 0.1
  60. src_attention_dropout_rate: 0.1
  61. kernel_size: 21
  62. sanm_shfit: 0
  63. use_output_layer: false
  64. wo_input_layer: true
  65. predictor: CifPredictorV3
  66. predictor_conf:
  67. idim: 512
  68. threshold: 1.0
  69. l_order: 1
  70. r_order: 1
  71. tail_threshold: 0.45
  72. smooth_factor2: 0.25
  73. noise_threshold2: 0.01
  74. upsample_times: 3
  75. use_cif1_cnn: false
  76. upsample_type: cnn_blstm
  77. # frontend related
  78. frontend: WavFrontend
  79. frontend_conf:
  80. fs: 16000
  81. window: hamming
  82. n_mels: 80
  83. frame_length: 25
  84. frame_shift: 10
  85. lfr_m: 7
  86. lfr_n: 6
  87. dither: 0.0
  88. specaug: SpecAugLFR
  89. specaug_conf:
  90. apply_time_warp: false
  91. time_warp_window: 5
  92. time_warp_mode: bicubic
  93. apply_freq_mask: true
  94. freq_mask_width_range:
  95. - 0
  96. - 30
  97. lfr_rate: 6
  98. num_freq_mask: 1
  99. apply_time_mask: true
  100. time_mask_width_range:
  101. - 0
  102. - 12
  103. num_time_mask: 1
  104. train_conf:
  105. accum_grad: 1
  106. grad_clip: 5
  107. max_epoch: 150
  108. val_scheduler_criterion:
  109. - valid
  110. - acc
  111. best_model_criterion:
  112. - - valid
  113. - acc
  114. - max
  115. keep_nbest_models: 10
  116. log_interval: 50
  117. optim: adam
  118. optim_conf:
  119. lr: 0.0005
  120. scheduler: warmuplr
  121. scheduler_conf:
  122. warmup_steps: 30000
  123. dataset: AudioDataset
  124. dataset_conf:
  125. index_ds: IndexDSJsonl
  126. batch_sampler: DynamicBatchLocalShuffleSampler
  127. batch_type: example # example or length
  128. batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
  129. max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
  130. buffer_size: 500
  131. shuffle: True
  132. num_workers: 0
  133. tokenizer: CharTokenizer
  134. tokenizer_conf:
  135. unk_symbol: <unk>
  136. split_with_space: true
  137. ctc_conf:
  138. dropout_rate: 0.0
  139. ctc_type: builtin
  140. reduce: true
  141. ignore_nan_grad: true
  142. normalize: null