input shape of nns #242

Open
HaoranWeiUTD opened this issue Aug 5, 2020 · 3 comments


HaoranWeiUTD commented Aug 5, 2020

I trained a model and found that its input has a shape of [194, 1, 40]. I think 1 is the batch size and 40 is the feature size, but where does 194 come from?
I set max_seq_length_train=1500 and max_seq_length_valid=1400.
Thanks for the help!

HaoranWeiUTD (Author) commented Aug 5, 2020


Here is my cfg file:

[cfg_proto]
cfg_proto=proto/global.proto
cfg_proto_chunk=proto/global_chunk.proto

[exp]
cmd=
run_nn_script=run_nn
out_folder=exp/libri_GRU_fmllr
seed=1234
use_cuda=True
multi_gpu=True
save_gpumem=False
N_epochs_tr=15

[dataset1]
data_name=train_all
fea:fea_name=fmllr
fea_lst=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/train_all/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/train_all/utt2spk ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/train_all/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
cw_left=0
cw_right=0

lab:lab_name=lab_cd
lab_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4a/
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/train_all/
lab_graph=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4a/graph_tg/

N_chunks=50

[dataset2]
data_name=aidatatang
fea:fea_name=fmllr
fea_lst=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/aidatatang/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/aidatatang/utt2spk ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/aidatatang/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
cw_left=0
cw_right=0

lab:lab_name=lab_cd
lab_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4b_ali_aidatatang
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/aidatatang/
lab_graph=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4a/graph_tg/

N_chunks=5

[dataset3]
data_name=magicdata
fea:fea_name=fmllr
fea_lst=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/magicdata/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/magicdata/utt2spk ark:/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/magicdata/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
cw_left=0
cw_right=0

lab:lab_name=lab_cd
lab_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4b_ali_magicdata
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/XXX/Tools/kaldi/egs/multi_cn/s5/fmllr/magicdata/
lab_graph=/XXX/Tools/kaldi/egs/multi_cn/s5/exp/tri4a/graph_tg/

N_chunks=5

[data_use]
train_with=train_all
valid_with=aidatatang
forward_with=magicdata

[batches]
batch_size_train=16
max_seq_length_train=1500
increase_seq_length_train=True
start_seq_len_train=300
multply_factor_seq_len_train=5
batch_size_valid=8
max_seq_length_valid=1400

[architecture1]
arch_name = GRU_layers
arch_proto = proto/GRU.proto
arch_library = neural_networks
arch_class = GRU
arch_pretrain_file = none
arch_freeze = False
arch_seq_model = True

gru_lay = 512,512,512,512
gru_drop = 0.2,0.2,0.2,0.2
gru_use_laynorm_inp = False
gru_use_batchnorm_inp = False
gru_use_laynorm = False,False,False,False
gru_use_batchnorm = True,True,True,True
gru_bidir = True
gru_act = tanh,tanh,tanh,tanh
gru_orthinit=True

arch_lr = 0.0004
arch_halving_factor = 0.5
arch_improvement_threshold = 0.001
arch_opt = rmsprop
opt_momentum = 0.0
opt_alpha = 0.95
opt_eps = 1e-8
opt_centered = False
opt_weight_decay = 0.0

[architecture2]
arch_name=MLP_layers
arch_proto=proto/MLP.proto
arch_library=neural_networks
arch_class=MLP
arch_pretrain_file=none
arch_freeze=False
arch_seq_model=False
dnn_lay=N_out_lab_cd
dnn_drop=0.0
dnn_use_laynorm_inp=False
dnn_use_batchnorm_inp=False
dnn_use_batchnorm=False
dnn_use_laynorm=False
dnn_act=softmax

arch_lr=0.0004
arch_halving_factor=0.5
arch_improvement_threshold=0.001
arch_opt=rmsprop
opt_momentum=0.0
opt_alpha=0.95
opt_eps=1e-8
opt_centered=False
opt_weight_decay=0.0

[model]
model_proto=proto/model.proto
model:out_dnn1=compute(GRU_layers,fmllr)
out_dnn2=compute(MLP_layers,out_dnn1)
loss_final=cost_nll(out_dnn2,lab_cd)
err_final=cost_err(out_dnn2,lab_cd)

[forward]
forward_out=out_dnn2
normalize_posteriors=True
normalize_with_counts_from=lab_cd
save_out_file=False
require_decoding=True

[decoding]
decoding_script_folder=kaldi_decoding_scripts/
decoding_script=decode_dnn.sh
decoding_proto=proto/decoding.proto
min_active=200
max_active=7000
max_mem=50000000
beam=20.0
latbeam=12.0
acwt=0.10
max_arcs=-1
skip_scoring=false
scoring_script=/scratch/ravanelm/exp/librispeech/s5/local/score.sh
scoring_opts="--min-lmwt 4 --max-lmwt 23"
norm_vars=False
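
One thing worth noting in the [batches] section above: with increase_seq_length_train=True, training does not use max_seq_length_train from the start. The cap begins at start_seq_len_train (here 300) and grows over the epochs, so sequence lengths printed early in training can be well below 1500. Below is a minimal sketch of such a schedule, assuming the cap is simply multiplied by multply_factor_seq_len_train each epoch and clamped at the maximum; the exact rule lives in the pytorch-kaldi training loop, so treat this as an illustration, not the implementation:

# Hypothetical sketch of a progressive sequence-length schedule.
# The growth rule (multiply each epoch, clamp at the maximum) is an
# assumption for illustration; the values come from the cfg above.
start_seq_len = 300    # start_seq_len_train
multiplier = 5         # multply_factor_seq_len_train
max_seq_len = 1500     # max_seq_length_train
n_epochs = 15          # N_epochs_tr

cap = start_seq_len
for epoch in range(n_epochs):
    print(f"epoch {epoch}: sequences capped at {cap} frames")
    cap = min(cap * multiplier, max_seq_len)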

@TParcollet (Collaborator)

It actually depends on where this print comes from. It can be the sequence length (sequences are ordered, so the shorter ones come first) or sequence_length * batch_size (a flattening often done before a linear layer to speed up training).
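
A minimal PyTorch sketch of both conventions (the shapes and the 3000-unit output are hypothetical, chosen to mirror the [194, 1, 40] print above): with the default batch_first=False, an RNN takes input of shape [seq_len, batch, feat], and the time and batch dimensions are often flattened before a linear layer, so a printed leading dimension can be either seq_len or seq_len * batch.

import torch
import torch.nn as nn

# Input in the [seq_len, batch, feat] layout that PyTorch RNNs use
# by default (batch_first=False).
seq_len, batch, feat = 194, 1, 40
x = torch.randn(seq_len, batch, feat)

gru = nn.GRU(input_size=feat, hidden_size=512)
out, _ = gru(x)                            # out: [194, 1, 512]

# Flattening time and batch before the linear layer lets one matrix
# multiply cover every frame; the leading dim becomes seq_len * batch
# (194 * 1 = 194 here, indistinguishable from seq_len when batch is 1).
flat = out.reshape(seq_len * batch, -1)    # [194, 512]
linear = nn.Linear(512, 3000)              # 3000: hypothetical pdf count
y = linear(flat)                           # [194, 3000]
print(x.shape, flat.shape, y.shape)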

@HaoranWeiUTD (Author)

Thanks for your explanation.
