54 using namespace kaldi;
58 typedef kaldi::int64 int64;
61 "Relabel neural network egs with the read pdf-id alignments, " 63 "Usage: nnet-relabel-egs [options] <pdf-aligment-rspecifier> " 64 "<egs_rspecifier1> ... <egs_rspecifierN> " 65 "<egs_wspecifier1> ... <egs_wspecifierN>\n" 67 " nnet-relabel-egs ark:1.ali egs_in/egs.1.ark egs_in/egs.2.ark " 68 "egs_out/egs.1.ark egs_out/egs.2.ark\n" 69 "See also: nnet-get-egs, nnet-copy-egs, steps/nnet2/relabel_egs.sh\n";
77 if (po.NumArgs() < 3 || po.NumArgs() % 2 == 0) {
82 std::string alignments_rspecifier = po.GetArg(1);
83 int32 num_archives = (po.NumArgs() - 1) / 2;
87 unordered_map<std::string, std::vector<int32>* > utt_to_pdf_ali;
91 int64 num_frames_ali = 0, num_frames_egs = 0,
92 num_frames_missing = 0, num_frames_relabelled = 0;
98 for (; !ali_reader.Done(); ali_reader.Next(), num_ali++) {
99 std::string key = ali_reader.Key();
100 std::vector<int32> *alignment =
new std::vector<int32>(ali_reader.Value());
101 std::pair<std::string, std::vector<int32>* > map(key, alignment);
102 utt_to_pdf_ali.insert(map);
103 num_frames_ali += alignment->size();
107 for (int32
i = 0;
i < num_archives;
i++) {
108 std::string egs_rspecifier(po.GetArg(
i+2));
109 std::string egs_wspecifier(po.GetArg(
i+2+num_archives));
114 for (; !egs_reader.Done(); egs_reader.Next(), num_frames_egs++) {
116 std::string key(egs_reader.Key());
122 KALDI_ERR <<
"Unable to split key " << key <<
" on delimiter - " 123 <<
" into utterance id and frame id";
127 if (utt_to_pdf_ali.find(utt_id) == utt_to_pdf_ali.end()) {
128 KALDI_WARN <<
"Unable to find utterance id " << utt_id;
129 egs_writer.Write(key, eg);
130 num_frames_missing++;
133 const std::vector<int32> *alignment = utt_to_pdf_ali[utt_id];
135 int32 num_frames_in_eg = eg.labels.size();
136 for (int32 t_offset = 0; t_offset < num_frames_in_eg; t_offset++) {
137 int32 t = frame_id + t_offset;
138 if (t >= static_cast<int32>(alignment->size())) {
139 KALDI_ERR <<
"Time index " << t <<
" out of range for alignment, " 140 <<
"should be < " << alignment->size();
142 if (eg.GetLabelSingle(t_offset) != (*alignment)[t])
143 num_frames_relabelled++;
144 eg.SetLabelSingle(t_offset, (*alignment)[t]);
146 egs_writer.Write(key, eg);
150 unordered_map<std::string, std::vector<int32>*>::iterator iter;
152 for (iter = utt_to_pdf_ali.begin(); iter != utt_to_pdf_ali.end(); ++iter)
155 KALDI_LOG <<
"Read " << num_ali <<
" alignments containing a total of " 156 << num_frames_ali <<
" frames; labelled " 157 << num_frames_egs - num_frames_missing <<
" frames out of " 158 << num_frames_egs <<
" examples; labels changed for " 159 << num_frames_relabelled <<
" of those frames.\n.";
161 return (num_frames_missing > 0.5 * num_frames_egs);
163 }
catch(
const std::exception &e) {
164 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
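NnetExample is the input data and corresponding label (or labels) for one or more frames of input, used for standard cross-entropy training of neural nets (and possibly for other objective functions).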
bool SplitEgsKey(const std::string &key, std::string *utt_id, int32 *frame_id)
A templated class for writing objects to an archive or script file; see "The Table concept".
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more documentation.
A templated class for reading objects sequentially from an archive or script file; see "The Table concept".