nnet-relabel-egs.cc File Reference
#include <sstream>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "nnet2/nnet-example.h"
Include dependency graph for nnet-relabel-egs.cc:

Go to the source code of this file.

Namespaces

 kaldi
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
 

Functions

bool SplitEgsKey (const std::string &key, std::string *utt_id, int32 *frame_id)
 
int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 53 of file nnet-relabel-egs.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), rnnlm::i, KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), kaldi::SplitEgsKey(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

53  {
54  using namespace kaldi;
55  using namespace kaldi::nnet2;
56 
57  typedef kaldi::int32 int32;
58  typedef kaldi::int64 int64;
59  try {
60  const char *usage =
61  "Relabel neural network egs with the read pdf-id alignments, "
62  "zero-based..\n"
63  "Usage: nnet-relabel-egs [options] <pdf-aligment-rspecifier> "
64  "<egs_rspecifier1> ... <egs_rspecifierN> "
65  "<egs_wspecifier1> ... <egs_wspecifierN>\n"
66  "e.g.: \n"
67  " nnet-relabel-egs ark:1.ali egs_in/egs.1.ark egs_in/egs.2.ark "
68  "egs_out/egs.1.ark egs_out/egs.2.ark\n"
69  "See also: nnet-get-egs, nnet-copy-egs, steps/nnet2/relabel_egs.sh\n";
70 
71  ParseOptions po(usage);
72 
73  po.Read(argc, argv);
74 
75  // Here we expect equal number of input egs archive and output egs archives.
76  // So the total number of arguments including the alignment specifier must be odd.
77  if (po.NumArgs() < 3 || po.NumArgs() % 2 == 0) {
78  po.PrintUsage();
79  exit(1);
80  }
81 
82  std::string alignments_rspecifier = po.GetArg(1);
83  int32 num_archives = (po.NumArgs() - 1) / 2;
84 
85  SequentialInt32VectorReader ali_reader(alignments_rspecifier);
86 
87  unordered_map<std::string, std::vector<int32>* > utt_to_pdf_ali;
88 
89  // Keep statistics
90  int32 num_ali = 0;
91  int64 num_frames_ali = 0, num_frames_egs = 0,
92  num_frames_missing = 0, num_frames_relabelled = 0;
93 
94  // Read alignments and put the pointer in an unordered map
95  // indexed by the key. This is so that we can efficiently find the
96  // alignment corresponding to the utterance to
97  // which a particular frame belongs
98  for (; !ali_reader.Done(); ali_reader.Next(), num_ali++) {
99  std::string key = ali_reader.Key();
100  std::vector<int32> *alignment = new std::vector<int32>(ali_reader.Value());
101  std::pair<std::string, std::vector<int32>* > map(key, alignment);
102  utt_to_pdf_ali.insert(map);
103  num_frames_ali += alignment->size();
104  }
105 
106  // Read archives of egs sequentially
107  for (int32 i = 0; i < num_archives; i++) {
108  std::string egs_rspecifier(po.GetArg(i+2));
109  std::string egs_wspecifier(po.GetArg(i+2+num_archives));
110 
111  SequentialNnetExampleReader egs_reader(egs_rspecifier);
112  NnetExampleWriter egs_writer(egs_wspecifier);
113 
114  for (; !egs_reader.Done(); egs_reader.Next(), num_frames_egs++) {
115 
116  std::string key(egs_reader.Key());
117 
118  std::string utt_id;
119  int32 frame_id;
120 
121  if (!SplitEgsKey(key, &utt_id, &frame_id)) {
122  KALDI_ERR << "Unable to split key " << key << " on delimiter - "
123  << " into utterance id and frame id";
124  }
125  NnetExample eg(egs_reader.Value());
126 
127  if (utt_to_pdf_ali.find(utt_id) == utt_to_pdf_ali.end()) {
128  KALDI_WARN << "Unable to find utterance id " << utt_id;
129  egs_writer.Write(key, eg);
130  num_frames_missing++;
131  continue;
132  }
133  const std::vector<int32> *alignment = utt_to_pdf_ali[utt_id];
134 
135  int32 num_frames_in_eg = eg.labels.size();
136  for (int32 t_offset = 0; t_offset < num_frames_in_eg; t_offset++) {
137  int32 t = frame_id + t_offset;
138  if (t >= static_cast<int32>(alignment->size())) {
139  KALDI_ERR << "Time index " << t << " out of range for alignment, "
140  << "should be < " << alignment->size();
141  }
142  if (eg.GetLabelSingle(t_offset) != (*alignment)[t])
143  num_frames_relabelled++;
144  eg.SetLabelSingle(t_offset, (*alignment)[t]);
145  }
146  egs_writer.Write(key, eg);
147  }
148  }
149 
150  unordered_map<std::string, std::vector<int32>*>::iterator iter;
151 
152  for (iter = utt_to_pdf_ali.begin(); iter != utt_to_pdf_ali.end(); ++iter)
153  delete iter->second;
154 
155  KALDI_LOG << "Read " << num_ali << " alignments containing a total of "
156  << num_frames_ali << " frames; labelled "
157  << num_frames_egs - num_frames_missing << " frames out of "
158  << num_frames_egs << " examples; labels changed for "
159  << num_frames_relabelled << " of those frames.\n.";
160 
161  return (num_frames_missing > 0.5 * num_frames_egs);
162 
163  } catch(const std::exception &e) {
164  std::cerr << e.what();
165  return -1;
166  }
167 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
Definition: nnet-example.h:36
bool SplitEgsKey(const std::string &key, std::string *utt_id, int32 *frame_id)
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_LOG
Definition: kaldi-error.h:153