build-pfile-from-ali.cc File Reference
#include <string>
#include <vector>
#include "base/kaldi-common.h"
#include "gmm/am-diag-gmm.h"
#include "hmm/transition-model.h"
#include "hmm/hmm-utils.h"
#include "util/common-utils.h"
Include dependency graph for build-pfile-from-ali.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 Build pfiles for Neural Network training from alignment. More...
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Build pfiles for Neural Network training from alignment.

The pfiles contain both the data vectors and their corresponding class/state labels (zero-based).

Definition at line 37 of file build-pfile-from-ali.cc.

References Output::Close(), rnnlm::d, SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), AmDiagGmm::Read(), ParseOptions::Read(), TransitionModel::Read(), ParseOptions::Register(), Output::Stream(), Input::Stream(), TransitionModel::TransitionIdToPdf(), RandomAccessTableReader< Holder >::Value(), and SequentialTableReader< Holder >::Value().

37  {
38  using namespace kaldi;
39  typedef kaldi::int32 int32;
40  try {
41  const char *usage =
42  "Build pfiles for neural network training from alignment.\n"
43  "Usage: build-pfile-from-ali [options] <model> <alignments-rspecifier> <feature-rspecifier> \n"
44  "<pfile-wspecifier>\n"
45  "e.g.: \n"
46  " build-pfile-from-ali 1.mdl ark:1.ali features \n"
47  " \"|pfile_create -i - -o pfile.1 -f 143 -l 1\" ";
48 
49  ParseOptions po(usage);
50 
51  int32 every_nth_frame = 1;
52  po.Register("every-nth-frame", &every_nth_frame, "This option will cause it to print "
53  "out only every n'th frame (for subsampling)");
54 
55  po.Read(argc, argv);
56 
57  if (po.NumArgs() != 4) {
58  po.PrintUsage();
59  exit(1);
60  }
61 
62  std::string model_filename = po.GetArg(1),
63  alignments_rspecifier = po.GetArg(2),
64  feature_rspecifier = po.GetArg(3),
65  pfile_wspecifier = po.GetArg(4);
66 
67  TransitionModel trans_model;
68  AmDiagGmm am_gmm;
69  {
70  bool binary;
71  Input ki(model_filename, &binary);
72  trans_model.Read(ki.Stream(), binary);
73  am_gmm.Read(ki.Stream(), binary);
74  }
75 
76  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
77  RandomAccessInt32VectorReader ali_reader(alignments_rspecifier);
78 
79  int32 num_done = 0, num_no_ali = 0, num_other_error = 0;
80  int32 num_utt = 0;
81 
82  KALDI_ASSERT(every_nth_frame >= 1);
83 
84  Output ko(pfile_wspecifier, false);
85 
86  for (; !feature_reader.Done(); feature_reader.Next()) {
87  std::string key = feature_reader.Key();
88  if (!ali_reader.HasKey(key)) {
89  KALDI_WARN << "Did not find alignment for utterance " << key;
90  num_no_ali++;
91  continue;
92  }
93 
94  const Matrix<BaseFloat> &feats = feature_reader.Value();
95  std::vector<int32> alignment = ali_reader.Value(key);
96  if (static_cast<int32>(feats.NumRows()) != static_cast<int32>(alignment.size())) {
97  KALDI_WARN << "Alignment vector has wrong size " << (alignment.size())
98  << " vs. " << (feats.NumRows());
99  num_other_error++;
100  continue;
101  }
102  int32 dim = feats.NumCols();
103 
104  for (size_t i = 0; i < alignment.size(); i++) {
105  if (i % every_nth_frame == 0) {
106  std::stringstream ss;
107  // Output sentence number and frame number
108  ss << num_utt;
109  ss << " ";
110  ss << (i / every_nth_frame);
111  // Output feature vector
112  for (int32 d = 0; d < dim; ++d) {
113  ss << " ";
114  ss << feats(i, d);
115  }
116  // Output the class label
117  ss << " ";
118  ss << trans_model.TransitionIdToPdf(alignment[i]);
119 
120  ko.Stream() << ss.str().c_str();
121  ko.Stream() << "\n";
122  }
123  }
124  num_done ++; num_utt ++;
125  }
126  ko.Close();
127  KALDI_LOG << "Converted " << num_done << " alignments to pfiles.";
128  KALDI_LOG << num_no_ali << " utterances have no alignment; "
129  << num_other_error << " utterances have other errors.";
130  } catch(const std::exception& e) {
131  std::cerr << e.what();
132  return -1;
133  }
134 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
kaldi::int32 int32
int32 TransitionIdToPdf(int32 trans_id) const
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
void Read(std::istream &in_stream, bool binary)
Definition: am-diag-gmm.cc:147