build-pfile-from-ali.cc
Go to the documentation of this file.
1 // bin/build-pfile-from-ali.cc
2 
3 // Copyright 2013 Carnegie Mellon University (Author: Yajie Miao)
4 // Johns Hopkins University (Author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <string>
22 using std::string;
23 #include <vector>
24 using std::vector;
25 
26 #include "base/kaldi-common.h"
27 #include "gmm/am-diag-gmm.h"
28 #include "hmm/transition-model.h"
29 #include "hmm/hmm-utils.h"
30 #include "util/common-utils.h"
31 
37 int main(int argc, char *argv[]) {
38  using namespace kaldi;
39  typedef kaldi::int32 int32;
40  try {
41  const char *usage =
42  "Build pfiles for neural network training from alignment.\n"
43  "Usage: build-pfile-from-ali [options] <model> <alignments-rspecifier> <feature-rspecifier> \n"
44  "<pfile-wspecifier>\n"
45  "e.g.: \n"
46  " build-pfile-from-ali 1.mdl ark:1.ali features \n"
47  " \"|pfile_create -i - -o pfile.1 -f 143 -l 1\" ";
48 
49  ParseOptions po(usage);
50 
51  int32 every_nth_frame = 1;
52  po.Register("every-nth-frame", &every_nth_frame, "This option will cause it to print "
53  "out only every n'th frame (for subsampling)");
54 
55  po.Read(argc, argv);
56 
57  if (po.NumArgs() != 4) {
58  po.PrintUsage();
59  exit(1);
60  }
61 
62  std::string model_filename = po.GetArg(1),
63  alignments_rspecifier = po.GetArg(2),
64  feature_rspecifier = po.GetArg(3),
65  pfile_wspecifier = po.GetArg(4);
66 
67  TransitionModel trans_model;
68  AmDiagGmm am_gmm;
69  {
70  bool binary;
71  Input ki(model_filename, &binary);
72  trans_model.Read(ki.Stream(), binary);
73  am_gmm.Read(ki.Stream(), binary);
74  }
75 
76  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
77  RandomAccessInt32VectorReader ali_reader(alignments_rspecifier);
78 
79  int32 num_done = 0, num_no_ali = 0, num_other_error = 0;
80  int32 num_utt = 0;
81 
82  KALDI_ASSERT(every_nth_frame >= 1);
83 
84  Output ko(pfile_wspecifier, false);
85 
86  for (; !feature_reader.Done(); feature_reader.Next()) {
87  std::string key = feature_reader.Key();
88  if (!ali_reader.HasKey(key)) {
89  KALDI_WARN << "Did not find alignment for utterance " << key;
90  num_no_ali++;
91  continue;
92  }
93 
94  const Matrix<BaseFloat> &feats = feature_reader.Value();
95  std::vector<int32> alignment = ali_reader.Value(key);
96  if (static_cast<int32>(feats.NumRows()) != static_cast<int32>(alignment.size())) {
97  KALDI_WARN << "Alignment vector has wrong size " << (alignment.size())
98  << " vs. " << (feats.NumRows());
99  num_other_error++;
100  continue;
101  }
102  int32 dim = feats.NumCols();
103 
104  for (size_t i = 0; i < alignment.size(); i++) {
105  if (i % every_nth_frame == 0) {
106  std::stringstream ss;
107  // Output sentence number and frame number
108  ss << num_utt;
109  ss << " ";
110  ss << (i / every_nth_frame);
111  // Output feature vector
112  for (int32 d = 0; d < dim; ++d) {
113  ss << " ";
114  ss << feats(i, d);
115  }
116  // Output the class label
117  ss << " ";
118  ss << trans_model.TransitionIdToPdf(alignment[i]);
119 
120  ko.Stream() << ss.str().c_str();
121  ko.Stream() << "\n";
122  }
123  }
124  num_done ++; num_utt ++;
125  }
126  ko.Close();
127  KALDI_LOG << "Converted " << num_done << " alignments to pfiles.";
128  KALDI_LOG << num_no_ali << " utterances have no alignment; "
129  << num_other_error << " utterances have other errors.";
130  } catch(const std::exception& e) {
131  std::cerr << e.what();
132  return -1;
133  }
134 }
135 
136 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
int32 TransitionIdToPdf(int32 trans_id) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
std::istream & Stream()
Definition: kaldi-io.cc:826
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
const T & Value(const std::string &key)
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
#define KALDI_LOG
Definition: kaldi-error.h:153
void Read(std::istream &in_stream, bool binary)
Definition: am-diag-gmm.cc:147
int main(int argc, char *argv[])
Build pfiles for Neural Network training from alignment.
bool Close()
Definition: kaldi-io.cc:677