All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
extract-rows.cc File Reference
Include dependency graph for extract-rows.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 25 of file extract-rows.cc.

References kaldi::ConvertStringToInteger(), kaldi::ConvertStringToReal(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ERR, KALDI_LOG, KALDI_WARN, ParseOptions::NumArgs(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), MatrixBase< Real >::RowRange(), kaldi::SplitStringToVector(), Input::Stream(), RandomAccessTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

25  {
26  try {
27  using namespace kaldi;
28  using namespace std;
29 
30  const char *usage =
31  "Extract certain row ranges of matrices. This is most useful to extract segments\n"
32  "from feature files, for example to modify segmentations or to extract features\n"
33  "corresponding to certain alignments. The program expects a segments file in the\n"
34  "form of\n"
35  " segment-name utterance-id start end\n"
36  "where the segment-name is chosen by the user and utterance-id indexes the input matrices.\n"
37  "By default, 'start' and 'end' are row numbers (zero-based), but if you specify the --frame-shift\n"
38  "option (e.g. --frame-shift=0.01), then they represent a time in seconds, which are converted\n"
39  "to integers by dividing by frame-shift.\n"
40  "\n"
41  "Usage: extract-rows [options] <segments-file> <features-rspecifier> <features-wspecifier>\n"
42  " e.g. extract-rows --frame-shift=0.01 segments ark:feats-in.ark ark:feats-out.ark\n"
43  "See also: select-feats, subset-feats, subsample-feats\n";
44 
45  ParseOptions po(usage);
46 
47  float frame_shift = 0;
48 
49  po.Register("frame-shift", &frame_shift,
50  "Frame shift in sec (e.g. 0.01), if segment files contains times "
51  "instead of frames");
52 
53  po.Read(argc, argv);
54 
55  if (po.NumArgs() != 3) {
56  po.PrintUsage();
57  exit(1);
58  }
59 
60  string segment_rspecifier = po.GetArg(1);
61  string feat_rspecifier = po.GetArg(2);
62  string feat_wspecifier = po.GetArg(3);
63 
64  Input ki(segment_rspecifier);
65  RandomAccessBaseFloatMatrixReader reader(feat_rspecifier);
66  BaseFloatMatrixWriter writer(feat_wspecifier);
67 
68  int32 num_done = 0, num_err = 0;
69 
70  string line;
71 
72  /* read each line from segments file */
73  while (std::getline(ki.Stream(), line)) {
74 
75  vector<string> split_line;
76  SplitStringToVector(line, " \t\r", true, &split_line);
77  if (split_line.size() != 4) {
78  KALDI_WARN << "Invalid line in segments file: " << line;
79  num_err++;
80  continue;
81  }
82 
83  string utt = split_line[0],
84  recording = split_line[1],
85  start_str = split_line[2],
86  end_str = split_line[3];
87 
88  // if the segments are in time, we need to convert them to frame numbers
89  int32 start = 0;
90  int32 end = 0;
91  if (frame_shift > 0) {
92  // Convert the start time and endtime to real from string. Segment is
93  // ignored if start or end time cannot be converted to real.
94  double t1, t2;
95  if (!ConvertStringToReal(start_str, &t1)) {
96  KALDI_ERR << "Invalid line in segments file [bad start]: " << line;
97  continue;
98  }
99  if (!ConvertStringToReal(end_str, &t2)) {
100  KALDI_ERR << "Invalid line in segments file [bad end]: " << line;
101  continue;
102  }
103 
104  start = (int) (t1 / frame_shift);
105  end = (int) (t2 / frame_shift);
106  } else {
107  if (!ConvertStringToInteger(start_str, &start)) {
108  KALDI_ERR << "Invalid line in segments file [bad start]: " << line;
109  continue;
110  }
111  if (!ConvertStringToInteger(end_str, &end)) {
112  KALDI_ERR << "Invalid line in segments file [bad end]: " << line;
113  continue;
114  }
115  }
116 
117  if (start < 0 || end - start <= 0) {
118  KALDI_WARN << "Invalid line in segments file [less than one frame]: " << line;
119  num_err++;
120  continue;
121  }
122 
123  if (reader.HasKey(recording)) {
124  const Matrix<BaseFloat> &feats = reader.Value(recording);
125 
126  if (feats.NumRows() < end) {
127  if (feats.NumRows() > start) {
128  KALDI_WARN << "Truncating end time of segment " << utt << " from "
129  << end << " to " << feats.NumRows();
130  end = feats.NumRows();
131  } else {
132  KALDI_WARN << "Segment " << utt << " is outside of input range: "
133  << "input num-rows " << feats.NumRows() << " vs. "
134  << line;
135  num_err++;
136  continue;
137  }
138  }
139 
140  Matrix<BaseFloat> to_write(feats.RowRange(start, (end-start)));
141  writer.Write(utt, to_write);
142  num_done++;
143  } else {
144  KALDI_WARN << "No recording-id " << recording << " present in features.";
145  num_err++;
146  }
147  }
148 
149  KALDI_LOG << "Processed " << num_done << " segments successfully; "
150  << "errors on " << num_err;
151 
152  return (num_done > 0 ? 0 : 1);
153  } catch(const std::exception &e) {
154  std::cerr << e.what();
155  return -1;
156  }
157 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:118
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:366
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
Definition: text-utils.cc:63
#define KALDI_ERR
Definition: kaldi-error.h:127
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
Definition: text-utils.cc:238
void Write(std::ostream &out, bool binary) const
write to stream.
#define KALDI_WARN
Definition: kaldi-error.h:130
SubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: kaldi-matrix.h:203
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:58
#define KALDI_LOG
Definition: kaldi-error.h:133