34 int main(
int argc,
char *argv[]) {
36 using namespace kaldi;
39 "Create feature files by segmenting input files.\n" 40 "Note: this program should no longer be needed now that\n" 41 "'ranges' in scp files are supported; search for 'ranges' in\n" 42 "http://kaldi-asr.org/doc/io_tut.html, or see the script\n" 43 "utils/data/subsegment_data_dir.sh.\n" 45 "extract-feature-segments [options...] <feats-rspecifier> " 46 " <segments-file> <feats-wspecifier>\n" 47 " (segments-file has lines like: " 48 "output-utterance-id input-utterance-or-spk-id 1.10 2.36)\n";
55 int32 frame_shift = 10;
56 int32 frame_length = 25;
57 bool snip_edges =
true;
60 po.
Register(
"min-segment-length", &min_segment_length,
61 "Minimum segment length in seconds (reject shorter segments)");
62 po.
Register(
"frame-length", &frame_length,
"Frame length in milliseconds");
63 po.
Register(
"frame-shift", &frame_shift,
"Frame shift in milliseconds");
64 po.
Register(
"max-overshoot", &max_overshoot,
65 "End segments overshooting by less (in seconds) are truncated," 67 po.
Register(
"snip-edges", &snip_edges,
68 "If true, n_frames frames will be snipped from the end of each " 69 "extracted feature matrix, " 70 "where n_frames = ceil((frame_length - frame_shift) / frame_shift), " 71 "This ensures that only the feature vectors that " 72 "completely fit in the segment are extracted. " 73 "This makes the extracted segment lengths match the lengths of the " 74 "features that have been extracted from already segmented audio.");
86 std::string rspecifier = po.
GetArg(1);
87 std::string segments_rxfilename = po.
GetArg(2);
88 std::string wspecifier = po.
GetArg(3);
94 Input ki(segments_rxfilename);
96 int32 num_lines = 0, num_success = 0;
98 int32 snip_length = 0;
100 snip_length =
static_cast<int32>(ceil(
101 1.0 * (frame_length - frame_shift) / frame_shift));
106 while (std::getline(ki.
Stream(), line)) {
108 std::vector<std::string> split_line;
113 if (split_line.size() != 4 && split_line.size() != 5) {
114 KALDI_WARN <<
"Invalid line in segments file: " << line;
117 std::string segment = split_line[0],
118 utterance = split_line[1],
119 start_str = split_line[2],
120 end_str = split_line[3];
126 KALDI_WARN <<
"Invalid line in segments file [bad start]: " << line;
130 KALDI_WARN <<
"Invalid line in segments file [bad end]: " << line;
136 if (start < 0 || end <= 0 || start >= end) {
137 KALDI_WARN <<
"Invalid line in segments file " 138 "[empty or invalid segment]: " 144 if (split_line.size() == 5) {
146 KALDI_WARN<<
"Invalid line in segments file [bad channel]: " << line;
154 if (!feat_reader.
HasKey(utterance)) {
155 KALDI_WARN <<
"Did not find features for utterance " << utterance
156 <<
", skipping segment " << segment;
166 (start * 1000.0 / frame_shift)));
167 int32 end_samp =
static_cast<int32>(round(end * 1000.0 / frame_shift));
171 end_samp -= snip_length;
177 if (start_samp < 0 || start_samp >= num_samp) {
178 KALDI_WARN <<
"Start sample out of range " << start_samp
179 <<
" [length:] " << num_samp <<
"x" << num_chan
180 <<
", skipping segment " << segment;
187 if (end_samp > num_samp) {
188 if (end_samp >= num_samp
189 + static_cast<int32>(
190 round(max_overshoot * 1000.0 / frame_shift))) {
191 KALDI_WARN<<
"End sample too far out of range " << end_samp
192 <<
" [length:] " << num_samp <<
"x" << num_chan
193 <<
", skipping segment " 205 + static_cast<int32>(round(
206 (min_segment_length * 1000.0 / frame_shift)))) {
207 KALDI_WARN<<
"Segment " << segment <<
" too short, skipping it.";
212 end_samp-start_samp, 0, num_chan);
215 feat_writer.
Write(segment, outmatrix);
218 KALDI_LOG <<
"Successfully processed " << num_success <<
" lines out of " 219 << num_lines <<
" in the segments file. ";
221 if (num_success == 0)
return -1;
223 }
catch(
const std::exception &e) {
224 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const T & Value(const std::string &key)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
bool ConvertStringToReal(const std::string &str, T *out)
ConvertStringToReal converts a string into either float or double and returns false if there was any ...
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (cf. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Sub-matrix representation.