This is a program for extracting segments from feature files/archives.
36 using namespace kaldi;
39 "Create feature files by segmenting input files.\n" 40 "Note: this program should no longer be needed now that\n" 41 "'ranges' in scp files are supported; search for 'ranges' in\n" 42 "http://kaldi-asr.org/doc/io_tut.html, or see the script\n" 43 "utils/data/subsegment_data_dir.sh.\n" 45 "extract-feature-segments [options...] <feats-rspecifier> " 46 " <segments-file> <feats-wspecifier>\n" 47 " (segments-file has lines like: " 48 "output-utterance-id input-utterance-or-spk-id 1.10 2.36)\n";
55 int32 frame_shift = 10;
56 int32 frame_length = 25;
57 bool snip_edges =
true;
60 po.Register(
"min-segment-length", &min_segment_length,
61 "Minimum segment length in seconds (reject shorter segments)");
62 po.Register(
"frame-length", &frame_length,
"Frame length in milliseconds");
63 po.Register(
"frame-shift", &frame_shift,
"Frame shift in milliseconds");
64 po.Register(
"max-overshoot", &max_overshoot,
65 "End segments overshooting by less (in seconds) are truncated," 67 po.Register(
"snip-edges", &snip_edges,
68 "If true, n_frames frames will be snipped from the end of each " 69 "extracted feature matrix, " 70 "where n_frames = ceil((frame_length - frame_shift) / frame_shift), " 71 "This ensures that only the feature vectors that " 72 "completely fit in the segment are extracted. " 73 "This makes the extracted segment lengths match the lengths of the " 74 "features that have been extracted from already segmented audio.");
81 if (po.NumArgs() != 3) {
86 std::string rspecifier = po.GetArg(1);
87 std::string segments_rxfilename = po.GetArg(2);
88 std::string wspecifier = po.GetArg(3);
94 Input ki(segments_rxfilename);
96 int32 num_lines = 0, num_success = 0;
98 int32 snip_length = 0;
100 snip_length =
static_cast<int32>(ceil(
101 1.0 * (frame_length - frame_shift) / frame_shift));
106 while (std::getline(ki.Stream(), line)) {
108 std::vector<std::string> split_line;
113 if (split_line.size() != 4 && split_line.size() != 5) {
114 KALDI_WARN <<
"Invalid line in segments file: " << line;
117 std::string segment = split_line[0],
118 utterance = split_line[1],
119 start_str = split_line[2],
120 end_str = split_line[3];
126 KALDI_WARN <<
"Invalid line in segments file [bad start]: " << line;
130 KALDI_WARN <<
"Invalid line in segments file [bad end]: " << line;
136 if (start < 0 || end <= 0 || start >= end) {
137 KALDI_WARN <<
"Invalid line in segments file " 138 "[empty or invalid segment]: " 144 if (split_line.size() == 5) {
146 KALDI_WARN<<
"Invalid line in segments file [bad channel]: " << line;
154 if (!feat_reader.HasKey(utterance)) {
155 KALDI_WARN <<
"Did not find features for utterance " << utterance
156 <<
", skipping segment " << segment;
166 (start * 1000.0 / frame_shift)));
167 int32 end_samp =
static_cast<int32>(round(end * 1000.0 / frame_shift));
171 end_samp -= snip_length;
177 if (start_samp < 0 || start_samp >= num_samp) {
178 KALDI_WARN <<
"Start sample out of range " << start_samp
179 <<
" [length:] " << num_samp <<
"x" << num_chan
180 <<
", skipping segment " << segment;
187 if (end_samp > num_samp) {
188 if (end_samp >= num_samp
189 + static_cast<int32>(
190 round(max_overshoot * 1000.0 / frame_shift))) {
191 KALDI_WARN<<
"End sample too far out of range " << end_samp
192 <<
" [length:] " << num_samp <<
"x" << num_chan
193 <<
", skipping segment " 205 + static_cast<int32>(round(
206 (min_segment_length * 1000.0 / frame_shift)))) {
207 KALDI_WARN<<
"Segment " << segment <<
" too short, skipping it.";
212 end_samp-start_samp, 0, num_chan);
215 feat_writer.Write(segment, outmatrix);
218 KALDI_LOG <<
"Successfully processed " << num_success <<
" lines out of " 219 << num_lines <<
" in the segments file. ";
221 if (num_success == 0)
return -1;
223 }
catch(
const std::exception &e) {
224 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
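Converts a string into an integer via strtoll and returns false if there was any kind of problem (i.e. the string was not an integer or contained extra non-whitespace junk, or the integer was too large to fit into the type it is being converted into).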
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
A templated class for writing objects to an archive or script file; see "The Table concept" in the Kaldi I/O documentation.
Allows random access to a collection of objects in an archive or script file; see "The Table concept" in the Kaldi I/O documentation.
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more documentation.
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
bool ConvertStringToReal(const std::string &str, T *out)
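ConvertStringToReal converts a string into either float or double and returns false if there was any kind of problem (i.e. the string was not a floating point number or contained extra non-whitespace junk).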
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Sub-matrix representation.