// Entry point of the extract-segments tool. Per the usage text below, it
// extracts segments from larger WAV recordings: it reads a segments file line
// by line and, for each accepted line, writes the selected slice of wave data
// to the output table under the segment id.
// NOTE(review): this listing omits a number of original lines (declarations,
// parsing calls, `continue`s, closing braces). The comments below describe
// only what is visible here.
34 int main(
int argc,
char *argv[]) {
36 using namespace kaldi;
// Usage message. It documents the two accepted segments-file line formats:
// four fields, or five fields with a trailing channel. It also states that an
// <end-time> of -1 means "until the end of the WAV file".
39 "Extract segments from a large audio file in WAV format.\n" 40 "Usage: extract-segments [options] <wav-rspecifier> <segments-file> <wav-wspecifier>\n" 41 "e.g. extract-segments scp:wav.scp segments ark:- | <some-other-program>\n" 42 " segments-file format: each line is either\n" 43 "<segment-id> <recording-id> <start-time> <end-time>\n" 44 "e.g. call-861225-A-0050-0065 call-861225-A 5.0 6.5\n" 45 "or (less frequently, and not supported in scripts):\n" 46 "<segment-id> <wav-file-name> <start-time> <end-time> <channel>\n" 47 "where <channel> will normally be 0 (left) or 1 (right)\n" 48 "e.g. call-861225-A-0050-0065 call-861225 5.0 6.5 1\n" 49 "And <end-time> of -1 means the segment runs till the end of the WAV file\n" 50 "See also: extract-feature-segments, wav-copy, wav-to-duration\n";
// Command-line options, both expressed in seconds: the minimum accepted
// segment length, and how far a segment end may overshoot the audio before the
// segment is rejected instead of truncated.
55 po.
Register(
"min-segment-length", &min_segment_length,
56 "Minimum segment length in seconds (reject shorter segments)");
57 po.
Register(
"max-overshoot", &max_overshoot,
58 "End segments overshooting audio by less than this (in seconds) " 59 "are truncated, else rejected.");
// Positional arguments, in order: input wave table, segments file, output
// wave table.
67 std::string wav_rspecifier = po.
GetArg(1);
68 std::string segments_rxfilename = po.
GetArg(2);
69 std::string wav_wspecifier = po.
GetArg(3);
// Input object over the segments file; its stream is read line by line below.
73 Input ki(segments_rxfilename);
// Counters reported in the final log message.
75 int32 num_lines = 0, num_success = 0;
// Process the segments file one line at a time.
79 while (std::getline(ki.
Stream(), line)) {
81 std::vector<std::string> split_line;
// A line must have exactly 4 fields, or 5 when a channel is given.
86 if (split_line.size() != 4 && split_line.size() != 5) {
87 KALDI_WARN <<
"Invalid line in segments file: " << line;
// The first four fields, still as strings: segment id, recording id, start
// time, end time.
90 std::string segment = split_line[0],
91 recording = split_line[1],
92 start_str = split_line[2],
93 end_str = split_line[3];
// Warnings for a start or end field that was rejected. The checks that guard
// them are not visible in this listing.
99 KALDI_WARN <<
"Invalid line in segments file [bad start]: " << line;
103 KALDI_WARN <<
"Invalid line in segments file [bad end]: " << line;
// Reject a negative start, a non-positive end other than the -1 sentinel, and
// a start that is not strictly before a positive end.
108 if (start < 0 || (end != -1.0 && end <= 0) ||
109 ((start >= end) && (end > 0))) {
110 KALDI_WARN << (
"Invalid line in segments file " 111 "[empty or invalid segment]: ") << line;
// The optional fifth field is the channel. The warning below is for a channel
// field that was rejected.
116 if (split_line.size() == 5) {
118 KALDI_WARN <<
"Invalid line in segments file [bad channel]: " << line;
// Skip segments whose recording id is not present in the wave input.
124 if (!reader.
HasKey(recording)) {
125 KALDI_WARN <<
"Could not find recording " << recording
126 <<
", skipping segment " << segment;
// The row count of the wave data is used as the number of channels. The file
// length in seconds is the sample count divided by the sampling frequency.
134 num_chan = wave_data.
NumRows();
135 BaseFloat file_length = num_samp / samp_freq;
// The segment start must lie within [0, file_length].
138 if (start < 0 || start > file_length) {
139 KALDI_WARN <<
"Segment start is out of file data range [0, " 140 << file_length <<
"s]; skipping segment '" << line <<
"'";
// An end that exceeds the file length by more than max_overshoot is rejected.
146 if (end > file_length + max_overshoot) {
147 KALDI_WARN <<
"Segment end is too far out of file data range [0," 148 << file_length <<
"s]; skipping segment '" << line <<
"'";
// A negative end (the -1 "until end of file" value) or an end past the file
// length within the tolerated overshoot is clamped to the file length.
154 if (end < 0 || end > file_length) end = file_length;
// Enforce the minimum segment length after clamping.
157 if (end - start < min_segment_length) {
158 KALDI_WARN <<
"Segment " << segment <<
" too short, skipping it.";
// Single-channel data uses channel 0.
165 if (num_chan == 1) channel = 0;
// NOTE(review): this message says "Skipping segment" but is emitted through
// KALDI_ERR, while every sibling check uses KALDI_WARN. Confirm whether
// aborting here is intended.
167 KALDI_ERR << (
"Your data has multiple channels. You must " 168 "specify the channel in the segments file. " 169 "Skipping segment ") << segment;
// The channel index must be smaller than the number of channels.
172 if (channel >= num_chan) {
173 KALDI_WARN <<
"Invalid channel " << channel <<
" >= " << num_chan
174 <<
". Skipping segment " << segment;
// Convert start and end from seconds to sample indices. Adding 0.5 before the
// integer cast rounds to the nearest sample.
181 int32 start_samp =
static_cast<int32>(start * samp_freq + 0.5f),
182 end_samp = static_cast<int32>(end * samp_freq + 0.5f);
// Guard for an end sample index beyond the number of samples. The guarded
// statement is not visible in this listing.
184 if (end_samp > num_samp)
189 start_samp, end_samp - start_samp);
// Wrap the extracted samples in a WaveData with the same sampling frequency
// and write it to the output table under the segment id.
190 WaveData segment_wave(samp_freq, segment_matrix);
191 writer.
Write(segment, segment_wave);
// Final summary: successfully processed lines versus total lines read.
194 KALDI_LOG <<
"Successfully processed " << num_success <<
" lines out of " 195 << num_lines <<
" in the segments file. ";
// Standard exceptions are caught and their message is printed to stderr.
197 }
catch(
const std::exception &e) {
198 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
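Converts a string into an integer via strtoll and returns false if there was any kind of problem (i.e. the string was not an integer, contained extra non-whitespace junk, or the integer was too large to fit into the type it is being converted into).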
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see "The Table concept".
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see "The Table concept".
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more documentation.
const T & Value(const std::string &key)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single character delimiters.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
bool ConvertStringToReal(const std::string &str, T *out)
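ConvertStringToReal converts a string into either float or double and returns false if there was any kind of problem (i.e. the string was not a floating point number or contained extra non-whitespace junk).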
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
This class's purpose is to read in Wave files.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Sub-matrix representation.