27 using namespace kaldi;
29 "Read wav files and output an archive consisting of a single float:\n" 30 "the duration of each one in seconds.\n" 31 "Usage: wav-to-duration [options...] <wav-rspecifier> <duration-wspecifier>\n" 32 "E.g.: wav-to-duration scp:wav.scp ark,t:-\n" 33 "See also: wav-copy extract-segments feat-to-len\n" 34 "Currently this program may output a lot of harmless warnings regarding\n" 35 "nonzero exit status of pipes\n";
37 bool read_entire_file =
false;
41 po.Register(
"read-entire-file", &read_entire_file,
"If true, use regular WaveHolder " 42 "instead of WaveInfoHolder to ensure the returned duration is correct.");
46 if (po.NumArgs() != 2) {
51 std::string wav_rspecifier = po.GetArg(1),
52 duration_wspecifier = po.GetArg(2);
55 double sum_duration = 0.0,
56 min_duration = std::numeric_limits<BaseFloat>::infinity(),
61 if (read_entire_file) {
63 for (; !wav_reader.Done(); wav_reader.Next()) {
64 std::string key = wav_reader.Key();
65 const WaveData &wave_data = wav_reader.Value();
67 duration_writer.Write(key, duration);
69 sum_duration += duration;
70 min_duration = std::min<double>(min_duration, duration);
71 max_duration = std::max<double>(max_duration, duration);
76 for (; !wav_reader.Done(); wav_reader.Next()) {
77 std::string key = wav_reader.Key();
78 const WaveInfo &wave_info = wav_reader.Value();
80 KALDI_ERR <<
"Error: member " << key <<
" has no duration in header. " 81 <<
"Check the source, and/or try --read-entire-file.";
83 duration_writer.Write(key, duration);
85 sum_duration += duration;
86 min_duration = std::min<double>(min_duration, duration);
87 max_duration = std::max<double>(max_duration, duration);
92 KALDI_LOG <<
"Printed duration for " << num_done <<
" audio files.";
94 KALDI_LOG <<
"Mean duration was " << (sum_duration / num_done)
95 <<
", min and max durations were " << min_duration <<
", " 98 return (num_done != 0 ? 0 : 1);
99 }
catch(
const std::exception &e) {
100 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat Duration() const
Approximate duration, seconds. Invalid if IsStreamed() is true.
bool IsStreamed() const
Is stream size unknown? Duration and SampleCount not valid if true.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
This class reads and holds wave file header information.
This class's purpose is to read in Wave files.
BaseFloat Duration() const