46 using namespace kaldi;
48 "Extend wave data with a fairly long silence at the end (e.g. 5 seconds).\n" 49 "The input waveforms are assumed having silences at the begin/end and those\n" 50 "segments are extracted and appended to the end of the utterance.\n" 51 "Note this is for use in testing endpointing in decoding.\n" 53 "Usage: extend-wav-with-silence [options] <wav-rspecifier> <wav-wspecifier>\n" 54 " extend-wav-with-silence [options] <wav-rxfilename> <wav-wxfilename>\n";
59 sil_extract_len = 0.05,
60 sil_extract_shift = 0.025;
61 po.Register(
"extra-silence-length", &sil_len,
"the length of silence that will be " 62 "appended to the end of each waveform, in seconds.");
63 po.Register(
"silence-search-length", &sil_search_len,
"the length at the beginning " 64 "or end of each waveform in which to search for the quietest segment of " 65 "silence, in seconds.");
66 po.Register(
"silence-extract-length", &sil_extract_len,
"the length of silence segments " 67 "to be extracted from the waveform, which must be smaller than silence-" 68 "search-length, in seconds.");
69 po.Register(
"silence-extract-shift", &sil_extract_shift,
"the shift length when searching " 70 "for segments of silences, typically samller than silence-extract-length, " 75 if (po.NumArgs() != 2) {
83 int32 num_success = 0;
85 for(; !reader.Done(); reader.Next()){
86 std::string wav_key = reader.Key();
87 const WaveData &wave = reader.Value();
90 int32 num_chan = wave_data.
NumRows(),
91 num_ext_samp = (
int32)(samp_freq * sil_len);
94 for(int32
i = 0;
i < num_chan;
i++){
95 Vector<BaseFloat> wav_this_chan(wave_data.
Row(
i));
96 Vector<BaseFloat> wav_extend(wav_this_chan.Dim() + num_ext_samp);
98 sil_search_len, sil_extract_len, sil_extract_shift);
99 KALDI_ASSERT(wav_extend.Dim() == wav_this_chan.Dim() + num_ext_samp);
100 new_wave.CopyRowFromVec(wav_extend,
i);
102 WaveData wave_out(samp_freq, new_wave);
103 writer.Write(wav_key, wave_out);
106 KALDI_LOG <<
"Successfully extended " << num_success <<
" files.";
109 std::string wav_rxfilename = po.GetArg(1);
110 std::string wav_wxfilename = po.GetArg(2);
112 Input ki(wav_rxfilename, &binary);
114 if (!wh.
Read(ki.Stream())) {
123 int32 num_chan = wave_data.
NumRows(),
124 num_ext_samp = (
int32)(samp_freq * sil_len);
127 for(int32
i = 0;
i < num_chan;
i++){
128 Vector<BaseFloat> wav_this_chan(wave_data.
Row(
i));
129 Vector<BaseFloat> wav_extend(wav_this_chan.Dim() + num_ext_samp);
131 sil_search_len, sil_extract_len, sil_extract_shift);
132 KALDI_ASSERT(wav_extend.Dim() == wav_this_chan.Dim() + num_ext_samp);
133 new_wave.CopyRowFromVec(wav_extend,
i);
135 WaveData wave_out(samp_freq, new_wave);
137 Output ko(wav_wxfilename, binary,
false);
144 }
catch(
const std::exception &e) {
145 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
bool Read(std::istream &is)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
A templated class for writing objects to an archive or script file; see The Table concept...
BaseFloat SampFreq() const
const Matrix< BaseFloat > & Data() const
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
This class's purpose is to read in Wave files.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
static bool Write(std::ostream &os, bool binary, const T &t)
void ExtendWaveWithSilence(const Vector< BaseFloat > &wav_in, BaseFloat samp_rate, Vector< BaseFloat > *wav_out, BaseFloat sil_search_len, BaseFloat sil_extract_len, BaseFloat sil_extract_shift)