27 int main(
int argc,
char *argv[]) {
30 using namespace kaldi;
32 "Demonstrating how to use the Speex wrapper in Kaldi by compressing input waveforms \n" 33 "chunk by chunk and then decompressing them.\n" 35 "Usage: compress-uncompress-speex [options] <wav-rspecifier> <wav-wspecifier>\n";
41 po.
Register(
"chunk-length", &chunk_length_secs,
42 "Length of chunk size in seconds, that we process.");
53 std::string wav_rspecifier = po.
GetArg(1);
54 std::string wav_wspecifier = po.
GetArg(2);
58 int32 num_success = 0;
60 for(; !reader.
Done(); reader.
Next()){
61 std::string wav_key = reader.
Key();
66 int32 num_chan = wave_data.
NumRows();
69 for(int32
i = 0;
i < num_chan;
i++){
75 int32 samp_offset = 0, decode_sample_offset = 0,
76 max_samp = samp_freq * chunk_length_secs;
77 while (samp_offset < wav_this_chan.
Dim()) {
78 int32 this_num_samp = max_samp;
79 if (this_num_samp > wav_this_chan.Dim() - samp_offset)
80 this_num_samp = wav_this_chan.
Dim() - samp_offset;
85 if (this_num_samp == wav_this_chan.Dim() - samp_offset)
87 std::vector<char> speex_bits_part;
94 int32 decode_num_samp = wave_part_spx.
Dim();
95 if (decode_sample_offset + decode_num_samp > wav_this_chan.Dim()) {
96 int32 num_samp_last = wav_this_chan.Dim() - decode_sample_offset;
99 wav_decode.Range(decode_sample_offset, num_samp_last).
100 CopyFromVec(wave_part_tmp);
101 decode_sample_offset += num_samp_last;
103 wav_decode.Range(decode_sample_offset, decode_num_samp).
104 CopyFromVec(wave_part_spx);
105 decode_sample_offset += wave_part_spx.
Dim();
108 samp_offset += this_num_samp;
111 new_wave.CopyRowFromVec(wav_decode,
i);
113 WaveData wave_out(samp_freq, new_wave);
114 writer.
Write(wav_key, wave_out);
117 KALDI_LOG <<
"Successfully processed " << num_success <<
" files.";
119 }
catch(
const std::exception &e) {
120 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks.
void GetWaveform(Vector< BaseFloat > *waveform)
int main(int argc, char *argv[])
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void AcceptWaveform(int32 sample_rate, const VectorBase< BaseFloat > &waveform)
A templated class for writing objects to an archive or script file; see The Table concept.
BaseFloat SampFreq() const
void AcceptSpeexBits(const std::vector< char > &spx_enc_bits)
const Matrix< BaseFloat > & Data() const
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
This class's purpose is to read in Wave files.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Register(OptionsItf *opts)
Represents a non-allocating general vector which can be defined as a sub-vector of a higher-level vector.
void GetSpeexBits(std::vector< char > *spx_bits)