compress-uncompress-speex.cc
Go to the documentation of this file.
1 // online2bin/compress-uncompress-speex.cc
2 
3 // 2014 IMSL, PKU-HKUST (author: Wei Shi)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include <vector>
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "feat/wave-reader.h"
25 
26 
27 int main(int argc, char *argv[]) {
28  try {
29  typedef kaldi::int32 int32;
30  using namespace kaldi;
31  const char *usage =
32  "Demonstrating how to use the Speex wrapper in Kaldi by compressing input waveforms \n"
33  "chunk by chunk and then decompressing them.\n"
34  "\n"
35  "Usage: compress-uncompress-speex [options] <wav-rspecifier> <wav-wspecifier>\n";
36 
37  ParseOptions po(usage);
38  SpeexOptions spx_config;
39  BaseFloat chunk_length_secs = 0.05;
40 
41  po.Register("chunk-length", &chunk_length_secs,
42  "Length of chunk size in seconds, that we process.");
43 
44  spx_config.Register(&po);
45 
46  po.Read(argc, argv);
47 
48  if (po.NumArgs() != 2) {
49  po.PrintUsage();
50  exit(1);
51  }
52 
53  std::string wav_rspecifier = po.GetArg(1);
54  std::string wav_wspecifier = po.GetArg(2);
55 
56  SequentialTableReader<WaveHolder> reader(wav_rspecifier);
57  TableWriter<WaveHolder> writer(wav_wspecifier);
58  int32 num_success = 0;
59 
60  for(; !reader.Done(); reader.Next()){
61  std::string wav_key = reader.Key();
62  const WaveData &wave = reader.Value();
63 
64  BaseFloat samp_freq = wave.SampFreq(); // read sampling fequency
65  const Matrix<BaseFloat> &wave_data = wave.Data();
66  int32 num_chan = wave_data.NumRows(); // number of channels in recording
67 
68  Matrix<BaseFloat> new_wave(wave_data.NumRows(), wave_data.NumCols());
69  for(int32 i = 0; i < num_chan; i++){
70  OnlineSpeexEncoder spx_encoder(spx_config);
71  OnlineSpeexDecoder spx_decoder(spx_config);
72  Vector<BaseFloat> wav_this_chan(wave_data.Row(i));
73  Vector<BaseFloat> wav_decode(wav_this_chan.Dim());
74 
75  int32 samp_offset = 0, decode_sample_offset = 0,
76  max_samp = samp_freq * chunk_length_secs;
77  while (samp_offset < wav_this_chan.Dim()) {
78  int32 this_num_samp = max_samp;
79  if (this_num_samp > wav_this_chan.Dim() - samp_offset)
80  this_num_samp = wav_this_chan.Dim() - samp_offset;
81  SubVector<BaseFloat> wave_part(wav_this_chan, samp_offset,
82  this_num_samp);
83 
84  spx_encoder.AcceptWaveform(samp_freq, wave_part);
85  if (this_num_samp == wav_this_chan.Dim() - samp_offset) // no more input.
86  spx_encoder.InputFinished();
87  std::vector<char> speex_bits_part;
88  spx_encoder.GetSpeexBits(&speex_bits_part);
89 
90  Vector<BaseFloat> wave_part_spx;
91  spx_decoder.AcceptSpeexBits(speex_bits_part);
92  spx_decoder.GetWaveform(&wave_part_spx);
93 
94  int32 decode_num_samp = wave_part_spx.Dim();
95  if (decode_sample_offset + decode_num_samp > wav_this_chan.Dim()) {
96  int32 num_samp_last = wav_this_chan.Dim() - decode_sample_offset;
97  SubVector<BaseFloat> wave_part_tmp(wave_part_spx,0,num_samp_last);
98 
99  wav_decode.Range(decode_sample_offset, num_samp_last).
100  CopyFromVec(wave_part_tmp);
101  decode_sample_offset += num_samp_last;
102  } else {
103  wav_decode.Range(decode_sample_offset, decode_num_samp).
104  CopyFromVec(wave_part_spx);
105  decode_sample_offset += wave_part_spx.Dim();
106  }
107 
108  samp_offset += this_num_samp;
109  }
110 
111  new_wave.CopyRowFromVec(wav_decode, i);
112  }
113  WaveData wave_out(samp_freq, new_wave);
114  writer.Write(wav_key, wave_out);
115  num_success++;
116  }
117  KALDI_LOG << "Successfully processed " << num_success << " files.";
118  return 0;
119  } catch(const std::exception &e) {
120  std::cerr << e.what();
121  return -1;
122  }
123 }
124 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void GetWaveform(Vector< BaseFloat > *waveform)
int main(int argc, char *argv[])
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void AcceptWaveform(int32 sample_rate, const VectorBase< BaseFloat > &waveform)
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
BaseFloat SampFreq() const
Definition: wave-reader.h:126
void AcceptSpeexBits(const std::vector< char > &spx_enc_bits)
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
This class's purpose is to read in Wave files.
Definition: wave-reader.h:106
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
Definition: kaldi-vector.h:406
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Register(OptionsItf *opts)
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void GetSpeexBits(std::vector< char > *spx_bits)