compress-uncompress-speex.cc File Reference
#include <vector>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "feat/wave-reader.h"
#include "online2/online-speex-wrapper.h"
Include dependency graph for compress-uncompress-speex.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 27 of file compress-uncompress-speex.cc.

References OnlineSpeexDecoder::AcceptSpeexBits(), OnlineSpeexEncoder::AcceptWaveform(), WaveData::Data(), VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), OnlineSpeexEncoder::GetSpeexBits(), OnlineSpeexDecoder::GetWaveform(), rnnlm::i, OnlineSpeexEncoder::InputFinished(), KALDI_LOG, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), ParseOptions::Read(), SpeexOptions::Register(), ParseOptions::Register(), MatrixBase< Real >::Row(), WaveData::SampFreq(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

27  {
28  try {
29  typedef kaldi::int32 int32;
30  using namespace kaldi;
31  const char *usage =
32  "Demonstrating how to use the Speex wrapper in Kaldi by compressing input waveforms \n"
33  "chunk by chunk and then decompressing them.\n"
34  "\n"
35  "Usage: compress-uncompress-speex [options] <wav-rspecifier> <wav-wspecifier>\n";
36 
37  ParseOptions po(usage);
38  SpeexOptions spx_config;
39  BaseFloat chunk_length_secs = 0.05;
40 
41  po.Register("chunk-length", &chunk_length_secs,
42  "Length of chunk size in seconds, that we process.");
43 
44  spx_config.Register(&po);
45 
46  po.Read(argc, argv);
47 
48  if (po.NumArgs() != 2) {
49  po.PrintUsage();
50  exit(1);
51  }
52 
53  std::string wav_rspecifier = po.GetArg(1);
54  std::string wav_wspecifier = po.GetArg(2);
55 
56  SequentialTableReader<WaveHolder> reader(wav_rspecifier);
57  TableWriter<WaveHolder> writer(wav_wspecifier);
58  int32 num_success = 0;
59 
60  for(; !reader.Done(); reader.Next()){
61  std::string wav_key = reader.Key();
62  const WaveData &wave = reader.Value();
63 
64  BaseFloat samp_freq = wave.SampFreq(); // read sampling fequency
65  const Matrix<BaseFloat> &wave_data = wave.Data();
66  int32 num_chan = wave_data.NumRows(); // number of channels in recording
67 
68  Matrix<BaseFloat> new_wave(wave_data.NumRows(), wave_data.NumCols());
69  for(int32 i = 0; i < num_chan; i++){
70  OnlineSpeexEncoder spx_encoder(spx_config);
71  OnlineSpeexDecoder spx_decoder(spx_config);
72  Vector<BaseFloat> wav_this_chan(wave_data.Row(i));
73  Vector<BaseFloat> wav_decode(wav_this_chan.Dim());
74 
75  int32 samp_offset = 0, decode_sample_offset = 0,
76  max_samp = samp_freq * chunk_length_secs;
77  while (samp_offset < wav_this_chan.Dim()) {
78  int32 this_num_samp = max_samp;
79  if (this_num_samp > wav_this_chan.Dim() - samp_offset)
80  this_num_samp = wav_this_chan.Dim() - samp_offset;
81  SubVector<BaseFloat> wave_part(wav_this_chan, samp_offset,
82  this_num_samp);
83 
84  spx_encoder.AcceptWaveform(samp_freq, wave_part);
85  if (this_num_samp == wav_this_chan.Dim() - samp_offset) // no more input.
86  spx_encoder.InputFinished();
87  std::vector<char> speex_bits_part;
88  spx_encoder.GetSpeexBits(&speex_bits_part);
89 
90  Vector<BaseFloat> wave_part_spx;
91  spx_decoder.AcceptSpeexBits(speex_bits_part);
92  spx_decoder.GetWaveform(&wave_part_spx);
93 
94  int32 decode_num_samp = wave_part_spx.Dim();
95  if (decode_sample_offset + decode_num_samp > wav_this_chan.Dim()) {
96  int32 num_samp_last = wav_this_chan.Dim() - decode_sample_offset;
97  SubVector<BaseFloat> wave_part_tmp(wave_part_spx,0,num_samp_last);
98 
99  wav_decode.Range(decode_sample_offset, num_samp_last).
100  CopyFromVec(wave_part_tmp);
101  decode_sample_offset += num_samp_last;
102  } else {
103  wav_decode.Range(decode_sample_offset, decode_num_samp).
104  CopyFromVec(wave_part_spx);
105  decode_sample_offset += wave_part_spx.Dim();
106  }
107 
108  samp_offset += this_num_samp;
109  }
110 
111  new_wave.CopyRowFromVec(wav_decode, i);
112  }
113  WaveData wave_out(samp_freq, new_wave);
114  writer.Write(wav_key, wave_out);
115  num_success++;
116  }
117  KALDI_LOG << "Successfully processed " << num_success << " files.";
118  return 0;
119  } catch(const std::exception &e) {
120  std::cerr << e.what();
121  return -1;
122  }
123 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
BaseFloat SampFreq() const
Definition: wave-reader.h:126
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:64
This class's purpose is to read in Wave files.
Definition: wave-reader.h:106
A class representing a vector.
Definition: kaldi-vector.h:406
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void Register(OptionsItf *opts)
#define KALDI_LOG
Definition: kaldi-error.h:153
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501