133     using namespace kaldi;
   136         "Corrupts the wave files supplied via input pipe with the specified\n"   137         "room-impulse response (rir_matrix) and additive noise distortions\n"   138         "(specified by corresponding files).\n"   139         "Usage:  wav-reverberate [options...] <wav-in-rxfilename> "   140         "<wav-out-wxfilename>\n"   142         "wav-reverberate --duration=20.25 --impulse-response=rir.wav "   143         "--additive-signals='noise1.wav,noise2.wav' --snrs='20.0,15.0' "   144         "--start-times='0,17.8' input.wav output.wav\n";
   147     std::string rir_file;
   148     std::string additive_signals;
   150     std::string start_times;
   151     bool multi_channel_output = 
false;
   152     bool shift_output = 
true;
   153     int32 input_channel = 0;
   154     int32 rir_channel = 0;
   155     int32 noise_channel = 0;
   156     bool normalize_output = 
true;
   160     po.Register(
"multi-channel-output", &multi_channel_output,
   161                 "Specifies if the output should be multi-channel or not");
   162     po.Register(
"shift-output", &shift_output,
   163                 "If true, the reverberated waveform will be shifted by the "   164                 "amount of the peak position of the RIR and the length of "   165                 "the output waveform will be equal to the input waveform. "   166                 "If false, the length of the output waveform will be "   167                 "equal to (original input length + rir length - 1). "   168                 "This value is true by default and "   169                 "it only affects the output when RIR file is provided.");
   170     po.Register(
"input-wave-channel", &input_channel,
   171                 "Specifies the channel to be used from input as only a "   172                 "single channel will be used to generate reverberated output");
   173     po.Register(
"rir-channel", &rir_channel,
   174                 "Specifies the channel of the room impulse response, "   175                 "it will only be used when multi-channel-output is false");
   176     po.Register(
"noise-channel", &noise_channel,
   177                 "Specifies the channel of the noise file, "   178                 "it will only be used when multi-channel-output is false");
   179     po.Register(
"impulse-response", &rir_file,
   180                 "File with the impulse response for reverberating the input wave"   181                 "It can be either a file in wav format or a piped command. "   182                 "E.g. --impulse-response='rir.wav' or 'sox rir.wav - |' ");
   183     po.Register(
"additive-signals", &additive_signals,
   184                 "A comma separated list of additive signals. "   185                 "They can be either filenames or piped commands. "   186                 "E.g. --additive-signals='noise1.wav,noise2.wav' or "   187                 "'sox noise1.wav - |,sox noise2.wav - |'. "   188                 "Requires --snrs and --start-times.");
   189     po.Register(
"snrs", &snrs,
   190                 "A comma separated list of SNRs(dB). "   191                 "The additive signals will be scaled according to these SNRs. "   192                 "E.g. --snrs='20.0,0.0,5.0,10.0' ");
   193     po.Register(
"start-times", &start_times,
   194                 "A comma separated list of start times referring to the "   195                 "input signal. The additive signals will be added to the "   196                 "input signal starting at the offset. If the start time "   197                 "exceed the length of the input signal, the addition will "   199     po.Register(
"normalize-output", &normalize_output,
   200                 "If true, then after reverberating and "   201                 "possibly adding noise, scale so that the signal "   202                 "energy is the same as the original input signal. "   203                 "See also the --volume option.");
   204     po.Register(
"duration", &duration,
   205                 "If nonzero, it specified the duration (secs) of the output "   206                 "signal. If the duration t is less than the length of the "   207                 "input signal, the first t secs of the signal is trimmed, "   208                 "otherwise, the signal will be repeated to "   209                 "fulfill the duration specified.");
   210     po.Register(
"volume", &volume,
   211                 "If nonzero, a scaling factor on the signal that is applied "   212                 "after reverberating and possibly adding noise. "   213                 "If you set this option to a nonzero value, it will be as "   214                 "if you had also specified --normalize-output=false.");
   217     if (po.NumArgs() != 2) {
   222     if (multi_channel_output) {
   223       if (rir_channel != 0 || noise_channel != 0)
   224         KALDI_WARN << 
"options for --rir-channel and --noise-channel"   225                       "are ignored as --multi-channel-output is true.";
   228     std::string input_wave_file = po.GetArg(1);
   229     std::string output_wave_file = po.GetArg(2);
   234       Input ki(input_wave_file);
   235       waveholder.
Read(ki.Stream());
   236       input_wave = waveholder.
Value();
   242           num_input_channel = input_matrix.
NumRows();  
   243     KALDI_VLOG(1) << 
"sampling frequency of input: " << samp_freq_input
   244                   << 
" #samples: " << num_samp_input
   245                   << 
" #channel: " << num_input_channel;
   249     BaseFloat samp_freq_rir = samp_freq_input;
   250     int32 num_samp_rir = 0,
   252     if (!rir_file.empty()) {
   257         waveholder.
Read(ki.Stream());
   258         rir_wave = waveholder.
Value();
   260       rir_matrix = rir_wave.
Data();
   261       samp_freq_rir = rir_wave.
SampFreq();
   262       num_samp_rir = rir_matrix.
NumCols();
   263       num_rir_channel = rir_matrix.
NumRows();
   264       KALDI_VLOG(1) << 
"sampling frequency of rir: " << samp_freq_rir
   265                     << 
" #samples: " << num_samp_rir
   266                     << 
" #channel: " << num_rir_channel;
   267       if (!multi_channel_output) {
   272     std::vector<Matrix<BaseFloat> > additive_signal_matrices;
   273     if (!additive_signals.empty()) {
   274       if (snrs.empty() || start_times.empty())
   275         KALDI_ERR << 
"--additive-signals option requires "   276                      "--snrs and --start-times to be set.";
   277       std::vector<std::string> split_string;
   279       for (
size_t i = 0; 
i < split_string.size(); 
i++) {
   281         Input ki(split_string[
i]);
   282         waveholder.
Read(ki.Stream());
   288               num_channel = additive_signal_matrix.
NumRows();
   289         KALDI_VLOG(1) << 
"sampling frequency of additive signal: " << samp_freq
   290                       << 
" #samples: " << num_samp
   291                       << 
" #channel: " << num_channel;
   292         if (multi_channel_output) {
   298         additive_signal_matrices.push_back(additive_signal_matrix);
   302     std::vector<BaseFloat> snr_vector;
   307     std::vector<BaseFloat> start_time_vector;
   308     if (!start_times.empty()) {
   312     int32 shift_index = 0;
   313     int32 num_output_channels = (multi_channel_output ? num_rir_channel : 1);
   314     int32 num_samp_output = (duration > 0 ? samp_freq_input * duration :
   315                               (shift_output ? num_samp_input :
   316                                               num_samp_input + num_samp_rir - 1));
   319     for (
int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
   321       input.CopyRowFromMat(input_matrix, input_channel);
   322       float power_before_reverb = 
VecVec(input, input) / input.Dim();
   324       int32 this_rir_channel = (multi_channel_output ? output_channel : rir_channel);
   326       float early_energy = power_before_reverb;
   327       if (!rir_file.empty()) {
   331         rir.
Scale(1.0 / (1 << 15));
   336           rir.
Max(&shift_index);
   340       if (additive_signal_matrices.size() > 0) {
   342         int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
   343         KALDI_ASSERT(additive_signal_matrices.size() == snr_vector.size());
   344         KALDI_ASSERT(additive_signal_matrices.size() == start_time_vector.size());
   345         for (
int32 i = 0; 
i < additive_signal_matrices.size(); 
i++) {
   346           noise.Resize(additive_signal_matrices[
i].NumCols());
   347           noise.CopyRowFromMat(additive_signal_matrices[
i], this_noise_channel);
   348           AddNoise(&noise, snr_vector[i], start_time_vector[i],
   349                     samp_freq_input, early_energy, &input);
   353       float power_after_reverb = 
VecVec(input, input) / input.Dim();
   357       else if (normalize_output)
   358         input.Scale(sqrt(power_before_reverb / power_after_reverb));
   360       if (num_samp_output <= num_samp_input) {
   362         out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel);
   366         extended_input.SetZero();
   368         out_matrix.CopyRowFromVec(extended_input, output_channel);
   372     WaveData out_wave(samp_freq_input, out_matrix);
   373     Output ko(output_wave_file, 
false);
   374     out_wave.Write(ko.Stream());
   377   } 
catch(
const std::exception &e) {
   378     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
bool Read(std::istream &is)
 
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix). 
 
void ReadCommaSeparatedCommand(const std::string &s, std::vector< BaseFloat > *v)
 
float DoReverberation(const Vector< BaseFloat > &rir, BaseFloat samp_freq, Vector< BaseFloat > *signal)
 
BaseFloat SampFreq() const
 
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero). 
 
const Matrix< BaseFloat > & Data() const
 
void CopyRowFromMat(const MatrixBase< Real > &M, MatrixIndexT row)
Extracts a row of the matrix M. 
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
void AddVectorsOfUnequalLength(const VectorBase< BaseFloat > &signal1, Vector< BaseFloat > *signal2)
 
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single-character delimiters in delim. 
 
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector. 
 
void Scale(Real alpha)
Multiplies all elements by this constant. 
 
This class's purpose is to read in Wave files. 
 
A class representing a vector. 
 
#define KALDI_ASSERT(cond)
 
void AddNoise(Vector< BaseFloat > *noise, BaseFloat snr_db, BaseFloat time, BaseFloat samp_freq, BaseFloat signal_power, Vector< BaseFloat > *signal)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product between a and b.