133 using namespace kaldi;
136 "Corrupts the wave files supplied via input pipe with the specified\n" 137 "room-impulse response (rir_matrix) and additive noise distortions\n" 138 "(specified by corresponding files).\n" 139 "Usage: wav-reverberate [options...] <wav-in-rxfilename> " 140 "<wav-out-wxfilename>\n" 142 "wav-reverberate --duration=20.25 --impulse-response=rir.wav " 143 "--additive-signals='noise1.wav,noise2.wav' --snrs='20.0,15.0' " 144 "--start-times='0,17.8' input.wav output.wav\n";
147 std::string rir_file;
148 std::string additive_signals;
150 std::string start_times;
151 bool multi_channel_output =
false;
152 bool shift_output =
true;
153 int32 input_channel = 0;
154 int32 rir_channel = 0;
155 int32 noise_channel = 0;
156 bool normalize_output =
true;
160 po.Register(
"multi-channel-output", &multi_channel_output,
161 "Specifies if the output should be multi-channel or not");
162 po.Register(
"shift-output", &shift_output,
163 "If true, the reverberated waveform will be shifted by the " 164 "amount of the peak position of the RIR and the length of " 165 "the output waveform will be equal to the input waveform. " 166 "If false, the length of the output waveform will be " 167 "equal to (original input length + rir length - 1). " 168 "This value is true by default and " 169 "it only affects the output when RIR file is provided.");
170 po.Register(
"input-wave-channel", &input_channel,
171 "Specifies the channel to be used from input as only a " 172 "single channel will be used to generate reverberated output");
173 po.Register(
"rir-channel", &rir_channel,
174 "Specifies the channel of the room impulse response, " 175 "it will only be used when multi-channel-output is false");
176 po.Register(
"noise-channel", &noise_channel,
177 "Specifies the channel of the noise file, " 178 "it will only be used when multi-channel-output is false");
179 po.Register(
"impulse-response", &rir_file,
180 "File with the impulse response for reverberating the input wave" 181 "It can be either a file in wav format or a piped command. " 182 "E.g. --impulse-response='rir.wav' or 'sox rir.wav - |' ");
183 po.Register(
"additive-signals", &additive_signals,
184 "A comma separated list of additive signals. " 185 "They can be either filenames or piped commands. " 186 "E.g. --additive-signals='noise1.wav,noise2.wav' or " 187 "'sox noise1.wav - |,sox noise2.wav - |'. " 188 "Requires --snrs and --start-times.");
189 po.Register(
"snrs", &snrs,
190 "A comma separated list of SNRs(dB). " 191 "The additive signals will be scaled according to these SNRs. " 192 "E.g. --snrs='20.0,0.0,5.0,10.0' ");
193 po.Register(
"start-times", &start_times,
194 "A comma separated list of start times referring to the " 195 "input signal. The additive signals will be added to the " 196 "input signal starting at the offset. If the start time " 197 "exceed the length of the input signal, the addition will " 199 po.Register(
"normalize-output", &normalize_output,
200 "If true, then after reverberating and " 201 "possibly adding noise, scale so that the signal " 202 "energy is the same as the original input signal. " 203 "See also the --volume option.");
204 po.Register(
"duration", &duration,
205 "If nonzero, it specified the duration (secs) of the output " 206 "signal. If the duration t is less than the length of the " 207 "input signal, the first t secs of the signal is trimmed, " 208 "otherwise, the signal will be repeated to " 209 "fulfill the duration specified.");
210 po.Register(
"volume", &volume,
211 "If nonzero, a scaling factor on the signal that is applied " 212 "after reverberating and possibly adding noise. " 213 "If you set this option to a nonzero value, it will be as " 214 "if you had also specified --normalize-output=false.");
217 if (po.NumArgs() != 2) {
222 if (multi_channel_output) {
223 if (rir_channel != 0 || noise_channel != 0)
224 KALDI_WARN <<
"options for --rir-channel and --noise-channel" 225 "are ignored as --multi-channel-output is true.";
228 std::string input_wave_file = po.GetArg(1);
229 std::string output_wave_file = po.GetArg(2);
234 Input ki(input_wave_file);
235 waveholder.
Read(ki.Stream());
236 input_wave = waveholder.
Value();
242 num_input_channel = input_matrix.
NumRows();
243 KALDI_VLOG(1) <<
"sampling frequency of input: " << samp_freq_input
244 <<
" #samples: " << num_samp_input
245 <<
" #channel: " << num_input_channel;
249 BaseFloat samp_freq_rir = samp_freq_input;
250 int32 num_samp_rir = 0,
252 if (!rir_file.empty()) {
257 waveholder.
Read(ki.Stream());
258 rir_wave = waveholder.
Value();
260 rir_matrix = rir_wave.
Data();
261 samp_freq_rir = rir_wave.
SampFreq();
262 num_samp_rir = rir_matrix.
NumCols();
263 num_rir_channel = rir_matrix.
NumRows();
264 KALDI_VLOG(1) <<
"sampling frequency of rir: " << samp_freq_rir
265 <<
" #samples: " << num_samp_rir
266 <<
" #channel: " << num_rir_channel;
267 if (!multi_channel_output) {
272 std::vector<Matrix<BaseFloat> > additive_signal_matrices;
273 if (!additive_signals.empty()) {
274 if (snrs.empty() || start_times.empty())
275 KALDI_ERR <<
"--additive-signals option requires " 276 "--snrs and --start-times to be set.";
277 std::vector<std::string> split_string;
279 for (
size_t i = 0;
i < split_string.size();
i++) {
281 Input ki(split_string[
i]);
282 waveholder.
Read(ki.Stream());
288 num_channel = additive_signal_matrix.
NumRows();
289 KALDI_VLOG(1) <<
"sampling frequency of additive signal: " << samp_freq
290 <<
" #samples: " << num_samp
291 <<
" #channel: " << num_channel;
292 if (multi_channel_output) {
298 additive_signal_matrices.push_back(additive_signal_matrix);
302 std::vector<BaseFloat> snr_vector;
307 std::vector<BaseFloat> start_time_vector;
308 if (!start_times.empty()) {
312 int32 shift_index = 0;
313 int32 num_output_channels = (multi_channel_output ? num_rir_channel : 1);
314 int32 num_samp_output = (duration > 0 ? samp_freq_input * duration :
315 (shift_output ? num_samp_input :
316 num_samp_input + num_samp_rir - 1));
319 for (
int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
321 input.CopyRowFromMat(input_matrix, input_channel);
322 float power_before_reverb =
VecVec(input, input) / input.Dim();
324 int32 this_rir_channel = (multi_channel_output ? output_channel : rir_channel);
326 float early_energy = power_before_reverb;
327 if (!rir_file.empty()) {
331 rir.
Scale(1.0 / (1 << 15));
336 rir.
Max(&shift_index);
340 if (additive_signal_matrices.size() > 0) {
342 int32 this_noise_channel = (multi_channel_output ? output_channel : noise_channel);
343 KALDI_ASSERT(additive_signal_matrices.size() == snr_vector.size());
344 KALDI_ASSERT(additive_signal_matrices.size() == start_time_vector.size());
345 for (
int32 i = 0;
i < additive_signal_matrices.size();
i++) {
346 noise.Resize(additive_signal_matrices[
i].NumCols());
347 noise.CopyRowFromMat(additive_signal_matrices[
i], this_noise_channel);
348 AddNoise(&noise, snr_vector[i], start_time_vector[i],
349 samp_freq_input, early_energy, &input);
353 float power_after_reverb =
VecVec(input, input) / input.Dim();
357 else if (normalize_output)
358 input.Scale(sqrt(power_before_reverb / power_after_reverb));
360 if (num_samp_output <= num_samp_input) {
362 out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel);
366 extended_input.SetZero();
368 out_matrix.CopyRowFromVec(extended_input, output_channel);
372 WaveData out_wave(samp_freq_input, out_matrix);
373 Output ko(output_wave_file,
false);
374 out_wave.Write(ko.Stream());
377 }
catch(
const std::exception &e) {
378 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool Read(std::istream &is)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void ReadCommaSeparatedCommand(const std::string &s, std::vector< BaseFloat > *v)
float DoReverberation(const Vector< BaseFloat > &rir, BaseFloat samp_freq, Vector< BaseFloat > *signal)
BaseFloat SampFreq() const
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
const Matrix< BaseFloat > & Data() const
void CopyRowFromMat(const MatrixBase< Real > &M, MatrixIndexT row)
Extracts a row of the matrix M.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void AddVectorsOfUnequalLength(const VectorBase< BaseFloat > &signal1, Vector< BaseFloat > *signal2)
void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< std::string > *out)
Split a string using any of the single-character delimiters.
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
This class's purpose is to read in Wave files.
A class representing a vector.
#define KALDI_ASSERT(cond)
void AddNoise(Vector< BaseFloat > *noise, BaseFloat snr_db, BaseFloat time, BaseFloat samp_freq, BaseFloat signal_power, Vector< BaseFloat > *signal)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of a and b.