41 memset(tag,
'\0',
sizeof tag);
47 KALDI_ERR <<
"WaveData: expected " << expected
48 <<
", failed to read anything";
49 if (strcmp(tag, expected))
50 KALDI_ERR <<
"WaveData: expected " << expected <<
", got " << tag;
56 KALDI_ERR <<
"WaveData: expected 4-byte chunk-name, got read error";
68 KALDI_ERR <<
"WaveData: unexpected end of file or read error";
81 KALDI_ERR <<
"WaveData: unexpected end of file or read error";
97 KALDI_ERR <<
"WaveData: error writing to stream.";
106 #ifdef __BIG_ENDIAN__ 111 KALDI_ERR <<
"WaveData: error writing to stream.";
117 if (strcmp(reader.
tag,
"RIFF") == 0)
118 reverse_bytes_ =
false;
119 else if (strcmp(reader.
tag,
"RIFX") == 0)
120 reverse_bytes_ =
true;
122 KALDI_ERR <<
"WaveData: expected RIFF or RIFX, got " << reader.
tag;
124 #ifdef __BIG_ENDIAN__ 125 reverse_bytes_ = !reverse_bytes_;
127 reader.
swap = reverse_bytes_;
132 uint32 riff_chunk_read = 0;
133 riff_chunk_read += 4;
138 riff_chunk_read += 4;
139 while (strcmp(reader.
tag,
"fmt ") != 0) {
141 riff_chunk_read += 4;
142 for (uint32
i = 0;
i < filler_size;
i++) {
145 riff_chunk_read += filler_size;
148 riff_chunk_read += 4;
159 samp_freq_ =
static_cast<BaseFloat>(sample_rate);
161 uint32 fmt_chunk_read = 16;
162 if (audio_format == 1) {
163 if (subchunk1_size < 16) {
164 KALDI_ERR <<
"WaveData: expect PCM format data to have fmt chunk " 165 <<
"of at least size 16.";
167 }
else if (audio_format == 0xFFFE) {
169 if (subchunk1_size < 40 || extra_size < 22) {
170 KALDI_ERR <<
"WaveData: malformed WAVE_FORMAT_EXTENSIBLE format data.";
185 if (guid1 != 0x00000001 || guid2 != 0x00100000 ||
186 guid3 != 0xAA000080 || guid4 != 0x719B3800) {
187 KALDI_ERR <<
"WaveData: unsupported WAVE_FORMAT_EXTENSIBLE format.";
190 KALDI_ERR <<
"WaveData: can read only PCM data, format id in file is: " 194 for (uint32
i = fmt_chunk_read;
i < subchunk1_size; ++
i)
197 if (num_channels_ == 0)
198 KALDI_ERR <<
"WaveData: no channels present";
199 if (bits_per_sample != 16)
200 KALDI_ERR <<
"WaveData: unsupported bits_per_sample = " << bits_per_sample;
201 if (byte_rate != sample_rate * bits_per_sample/8 * num_channels_)
202 KALDI_ERR <<
"Unexpected byte rate " << byte_rate <<
" vs. " 203 << sample_rate <<
" * " << (bits_per_sample/8)
204 <<
" * " << num_channels_;
205 if (block_align != num_channels_ * bits_per_sample/8)
206 KALDI_ERR <<
"Unexpected block_align: " << block_align <<
" vs. " 207 << num_channels_ <<
" * " << (bits_per_sample/8);
209 riff_chunk_read += 4 + subchunk1_size;
216 riff_chunk_read += 4;
221 while (strcmp(reader.
tag,
"data") != 0) {
224 if (chunk_sz != 4 && strcmp(reader.
tag,
"fact") == 0)
225 KALDI_WARN <<
"Expected fact chunk to be 4 bytes long.";
226 for (uint32
i = 0;
i < chunk_sz;
i++)
228 riff_chunk_read += 4 + chunk_sz;
232 riff_chunk_read += 4;
237 riff_chunk_read += 4;
241 bool is_stream_mode =
243 || riff_chunk_size == 0xFFFFFFFF
244 || data_chunk_size == 0
245 || data_chunk_size == 0xFFFFFFFF
246 || data_chunk_size == 0x7FFFF000;
249 KALDI_VLOG(1) <<
"Read in RIFF chunk size: " << riff_chunk_size
250 <<
", data chunk size: " << data_chunk_size
251 <<
". Assume 'stream mode' (reading data to EOF).";
254 && std::abs(static_cast<int64>(riff_chunk_read) +
255 static_cast<int64>(data_chunk_size) -
256 static_cast<int64>(riff_chunk_size)) > 1) {
260 KALDI_WARN <<
"Expected " << riff_chunk_size <<
" bytes in RIFF chunk, but " 261 <<
"after first data block there will be " << riff_chunk_read
262 <<
" + " << data_chunk_size <<
" bytes " 263 <<
"(we do not support reading multiple data chunks).";
269 samp_count_ = data_chunk_size / block_align;
273 const uint32 kBlockSize = 1024 * 1024;
281 std::vector<char> buffer;
286 while (is && bytes_to_go > 0) {
287 uint32 block_bytes = std::min(bytes_to_go, kBlockSize);
288 uint32 offset = buffer.size();
289 buffer.resize(offset + block_bytes);
290 is.read(&buffer[offset], block_bytes);
291 uint32 bytes_read = is.gcount();
292 buffer.resize(offset + bytes_read);
294 bytes_to_go -= bytes_read;
298 KALDI_ERR <<
"WaveData: file read error";
300 if (buffer.size() == 0)
301 KALDI_ERR <<
"WaveData: empty file (no data)";
305 <<
"but read only " << buffer.size() <<
" bytes. " 306 <<
"Truncated file?";
309 uint16 *data_ptr =
reinterpret_cast<uint16*
>(&buffer[0]);
314 for (uint32
i = 0;
i <
data_.NumCols(); ++
i) {
315 for (uint32
j = 0;
j <
data_.NumRows(); ++
j) {
316 int16 k = *data_ptr++;
334 if (
data_.NumRows() == 0)
335 KALDI_ERR <<
"Error: attempting to write empty WAVE file";
338 num_samp =
data_.NumCols(),
341 int32 subchunk2size = (num_chan * num_samp * bytes_per_samp);
342 int32 chunk_size = 36 + subchunk2size;
351 WriteUint32(os, static_cast<int32>(samp_freq_) * num_chan * bytes_per_samp);
361 for (
int32 i = 0;
i < num_samp;
i++) {
362 for (
int32 j = 0;
j < num_chan;
j++) {
363 int32 elem =
static_cast<int32>(trunc(data_ptr[
j * stride +
i]));
364 int16 elem_16 =
static_cast<int16
>(elem);
365 if (elem < std::numeric_limits<int16>::min()) {
366 elem_16 = std::numeric_limits<int16>::min();
368 }
else if (elem > std::numeric_limits<int16>::max()) {
369 elem_16 = std::numeric_limits<int16>::max();
372 #ifdef __BIG_ENDIAN__ 375 os.write(reinterpret_cast<char*>(&elem_16), 2);
379 KALDI_ERR <<
"Error writing wave data to stream.";
381 KALDI_WARN <<
"WARNING: clipped " << num_clipped
382 <<
" samples out of total " << num_chan * num_samp
383 <<
". Reduce volume?";
int32 NumChannels() const
Number of channels, 1 to 16.
void Read(std::istream &is)
Read() will throw on error.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
size_t DataBytes() const
Wave data bytes. Invalid if IsStreamed() is true.
bool ReverseBytes() const
Is data file byte order different from machine byte order?
bool IsStreamed() const
Is stream size unknown? Duration and SampleCount not valid if true.
void Read(std::istream &is)
'is' should be opened in binary mode.
This class reads and holds wave file header information.
#define KALDI_ASSERT(cond)
BaseFloat SampFreq() const
Sample frequency, Hz.
void Write(std::ostream &os) const
Write() will throw on error. os should be opened in binary mode.
static void WriteUint16(std::ostream &os, int16 i)
static void WriteUint32(std::ostream &os, int32 i)
size_t BlockAlign() const
Bytes per sample.