wave-reader.cc
Go to the documentation of this file.
1 // feat/wave-reader.cc
2 
3 // Copyright 2009-2011 Karel Vesely; Petr Motlicek
4 // 2013 Florent Masson
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <sstream>
#include <vector>

#include "feat/wave-reader.h"
#include "base/kaldi-error.h"
#include "base/kaldi-utils.h"
31 
32 namespace kaldi {
33 
34 // A utility class for reading wave header.
36  std::istream &is;
37  bool swap;
38  char tag[5];
39 
40  WaveHeaderReadGofer(std::istream &is) : is(is), swap(false) {
41  memset(tag, '\0', sizeof tag);
42  }
43 
44  void Expect4ByteTag(const char *expected) {
45  is.read(tag, 4);
46  if (is.fail())
47  KALDI_ERR << "WaveData: expected " << expected
48  << ", failed to read anything";
49  if (strcmp(tag, expected))
50  KALDI_ERR << "WaveData: expected " << expected << ", got " << tag;
51  }
52 
53  void Read4ByteTag() {
54  is.read(tag, 4);
55  if (is.fail())
56  KALDI_ERR << "WaveData: expected 4-byte chunk-name, got read error";
57  }
58 
59  uint32 ReadUint32() {
60  union {
61  char result[4];
62  uint32 ans;
63  } u;
64  is.read(u.result, 4);
65  if (swap)
66  KALDI_SWAP4(u.result);
67  if (is.fail())
68  KALDI_ERR << "WaveData: unexpected end of file or read error";
69  return u.ans;
70  }
71 
72  uint16 ReadUint16() {
73  union {
74  char result[2];
75  int16 ans;
76  } u;
77  is.read(u.result, 2);
78  if (swap)
79  KALDI_SWAP2(u.result);
80  if (is.fail())
81  KALDI_ERR << "WaveData: unexpected end of file or read error";
82  return u.ans;
83  }
84 };
85 
86 static void WriteUint32(std::ostream &os, int32 i) {
87  union {
88  char buf[4];
89  int i;
90  } u;
91  u.i = i;
92 #ifdef __BIG_ENDIAN__
93  KALDI_SWAP4(u.buf);
94 #endif
95  os.write(u.buf, 4);
96  if (os.fail())
97  KALDI_ERR << "WaveData: error writing to stream.";
98 }
99 
100 static void WriteUint16(std::ostream &os, int16 i) {
101  union {
102  char buf[2];
103  int16 i;
104  } u;
105  u.i = i;
106 #ifdef __BIG_ENDIAN__
107  KALDI_SWAP2(u.buf);
108 #endif
109  os.write(u.buf, 2);
110  if (os.fail())
111  KALDI_ERR << "WaveData: error writing to stream.";
112 }
113 
114 void WaveInfo::Read(std::istream &is) {
115  WaveHeaderReadGofer reader(is);
116  reader.Read4ByteTag();
117  if (strcmp(reader.tag, "RIFF") == 0)
118  reverse_bytes_ = false;
119  else if (strcmp(reader.tag, "RIFX") == 0)
120  reverse_bytes_ = true;
121  else
122  KALDI_ERR << "WaveData: expected RIFF or RIFX, got " << reader.tag;
123 
124 #ifdef __BIG_ENDIAN__
125  reverse_bytes_ = !reverse_bytes_;
126 #endif
127  reader.swap = reverse_bytes_;
128 
129  uint32 riff_chunk_size = reader.ReadUint32();
130  reader.Expect4ByteTag("WAVE");
131 
132  uint32 riff_chunk_read = 0;
133  riff_chunk_read += 4; // WAVE included in riff_chunk_size.
134 
135  // Possibly skip any RIFF tags between 'WAVE' and 'fmt '.
136  // Apple devices produce a filler tag 'JUNK' for memory alignment.
137  reader.Read4ByteTag();
138  riff_chunk_read += 4;
139  while (strcmp(reader.tag,"fmt ") != 0) {
140  uint32 filler_size = reader.ReadUint32();
141  riff_chunk_read += 4;
142  for (uint32 i = 0; i < filler_size; i++) {
143  is.get(); // read 1 byte,
144  }
145  riff_chunk_read += filler_size;
146  // get next RIFF tag,
147  reader.Read4ByteTag();
148  riff_chunk_read += 4;
149  }
150 
151  KALDI_ASSERT(strcmp(reader.tag,"fmt ") == 0);
152  uint32 subchunk1_size = reader.ReadUint32();
153  uint16 audio_format = reader.ReadUint16();
154  num_channels_ = reader.ReadUint16();
155  uint32 sample_rate = reader.ReadUint32(),
156  byte_rate = reader.ReadUint32(),
157  block_align = reader.ReadUint16(),
158  bits_per_sample = reader.ReadUint16();
159  samp_freq_ = static_cast<BaseFloat>(sample_rate);
160 
161  uint32 fmt_chunk_read = 16;
162  if (audio_format == 1) {
163  if (subchunk1_size < 16) {
164  KALDI_ERR << "WaveData: expect PCM format data to have fmt chunk "
165  << "of at least size 16.";
166  }
167  } else if (audio_format == 0xFFFE) { // WAVE_FORMAT_EXTENSIBLE
168  uint16 extra_size = reader.ReadUint16();
169  if (subchunk1_size < 40 || extra_size < 22) {
170  KALDI_ERR << "WaveData: malformed WAVE_FORMAT_EXTENSIBLE format data.";
171  }
172  reader.ReadUint16(); // Unused for PCM.
173  reader.ReadUint32(); // Channel map: we do not care.
174  uint32 guid1 = reader.ReadUint32(),
175  guid2 = reader.ReadUint32(),
176  guid3 = reader.ReadUint32(),
177  guid4 = reader.ReadUint32();
178  fmt_chunk_read = 40;
179 
180  // Support only KSDATAFORMAT_SUBTYPE_PCM for now. Interesting formats:
181  // ("00000001-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_PCM)
182  // ("00000003-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
183  // ("00000006-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_ALAW)
184  // ("00000007-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_MULAW)
185  if (guid1 != 0x00000001 || guid2 != 0x00100000 ||
186  guid3 != 0xAA000080 || guid4 != 0x719B3800) {
187  KALDI_ERR << "WaveData: unsupported WAVE_FORMAT_EXTENSIBLE format.";
188  }
189  } else {
190  KALDI_ERR << "WaveData: can read only PCM data, format id in file is: "
191  << audio_format;
192  }
193 
194  for (uint32 i = fmt_chunk_read; i < subchunk1_size; ++i)
195  is.get(); // use up extra data.
196 
197  if (num_channels_ == 0)
198  KALDI_ERR << "WaveData: no channels present";
199  if (bits_per_sample != 16)
200  KALDI_ERR << "WaveData: unsupported bits_per_sample = " << bits_per_sample;
201  if (byte_rate != sample_rate * bits_per_sample/8 * num_channels_)
202  KALDI_ERR << "Unexpected byte rate " << byte_rate << " vs. "
203  << sample_rate << " * " << (bits_per_sample/8)
204  << " * " << num_channels_;
205  if (block_align != num_channels_ * bits_per_sample/8)
206  KALDI_ERR << "Unexpected block_align: " << block_align << " vs. "
207  << num_channels_ << " * " << (bits_per_sample/8);
208 
209  riff_chunk_read += 4 + subchunk1_size;
210  // size of what we just read, 4 for subchunk1_size + subchunk1_size itself.
211 
212  // We support an optional "fact" chunk (which is useless but which
213  // we encountered), and then a single "data" chunk.
214 
215  reader.Read4ByteTag();
216  riff_chunk_read += 4;
217 
218  // Skip any subchunks between "fmt" and "data". Usually there will
219  // be a single "fact" subchunk, but on Windows there can also be a
220  // "list" subchunk.
221  while (strcmp(reader.tag, "data") != 0) {
222  // We will just ignore the data in these chunks.
223  uint32 chunk_sz = reader.ReadUint32();
224  if (chunk_sz != 4 && strcmp(reader.tag, "fact") == 0)
225  KALDI_WARN << "Expected fact chunk to be 4 bytes long.";
226  for (uint32 i = 0; i < chunk_sz; i++)
227  is.get();
228  riff_chunk_read += 4 + chunk_sz; // for chunk_sz (4) + chunk contents (chunk-sz)
229 
230  // Now read the next chunk name.
231  reader.Read4ByteTag();
232  riff_chunk_read += 4;
233  }
234 
235  KALDI_ASSERT(strcmp(reader.tag, "data") == 0);
236  uint32 data_chunk_size = reader.ReadUint32();
237  riff_chunk_read += 4;
238 
239  // Figure out if the file is going to be read to the end. Values as
240  // observed in the wild:
241  bool is_stream_mode =
242  riff_chunk_size == 0
243  || riff_chunk_size == 0xFFFFFFFF
244  || data_chunk_size == 0
245  || data_chunk_size == 0xFFFFFFFF
246  || data_chunk_size == 0x7FFFF000; // This value is used by SoX.
247 
248  if (is_stream_mode)
249  KALDI_VLOG(1) << "Read in RIFF chunk size: " << riff_chunk_size
250  << ", data chunk size: " << data_chunk_size
251  << ". Assume 'stream mode' (reading data to EOF).";
252 
253  if (!is_stream_mode
254  && std::abs(static_cast<int64>(riff_chunk_read) +
255  static_cast<int64>(data_chunk_size) -
256  static_cast<int64>(riff_chunk_size)) > 1) {
257  // We allow the size to be off by one without warning, because there is a
258  // weirdness in the format of RIFF files that means that the input may
259  // sometimes be padded with 1 unused byte to make the total size even.
260  KALDI_WARN << "Expected " << riff_chunk_size << " bytes in RIFF chunk, but "
261  << "after first data block there will be " << riff_chunk_read
262  << " + " << data_chunk_size << " bytes "
263  << "(we do not support reading multiple data chunks).";
264  }
265 
266  if (is_stream_mode)
267  samp_count_ = -1;
268  else
269  samp_count_ = data_chunk_size / block_align;
270 }
271 
272 void WaveData::Read(std::istream &is) {
273  const uint32 kBlockSize = 1024 * 1024;
274 
275  WaveInfo header;
276  header.Read(is);
277 
278  data_.Resize(0, 0); // clear the data.
279  samp_freq_ = header.SampFreq();
280 
281  std::vector<char> buffer;
282  uint32 bytes_to_go = header.IsStreamed() ? kBlockSize : header.DataBytes();
283 
284  // Once in a while header.DataBytes() will report an insane value;
285  // read the file to the end
286  while (is && bytes_to_go > 0) {
287  uint32 block_bytes = std::min(bytes_to_go, kBlockSize);
288  uint32 offset = buffer.size();
289  buffer.resize(offset + block_bytes);
290  is.read(&buffer[offset], block_bytes);
291  uint32 bytes_read = is.gcount();
292  buffer.resize(offset + bytes_read);
293  if (!header.IsStreamed())
294  bytes_to_go -= bytes_read;
295  }
296 
297  if (is.bad())
298  KALDI_ERR << "WaveData: file read error";
299 
300  if (buffer.size() == 0)
301  KALDI_ERR << "WaveData: empty file (no data)";
302 
303  if (!header.IsStreamed() && buffer.size() < header.DataBytes()) {
304  KALDI_WARN << "Expected " << header.DataBytes() << " bytes of wave data, "
305  << "but read only " << buffer.size() << " bytes. "
306  << "Truncated file?";
307  }
308 
309  uint16 *data_ptr = reinterpret_cast<uint16*>(&buffer[0]);
310 
311  // The matrix is arranged row per channel, column per sample.
312  data_.Resize(header.NumChannels(),
313  buffer.size() / header.BlockAlign());
314  for (uint32 i = 0; i < data_.NumCols(); ++i) {
315  for (uint32 j = 0; j < data_.NumRows(); ++j) {
316  int16 k = *data_ptr++;
317  if (header.ReverseBytes())
318  KALDI_SWAP2(k);
319  data_(j, i) = k;
320  }
321  }
322 }
323 
324 
325 // Write 16-bit PCM.
326 
327 // note: the WAVE chunk contains 2 subchunks.
328 //
329 // subchunk2size = data.NumRows() * data.NumCols() * 2.
330 
331 
332 void WaveData::Write(std::ostream &os) const {
333  os << "RIFF";
334  if (data_.NumRows() == 0)
335  KALDI_ERR << "Error: attempting to write empty WAVE file";
336 
337  int32 num_chan = data_.NumRows(),
338  num_samp = data_.NumCols(),
339  bytes_per_samp = 2;
340 
341  int32 subchunk2size = (num_chan * num_samp * bytes_per_samp);
342  int32 chunk_size = 36 + subchunk2size;
343  WriteUint32(os, chunk_size);
344  os << "WAVE";
345  os << "fmt ";
346  WriteUint32(os, 16);
347  WriteUint16(os, 1);
348  WriteUint16(os, num_chan);
349  KALDI_ASSERT(samp_freq_ > 0);
350  WriteUint32(os, static_cast<int32>(samp_freq_));
351  WriteUint32(os, static_cast<int32>(samp_freq_) * num_chan * bytes_per_samp);
352  WriteUint16(os, num_chan * bytes_per_samp);
353  WriteUint16(os, 8 * bytes_per_samp);
354  os << "data";
355  WriteUint32(os, subchunk2size);
356 
357  const BaseFloat *data_ptr = data_.Data();
358  int32 stride = data_.Stride();
359 
360  int num_clipped = 0;
361  for (int32 i = 0; i < num_samp; i++) {
362  for (int32 j = 0; j < num_chan; j++) {
363  int32 elem = static_cast<int32>(trunc(data_ptr[j * stride + i]));
364  int16 elem_16 = static_cast<int16>(elem);
365  if (elem < std::numeric_limits<int16>::min()) {
366  elem_16 = std::numeric_limits<int16>::min();
367  ++num_clipped;
368  } else if (elem > std::numeric_limits<int16>::max()) {
369  elem_16 = std::numeric_limits<int16>::max();
370  ++num_clipped;
371  }
372 #ifdef __BIG_ENDIAN__
373  KALDI_SWAP2(elem_16);
374 #endif
375  os.write(reinterpret_cast<char*>(&elem_16), 2);
376  }
377  }
378  if (os.fail())
379  KALDI_ERR << "Error writing wave data to stream.";
380  if (num_clipped > 0)
381  KALDI_WARN << "WARNING: clipped " << num_clipped
382  << " samples out of total " << num_chan * num_samp
383  << ". Reduce volume?";
384 }
385 
386 
387 } // end namespace kaldi
int32 NumChannels() const
Number of channels, 1 to 16.
Definition: wave-reader.h:83
void Read(std::istream &is)
Read() will throw on error.
Definition: wave-reader.cc:272
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
#define KALDI_SWAP2(a)
Definition: kaldi-utils.h:114
size_t DataBytes() const
Wave data bytes. Invalid if IsStreamed() is true.
Definition: wave-reader.h:89
bool ReverseBytes() const
Is data file byte order different from machine byte order?
Definition: wave-reader.h:92
kaldi::int32 int32
WaveHeaderReadGofer(std::istream &is)
Definition: wave-reader.cc:40
uint64 data_
bool IsStreamed() const
Is stream size unknown? Duration and SampleCount not valid if true.
Definition: wave-reader.h:71
void Read(std::istream &is)
'is' should be opened in binary mode.
Definition: wave-reader.cc:114
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
This class reads and hold wave file header information.
Definition: wave-reader.h:65
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
BaseFloat SampFreq() const
Sample frequency, Hz.
Definition: wave-reader.h:74
void Write(std::ostream &os) const
Write() will throw on error. os should be opened in binary mode.
Definition: wave-reader.cc:332
void Expect4ByteTag(const char *expected)
Definition: wave-reader.cc:44
static void WriteUint16(std::ostream &os, int16 i)
Definition: wave-reader.cc:100
static void WriteUint32(std::ostream &os, int32 i)
Definition: wave-reader.cc:86
#define KALDI_SWAP4(a)
Definition: kaldi-utils.h:107
size_t BlockAlign() const
Bytes per sample.
Definition: wave-reader.h:86