wave-reader.h
Go to the documentation of this file.
1 // feat/wave-reader.h
2 
3 // Copyright 2009-2011 Karel Vesely; Microsoft Corporation
4 // 2013 Florent Masson
5 // 2013 Johns Hopkins University (author: Daniel Povey)
6 
7 // See ../../COPYING for clarification regarding multiple authors
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 //
13 // http://www.apache.org/licenses/LICENSE-2.0
14 //
15 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
17 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
18 // MERCHANTABLITY OR NON-INFRINGEMENT.
19 // See the Apache 2 License for the specific language governing permissions and
20 // limitations under the License.
21 
22 
23 /*
24 // THE WAVE FORMAT IS SPECIFIED IN:
25 // https://en.wikipedia.org/wiki/WAV
26 //
27 //
28 //
29 // RIFF
30 // |
31 // WAVE
32 // | \ \ \
33 // fmt_ data ... data
34 //
35 //
36 // RIFF is a general container, which usually contains one WAVE chunk.
37 // Each WAVE chunk has a header sub-chunk 'fmt_'
38 // and one or more data sub-chunks 'data'.
39 //
40 // [Note from Dan: to say that the wave format was ever "specified" anywhere is
41 // not quite right. The guy who invented the wave format attempted to create
42 // a formal specification but it did not completely make sense. And there
43 // doesn't seem to be a consensus on what makes a valid wave file,
44 // particularly where the accuracy of header information is concerned.]
45 */
46 
47 
48 #ifndef KALDI_FEAT_WAVE_READER_H_
49 #define KALDI_FEAT_WAVE_READER_H_
50 
#include <cstring>
#include <utility>

#include "base/kaldi-types.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
56 
57 
58 namespace kaldi {
59 
62 const BaseFloat kWaveSampleMax = 32768.0;
63 
65 class WaveInfo {
66  public:
69 
71  bool IsStreamed() const { return samp_count_ < 0; }
72 
74  BaseFloat SampFreq() const { return samp_freq_; }
75 
77  uint32 SampleCount() const { return samp_count_; }
78 
80  BaseFloat Duration() const { return samp_count_ / samp_freq_; }
81 
83  int32 NumChannels() const { return num_channels_; }
84 
86  size_t BlockAlign() const { return 2 * num_channels_; }
87 
89  size_t DataBytes() const { return samp_count_ * BlockAlign(); }
90 
92  bool ReverseBytes() const { return reverse_bytes_; }
93 
96  void Read(std::istream &is);
97 
98  private:
100  int32 samp_count_; // 0 if empty, -1 if undefined length.
102  bool reverse_bytes_; // File endianness differs from host.
103 };
104 
106 class WaveData {
107  public:
108  WaveData(BaseFloat samp_freq, const MatrixBase<BaseFloat> &data)
109  : data_(data), samp_freq_(samp_freq) {}
110 
111  WaveData() : samp_freq_(0.0) {}
112 
116  void Read(std::istream &is);
117 
119  void Write(std::ostream &os) const;
120 
121  // This function returns the wave data-- it's in a matrix
122  // because there may be multiple channels. In the normal case
123  // there's just one channel so Data() will have one row.
124  const Matrix<BaseFloat> &Data() const { return data_; }
125 
126  BaseFloat SampFreq() const { return samp_freq_; }
127 
128  // Returns the duration in seconds
129  BaseFloat Duration() const { return data_.NumCols() / samp_freq_; }
130 
131  void CopyFrom(const WaveData &other) {
132  samp_freq_ = other.samp_freq_;
133  data_.CopyFromMat(other.data_);
134  }
135 
136  void Clear() {
137  data_.Resize(0, 0);
138  samp_freq_ = 0.0;
139  }
140 
141  void Swap(WaveData *other) {
142  data_.Swap(&(other->data_));
144  }
145 
146  private:
147  static const uint32 kBlockSize = 1024 * 1024; // Use 1M bytes.
150 };
151 
152 
153 // Holder class for .wav files that enables us to read (but not write) .wav
154 // files. c.f. util/kaldi-holder.h we don't use the KaldiObjectHolder template
155 // because we don't want to check for the \0B binary header. We could have faked
156 // it by pretending to read in the wave data in text mode after failing to find
157 // the \0B header, but that would have been a little ugly.
158 class WaveHolder {
159  public:
160  typedef WaveData T;
161 
162  static bool Write(std::ostream &os, bool binary, const T &t) {
163  // We don't write the binary-mode header here [always binary].
164  if (!binary)
165  KALDI_ERR << "Wave data can only be written in binary mode.";
166  try {
167  t.Write(os); // throws exception on failure.
168  return true;
169  } catch (const std::exception &e) {
170  KALDI_WARN << "Exception caught in WaveHolder object (writing). "
171  << e.what();
172  return false; // write failure.
173  }
174  }
175  void Copy(const T &t) { t_.CopyFrom(t); }
176 
177  static bool IsReadInBinary() { return true; }
178 
179  void Clear() { t_.Clear(); }
180 
181  T &Value() { return t_; }
182 
183  WaveHolder &operator = (const WaveHolder &other) {
184  t_.CopyFrom(other.t_);
185  return *this;
186  }
187  WaveHolder(const WaveHolder &other): t_(other.t_) {}
188 
190 
191  bool Read(std::istream &is) {
192  // We don't look for the binary-mode header here [always binary]
193  try {
194  t_.Read(is); // Throws exception on failure.
195  return true;
196  } catch (const std::exception &e) {
197  KALDI_WARN << "Exception caught in WaveHolder::Read(). " << e.what();
198  return false;
199  }
200  }
201 
202  void Swap(WaveHolder *other) {
203  t_.Swap(&(other->t_));
204  }
205 
206  bool ExtractRange(const WaveHolder &other, const std::string &range) {
207  KALDI_ERR << "ExtractRange is not defined for this type of holder.";
208  return false;
209  }
210 
211  private:
212  T t_;
213 };
214 
215 // This is like WaveHolder but when you just want the metadata-
216 // it leaves the actual data undefined, it doesn't read it.
218  public:
219  typedef WaveInfo T;
220 
221  void Clear() { info_ = WaveInfo(); }
222  void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); }
223  T &Value() { return info_; }
224  static bool IsReadInBinary() { return true; }
225 
226  bool Read(std::istream &is) {
227  try {
228  info_.Read(is); // Throws exception on failure.
229  return true;
230  } catch (const std::exception &e) {
231  KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). " << e.what();
232  return false;
233  }
234  }
235 
236  bool ExtractRange(const WaveInfoHolder &other, const std::string &range) {
237  KALDI_ERR << "ExtractRange is not defined for this type of holder.";
238  return false;
239  }
240 
241  private:
243 };
244 
245 
246 } // namespace kaldi
247 
248 #endif // KALDI_FEAT_WAVE_READER_H_
int32 NumChannels() const
Number of channels, 1 to 16.
Definition: wave-reader.h:83
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
bool Read(std::istream &is)
Definition: wave-reader.h:191
BaseFloat samp_freq_
Definition: wave-reader.h:149
size_t DataBytes() const
Wave data bytes. Invalid if IsStreamed() is true.
Definition: wave-reader.h:89
uint32 SampleCount() const
Number of samples in stream. Invalid if IsStreamed() is true.
Definition: wave-reader.h:77
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
void Swap(WaveData *other)
Definition: wave-reader.h:141
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
bool ReverseBytes() const
Is data file byte order different from machine byte order?
Definition: wave-reader.h:92
kaldi::int32 int32
BaseFloat Duration() const
Approximate duration, seconds. Invalid if IsStreamed() is true.
Definition: wave-reader.h:80
BaseFloat SampFreq() const
Definition: wave-reader.h:126
const Matrix< BaseFloat > & Data() const
Definition: wave-reader.h:124
uint64 data_
bool ExtractRange(const WaveHolder &other, const std::string &range)
Definition: wave-reader.h:206
static bool IsReadInBinary()
Definition: wave-reader.h:224
bool IsStreamed() const
Is stream size unknown? Duration and SampleCount not valid if true.
Definition: wave-reader.h:71
float BaseFloat
Definition: kaldi-types.h:29
Matrix< BaseFloat > data_
Definition: wave-reader.h:148
void Read(std::istream &is)
'is' should be opened in binary mode.
Definition: wave-reader.cc:114
void Copy(const T &t)
Definition: wave-reader.h:175
void Swap(WaveHolder *other)
Definition: wave-reader.h:202
#define KALDI_ERR
Definition: kaldi-error.h:147
void Swap(WaveInfoHolder *other)
Definition: wave-reader.h:222
#define KALDI_WARN
Definition: kaldi-error.h:150
This class reads and holds wave file header information.
Definition: wave-reader.h:65
const BaseFloat kWaveSampleMax
For historical reasons, we scale waveforms to the range (2^15-1)*[-1, 1], not the usual default DSP r...
Definition: wave-reader.h:62
void CopyFrom(const WaveData &other)
Definition: wave-reader.h:131
WaveData(BaseFloat samp_freq, const MatrixBase< BaseFloat > &data)
Definition: wave-reader.h:108
bool Read(std::istream &is)
Definition: wave-reader.h:226
This class's purpose is to read in Wave files.
Definition: wave-reader.h:106
static bool IsReadInBinary()
Definition: wave-reader.h:177
BaseFloat Duration() const
Definition: wave-reader.h:129
BaseFloat SampFreq() const
Sample frequency, Hz.
Definition: wave-reader.h:74
void Write(std::ostream &os) const
Write() will throw on error. os should be opened in binary mode.
Definition: wave-reader.cc:332
BaseFloat samp_freq_
Definition: wave-reader.h:99
static bool Write(std::ostream &os, bool binary, const T &t)
Definition: wave-reader.h:162
WaveHolder(const WaveHolder &other)
Definition: wave-reader.h:187
bool ExtractRange(const WaveInfoHolder &other, const std::string &range)
Definition: wave-reader.h:236
size_t BlockAlign() const
Bytes per sample.
Definition: wave-reader.h:86