copy-feats-to-htk.cc
Go to the documentation of this file.
1 // featbin/copy-feats-to-htk.cc
2 
3 // Copyright 2013 Petr Motlicek
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "matrix/kaldi-matrix.h"
23 #include "matrix/matrix-common.h"
24 #include "matrix/matrix-lib.h"
25 
26 #include <sys/stat.h>
27 
28 #if defined(_MSC_VER)
29 #include <direct.h>
30 #else
31 #include <unistd.h>
32 #endif
33 
34 #include <stdio.h>
35 
36 
37 
38 int main(int argc, char *argv[]) {
39  try {
40  using namespace kaldi;
41 
42 
43  const char *usage =
44  "Save features as HTK files:\n"
45  "Each utterance will be stored as a unique HTK file in a specified directory.\n"
46  "The HTK filename will correspond to the utterance-id (key) in the input table, with the specified extension.\n"
47  "Usage: copy-feats-to-htk [options] in-rspecifier\n"
48  "Example: copy-feats-to-htk --output-dir=/tmp/HTK-features --output-ext=fea scp:feats.scp\n";
49 
50  ParseOptions po(usage);
51  std::string dir_out = "./";
52  std::string ext_out = "fea";
53  int32 sample_period = 100000; // 100ns unit : 10ms = 100000,
54  int32 sample_kind = 9; // USER,
55  /*
56  0 WAVEFORM sampled waveform
57  1 LPC linear prediction filter coefficients
58  2 LPREFC linear prediction reflection coefficients
59  3 LPCEPSTRA LPC cepstral coefficients
60  4 LPDELCEP LPC cepstra plus delta coefficients
61  5 IREFC LPC reflection coef in 16 bit integer format
62  6 MFCC mel-frequency cepstral coefficients
63  7 FBANK log mel-filter bank channel outputs
64  8 MELSPEC linear mel-filter bank channel outputs
65  9 USER user defined sample kind
66  10 DISCRETE vector quantised data
67  11 PLP PLP cepstral coefficients
68  */
69 
70  po.Register("output-ext", &ext_out, "Output ext of HTK files");
71  po.Register("output-dir", &dir_out, "Output directory");
72  po.Register("sample-period", &sample_period, "HTK sampPeriod - sample period in 100ns units");
73  po.Register("sample-kind", &sample_kind, "HTK parmKind - a code indicating the sample kind (e.g., 6=MFCC, 7=FBANK, 9=USER, 11=PLP)");
74 
75 
76 
77  po.Read(argc, argv);
78 
79  //std::cout << "Dir: " << dir_out << " ext: " << ext_out << "\n";
80 
81  if (po.NumArgs() != 1) {
82  po.PrintUsage();
83  exit(1);
84  }
85 
86  std::string rspecifier = po.GetArg(1);
87 
88  // check or create output dir:
89  const char * c = dir_out.c_str();
90  if ( access( c, 0 ) != 0 ){
91 #if defined(_MSC_VER)
92  if (_mkdir(c) != 0)
93 #else
94  if (mkdir(c, S_IRWXU|S_IRGRP|S_IXGRP) != 0)
95 #endif
96  KALDI_ERR << "Could not create output directory: " << dir_out;
97  /*
98  else if (chdir(c) != 0)
99  KALDI_ERR << "first chdir() error: " << dir_out;
100  else if (chdir("..") != 0)
101  KALDI_ERR << "second chdir() error: " << dir_out;
102  else if (rmdir(c) != 0)
103  KALDI_ERR << "rmdir() error: " << dir_out;
104  */
105  }
106 
107 
108  // HTK parameters
109  HtkHeader hdr;
110  hdr.mSamplePeriod = sample_period;
111  hdr.mSampleKind = sample_kind;
112 
113 
114  // write to the HTK files
115  int32 num_frames, dim, num_done=0;
116  SequentialBaseFloatMatrixReader feats_reader(rspecifier);
117  for (; !feats_reader.Done(); feats_reader.Next()) {
118  std::string utt = feats_reader.Key();
119  const Matrix<BaseFloat> &feats = feats_reader.Value();
120  num_frames = feats.NumRows(), dim = feats.NumCols();
121  //std::cout << "Utt: " << utt<< " Frames: " << num_frames << " Dim: " << dim << "\n";
122 
123  hdr.mNSamples = num_frames;
124  hdr.mSampleSize = sizeof(float)*dim;
125 
126  Matrix<BaseFloat> output(num_frames, dim, kUndefined);
127  std::stringstream ss;
128  ss << dir_out << "/" << utt << "." << ext_out;
129  output.Range(0, num_frames, 0, dim).CopyFromMat(feats.Range(0, num_frames, 0, dim));
130  std::ofstream os(ss.str().c_str(), std::ios::out|std::ios::binary);
131  WriteHtk(os, output, hdr);
132  num_done++;
133  }
134  KALDI_LOG << num_done << " HTK feature files generated in the direcory: " << dir_out;
135  return (num_done != 0 ? 0 : 1);
136 
137  } catch(const std::exception &e) {
138  std::cerr << e.what();
139  return -1;
140  }
141 }
142 
143 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int16 mSampleSize
Sample size.
Definition: kaldi-matrix.h:961
int main(int argc, char *argv[])
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
bool WriteHtk(std::ostream &os, const MatrixBase< Real > &M, HtkHeader htk_hdr)
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
int32 mSamplePeriod
Sample period.
Definition: kaldi-matrix.h:959
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
uint16 mSampleKind
Sample kind.
Definition: kaldi-matrix.h:963
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
int32 mNSamples
Number of samples.
Definition: kaldi-matrix.h:957
#define KALDI_LOG
Definition: kaldi-error.h:153
A structure containing the HTK header.
Definition: kaldi-matrix.h:955