copy-feats-to-htk.cc File Reference
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/matrix-common.h"
#include "matrix/matrix-lib.h"
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
Include dependency graph for copy-feats-to-htk.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 38 of file copy-feats-to-htk.cc.

References SequentialTableReader< Holder >::Done(), ParseOptions::GetArg(), KALDI_ERR, KALDI_LOG, SequentialTableReader< Holder >::Key(), kaldi::kUndefined, HtkHeader::mNSamples, HtkHeader::mSampleKind, HtkHeader::mSamplePeriod, HtkHeader::mSampleSize, SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), MatrixBase< Real >::NumRows(), ParseOptions::PrintUsage(), MatrixBase< Real >::Range(), ParseOptions::Read(), ParseOptions::Register(), SequentialTableReader< Holder >::Value(), and kaldi::WriteHtk().

38  {
39  try {
40  using namespace kaldi;
41 
42 
43  const char *usage =
44  "Save features as HTK files:\n"
45  "Each utterance will be stored as a unique HTK file in a specified directory.\n"
46  "The HTK filename will correspond to the utterance-id (key) in the input table, with the specified extension.\n"
47  "Usage: copy-feats-to-htk [options] in-rspecifier\n"
48  "Example: copy-feats-to-htk --output-dir=/tmp/HTK-features --output-ext=fea scp:feats.scp\n";
49 
50  ParseOptions po(usage);
51  std::string dir_out = "./";
52  std::string ext_out = "fea";
53  int32 sample_period = 100000; // 100ns unit : 10ms = 100000,
54  int32 sample_kind = 9; // USER,
55  /*
56  0 WAVEFORM sampled waveform
57  1 LPC linear prediction filter coefficients
58  2 LPREFC linear prediction reflection coefficients
59  3 LPCEPSTRA LPC cepstral coefficients
60  4 LPDELCEP LPC cepstra plus delta coefficients
61  5 IREFC LPC reflection coef in 16 bit integer format
62  6 MFCC mel-frequency cepstral coefficients
63  7 FBANK log mel-filter bank channel outputs
64  8 MELSPEC linear mel-filter bank channel outputs
65  9 USER user defined sample kind
66  10 DISCRETE vector quantised data
67  11 PLP PLP cepstral coefficients
68  */
69 
70  po.Register("output-ext", &ext_out, "Output ext of HTK files");
71  po.Register("output-dir", &dir_out, "Output directory");
72  po.Register("sample-period", &sample_period, "HTK sampPeriod - sample period in 100ns units");
73  po.Register("sample-kind", &sample_kind, "HTK parmKind - a code indicating the sample kind (e.g., 6=MFCC, 7=FBANK, 9=USER, 11=PLP)");
74 
75 
76 
77  po.Read(argc, argv);
78 
79  //std::cout << "Dir: " << dir_out << " ext: " << ext_out << "\n";
80 
81  if (po.NumArgs() != 1) {
82  po.PrintUsage();
83  exit(1);
84  }
85 
86  std::string rspecifier = po.GetArg(1);
87 
88  // check or create output dir:
89  const char * c = dir_out.c_str();
90  if ( access( c, 0 ) != 0 ){
91 #if defined(_MSC_VER)
92  if (_mkdir(c) != 0)
93 #else
94  if (mkdir(c, S_IRWXU|S_IRGRP|S_IXGRP) != 0)
95 #endif
96  KALDI_ERR << "Could not create output directory: " << dir_out;
97  /*
98  else if (chdir(c) != 0)
99  KALDI_ERR << "first chdir() error: " << dir_out;
100  else if (chdir("..") != 0)
101  KALDI_ERR << "second chdir() error: " << dir_out;
102  else if (rmdir(c) != 0)
103  KALDI_ERR << "rmdir() error: " << dir_out;
104  */
105  }
106 
107 
108  // HTK parameters
109  HtkHeader hdr;
110  hdr.mSamplePeriod = sample_period;
111  hdr.mSampleKind = sample_kind;
112 
113 
114  // write to the HTK files
115  int32 num_frames, dim, num_done=0;
116  SequentialBaseFloatMatrixReader feats_reader(rspecifier);
117  for (; !feats_reader.Done(); feats_reader.Next()) {
118  std::string utt = feats_reader.Key();
119  const Matrix<BaseFloat> &feats = feats_reader.Value();
120  num_frames = feats.NumRows(), dim = feats.NumCols();
121  //std::cout << "Utt: " << utt<< " Frames: " << num_frames << " Dim: " << dim << "\n";
122 
123  hdr.mNSamples = num_frames;
124  hdr.mSampleSize = sizeof(float)*dim;
125 
126  Matrix<BaseFloat> output(num_frames, dim, kUndefined);
127  std::stringstream ss;
128  ss << dir_out << "/" << utt << "." << ext_out;
129  output.Range(0, num_frames, 0, dim).CopyFromMat(feats.Range(0, num_frames, 0, dim));
130  std::ofstream os(ss.str().c_str(), std::ios::out|std::ios::binary);
131  WriteHtk(os, output, hdr);
132  num_done++;
133  }
134  KALDI_LOG << num_done << " HTK feature files generated in the direcory: " << dir_out;
135  return (num_done != 0 ? 0 : 1);
136 
137  } catch(const std::exception &e) {
138  std::cerr << e.what();
139  return -1;
140  }
141 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int16 mSampleSize
Sample size.
Definition: kaldi-matrix.h:961
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
bool WriteHtk(std::ostream &os, const MatrixBase< Real > &M, HtkHeader htk_hdr)
kaldi::int32 int32
int32 mSamplePeriod
Sample period.
Definition: kaldi-matrix.h:959
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
uint16 mSampleKind
Sample kind.
Definition: kaldi-matrix.h:963
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
Definition: kaldi-matrix.h:202
int32 mNSamples
Number of samples.
Definition: kaldi-matrix.h:957
#define KALDI_LOG
Definition: kaldi-error.h:153
A structure containing the HTK header.
Definition: kaldi-matrix.h:955