26 int main(
int argc,
char *argv[]) {
28 using namespace kaldi;
31 "Apply transform (e.g. LDA; HLDA; fMLLR/CMLLR; MLLT/STC)\n" 32 "Linear transform if transform-num-cols == feature-dim, affine if\n" 33 "transform-num-cols == feature-dim+1 (->append 1.0 to features)\n" 34 "Per-utterance by default, or per-speaker if utt2spk option provided\n" 35 "Global if transform-rxfilename provided.\n" 36 "Usage: transform-feats [options] (<transform-rspecifier>|<transform-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n" 37 "See also: transform-vec, copy-feats, compose-transforms\n";
40 std::string utt2spk_rspecifier;
41 po.
Register(
"utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to speaker map");
50 std::string transform_rspecifier_or_rxfilename = po.
GetArg(1);
51 std::string feat_rspecifier = po.
GetArg(2);
52 std::string feat_wspecifier = po.
GetArg(3);
58 bool use_global_transform;
63 use_global_transform =
true;
66 use_global_transform =
false;
67 if (!transform_reader.
Open(transform_rspecifier_or_rxfilename,
68 utt2spk_rspecifier)) {
69 KALDI_ERR <<
"Problem opening transforms with rspecifier " 70 <<
'"' << transform_rspecifier_or_rxfilename <<
'"' 71 <<
" and utt2spk rspecifier " 72 <<
'"' << utt2spk_rspecifier <<
'"';
76 enum { Unknown, Logdet, PseudoLogdet, DimIncrease };
77 int32 logdet_type = Unknown;
78 double tot_t = 0.0, tot_logdet = 0.0;
79 int32 num_done = 0, num_error = 0;
82 for (;!feat_reader.
Done(); feat_reader.
Next()) {
83 std::string utt = feat_reader.
Key();
86 if (!use_global_transform && !transform_reader.
HasKey(utt)) {
87 KALDI_WARN <<
"No fMLLR transform available for utterance " 88 << utt <<
", producing no output for this utterance";
93 (use_global_transform ? global_transform : transform_reader.
Value(utt));
95 transform_cols = trans.
NumCols(),
96 feat_dim = feat.NumCols();
100 if (transform_cols == feat_dim) {
102 }
else if (transform_cols == feat_dim + 1) {
105 feat_out.AddMatMat(1.0, feat,
kNoTrans, linear_part,
kTrans, 0.0);
108 feat_out.AddVecToRows(1.0, offset);
110 KALDI_WARN <<
"Transform matrix for utterance " << utt <<
" has bad dimension " 111 << transform_rows <<
"x" << transform_cols <<
" versus feat dim " 113 if (transform_cols == feat_dim+2)
114 KALDI_WARN <<
"[perhaps the transform was created by compose-transforms, " 115 "and you forgot the --b-is-affine option?]";
121 if (logdet_type == Unknown) {
122 if (transform_rows == feat_dim) logdet_type = Logdet;
123 else if (transform_rows < feat_dim) logdet_type = PseudoLogdet;
124 else logdet_type = DimIncrease;
133 if (logdet_type != DimIncrease) {
141 if (use_global_transform) {
142 if (cached_logdet == -1)
143 cached_logdet = 0.5 * TT.LogDet(NULL);
144 logdet = cached_logdet;
146 logdet = 0.5 * TT.LogDet(NULL);
148 if (logdet != logdet || logdet-logdet != 0.0)
149 KALDI_WARN <<
"Matrix has bad logdet " << logdet;
151 tot_t += feat.NumRows();
152 tot_logdet += feat.NumRows() * logdet;
155 feat_writer.
Write(utt, feat_out);
157 if (logdet_type != Unknown && logdet_type != DimIncrease)
158 KALDI_LOG <<
"Overall average " << (logdet_type == PseudoLogdet ?
"[pseudo-]":
"")
159 <<
"logdet is " << (tot_logdet/tot_t) <<
" over " << tot_t
161 KALDI_LOG <<
"Applied transform to " << num_done <<
" utterances; " << num_error
164 return (num_done != 0 ? 0 : 1);
165 }
catch(
const std::exception &e) {
166 std::cerr << e.what();
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Packed symmetric matrix class.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class is for when you are reading something in random access, but it may actually be stored per-...
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
bool Open(const std::string &table_rxfilename, const std::string &utt2spk_rxfilename)
Note: when calling Open, utt2spk_rxfilename may be empty.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void CopyColFromMat(const MatrixBase< OtherReal > &M, MatrixIndexT col)
Extracts a column of the matrix M.
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
const T & Value(const std::string &key)
Sub-matrix representation.