28 using namespace kaldi;
31 "Apply transform (e.g. LDA; HLDA; fMLLR/CMLLR; MLLT/STC)\n" 32 "Linear transform if transform-num-cols == feature-dim, affine if\n" 33 "transform-num-cols == feature-dim+1 (->append 1.0 to features)\n" 34 "Per-utterance by default, or per-speaker if utt2spk option provided\n" 35 "Global if transform-rxfilename provided.\n" 36 "Usage: transform-feats [options] (<transform-rspecifier>|<transform-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n" 37 "See also: transform-vec, copy-feats, compose-transforms\n";
40 std::string utt2spk_rspecifier;
41 po.Register(
"utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to speaker map");
45 if (po.NumArgs() != 3) {
50 std::string transform_rspecifier_or_rxfilename = po.GetArg(1);
51 std::string feat_rspecifier = po.GetArg(2);
52 std::string feat_wspecifier = po.GetArg(3);
58 bool use_global_transform;
63 use_global_transform =
true;
66 use_global_transform =
false;
67 if (!transform_reader.
Open(transform_rspecifier_or_rxfilename,
68 utt2spk_rspecifier)) {
69 KALDI_ERR <<
"Problem opening transforms with rspecifier " 70 <<
'"' << transform_rspecifier_or_rxfilename <<
'"' 71 <<
" and utt2spk rspecifier " 72 <<
'"' << utt2spk_rspecifier <<
'"';
76 enum { Unknown, Logdet, PseudoLogdet, DimIncrease };
77 int32 logdet_type = Unknown;
78 double tot_t = 0.0, tot_logdet = 0.0;
79 int32 num_done = 0, num_error = 0;
82 for (;!feat_reader.Done(); feat_reader.Next()) {
83 std::string utt = feat_reader.Key();
86 if (!use_global_transform && !transform_reader.
HasKey(utt)) {
87 KALDI_WARN <<
"No fMLLR transform available for utterance " 88 << utt <<
", producing no output for this utterance";
93 (use_global_transform ? global_transform : transform_reader.
Value(utt));
95 transform_cols = trans.
NumCols(),
96 feat_dim = feat.NumCols();
100 if (transform_cols == feat_dim) {
102 }
else if (transform_cols == feat_dim + 1) {
105 feat_out.AddMatMat(1.0, feat,
kNoTrans, linear_part,
kTrans, 0.0);
107 offset.CopyColFromMat(trans, feat_dim);
108 feat_out.AddVecToRows(1.0, offset);
110 KALDI_WARN <<
"Transform matrix for utterance " << utt <<
" has bad dimension " 111 << transform_rows <<
"x" << transform_cols <<
" versus feat dim " 113 if (transform_cols == feat_dim+2)
114 KALDI_WARN <<
"[perhaps the transform was created by compose-transforms, " 115 "and you forgot the --b-is-affine option?]";
121 if (logdet_type == Unknown) {
122 if (transform_rows == feat_dim) logdet_type = Logdet;
123 else if (transform_rows < feat_dim) logdet_type = PseudoLogdet;
124 else logdet_type = DimIncrease;
133 if (logdet_type != DimIncrease) {
141 if (use_global_transform) {
142 if (cached_logdet == -1)
143 cached_logdet = 0.5 * TT.LogDet(NULL);
144 logdet = cached_logdet;
146 logdet = 0.5 * TT.LogDet(NULL);
148 if (logdet != logdet || logdet-logdet != 0.0)
149 KALDI_WARN <<
"Matrix has bad logdet " << logdet;
151 tot_t += feat.NumRows();
152 tot_logdet += feat.NumRows() * logdet;
155 feat_writer.Write(utt, feat_out);
157 if (logdet_type != Unknown && logdet_type != DimIncrease)
158 KALDI_LOG <<
"Overall average " << (logdet_type == PseudoLogdet ?
"[pseudo-]":
"")
159 <<
"logdet is " << (tot_logdet/tot_t) <<
" over " << tot_t
161 KALDI_LOG <<
"Applied transform to " << num_done <<
" utterances; " << num_error
164 return (num_done != 0 ? 0 : 1);
165 }
catch(
const std::exception &e) {
166 std::cerr << e.what();
void AddMat2(const Real alpha, const MatrixBase< Real > &M, MatrixTransposeType transM, const Real beta)
rank-N update: if (transM == kNoTrans) (*this) = beta*(*this) + alpha * M * M^T, or (if transM == kTr...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Packed symmetric matrix class.
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
This class is for when you are reading something in random access, but it may actually be stored per-...
A templated class for writing objects to an archive or script file; see The Table concept...
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
bool Open(const std::string &table_rxfilename, const std::string &utt2spk_rxfilename)
Note: when calling Open, utt2spk_rxfilename may be empty.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
bool HasKey(const std::string &key)
A class representing a vector.
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
const T & Value(const std::string &key)
Sub-matrix representation.