29 using namespace kaldi;
33 "Set one of the transforms in lvtln to the minimum-squared-error solution\n" 34 "to mapping feats-untransformed to feats-transformed; posteriors may\n" 35 "optionally be used to downweight/remove silence.\n" 36 "Usage: gmm-train-lvtln-special [options] class-index <lvtln-in> <lvtln-out> " 37 " <feats-untransformed-rspecifier> <feats-transformed-rspecifier> [<posteriors-rspecifier>]\n" 39 " gmm-train-lvtln-special 5 5.lvtln 6.lvtln scp:train.scp scp:train_warp095.scp ark:nosil.post\n";
43 bool normalize_var =
false;
44 bool normalize_covar =
false;
45 std::string weights_rspecifier;
48 po.Register(
"binary", &binary,
"Write output in binary mode");
49 po.Register(
"warp", &warp,
"If supplied, can be used to set warp factor" 50 "for this transform");
51 po.Register(
"normalize-var", &normalize_var,
"Normalize diagonal of variance " 52 "to be the same before and after transform.");
53 po.Register(
"normalize-covar", &normalize_covar,
"Normalize (matrix-valued) " 54 "covariance to be the same before and after transform.");
55 po.Register(
"weights-in", &weights_rspecifier,
56 "Can be used to take posteriors as an scp or ark file of weights " 57 "instead of giving <posteriors-rspecfier>");
61 if (po.NumArgs() < 5 || po.NumArgs() > 6) {
66 std::string class_idx_str = po.GetArg(1);
69 KALDI_ERR <<
"Expected integer first argument: got " << class_idx_str;
71 std::string lvtln_rxfilename = po.GetArg(2),
72 lvtln_wxfilename = po.GetArg(3),
73 feats_orig_rspecifier = po.GetArg(4),
74 feats_transformed_rspecifier = po.GetArg(5),
75 posteriors_rspecifier = po.GetOptArg(6);
83 if (!normalize_covar) {
120 for (; !x_reader.Done(); x_reader.Next()) {
121 std::string utt = x_reader.Key();
122 if (!y_reader.HasKey(utt)) {
123 KALDI_WARN <<
"No transformed features for key " << utt;
131 KALDI_ERR <<
"Number of rows and/or columns differs in features, or features have different dim from lvtln object";
135 if (weights_rspecifier ==
"" && posteriors_rspecifier !=
"") {
136 if (!post_reader.HasKey(utt)) {
137 KALDI_WARN <<
"No posteriors for utterance " << utt;
140 const Posterior &post = post_reader.Value(utt);
141 if (static_cast<int32>(post.size()) != x_feats.
NumRows())
142 KALDI_ERR <<
"Mismatch in size of posterior";
143 for (
size_t i = 0;
i < post.size();
i++)
144 for (
size_t j = 0;
j < post[
i].size();
j++)
145 weights(
i) += post[
i][
j].second;
146 }
else if (weights_rspecifier !=
"") {
147 if (!weights_reader.HasKey(utt)) {
148 KALDI_WARN <<
"No weights for utterance " << utt;
151 weights.CopyFromVec(weights_reader.Value(utt));
164 xplus_row_dbl(
j) = x_row(
j);
165 xplus_row_dbl(dim) = 1.0;
167 Q.AddVec2(weight, xplus_row_dbl);
168 l.AddVecVec(weight, y_row_dbl, xplus_row_dbl);
170 sum_xplus(dim) += weight;
172 sum_xplus(
j) += weight * x_row(
j);
173 sumsq_x(
j) += weight * x_row(
j)*x_row(
j);
174 sumsq_diff(
j) += weight * (x_row(
j)-y_row(
j)) * (x_row(
j)-y_row(
j));
175 c(
j) += weight * y_row(
j)*y_row(
j);
187 w_i.AddSpVec(1.0, Qinv, l_i, 0.0);
189 A.Row(
i).CopyFromVec(a_i);
192 sqdiff = sumsq_diff(
i) / beta,
193 scatter = sumsq_x(
i) / beta;
195 KALDI_LOG <<
"For dimension " <<
i <<
", sum-squared error in linear approximation is " 196 << error <<
", versus feature-difference " << sqdiff <<
", orig-sumsq is " 199 double x_var = scatter - pow(sum_xplus(
i) / beta, 2.0);
200 double y_var =
VecSpVec(w_i, Q, w_i)/beta
201 - pow(
VecVec(w_i, sum_xplus)/beta, 2.0);
202 double scale = sqrt(x_var / y_var);
204 <<
", variance of original and transformed data is " << x_var
205 <<
" and " << y_var <<
" respectively; scaling matrix row by " 206 << scale <<
" to make them equal.";
207 A.Row(
i).Scale(scale);
227 for (; !x_reader.Done(); x_reader.Next()) {
228 std::string utt = x_reader.Key();
229 if (!y_reader.HasKey(utt)) {
230 KALDI_WARN <<
"No transformed features for key " << utt;
238 KALDI_ERR <<
"Number of rows and/or columns differs in features, or features have different dim from lvtln object";
242 if (posteriors_rspecifier !=
"") {
243 if (!post_reader.HasKey(utt)) {
244 KALDI_WARN <<
"No posteriors for utterance " << utt;
247 const Posterior &post = post_reader.Value(utt);
248 if (static_cast<int32>(post.size()) != x_feats.
NumRows())
249 KALDI_ERR <<
"Mismatch in size of posterior";
250 for (
size_t i = 0;
i < post.size();
i++)
251 for (
size_t j = 0;
j < post[
i].size();
j++)
252 weights(
i) += post[
i][
j].second;
253 }
else weights.Add(1.0);
262 XX.AddVec2(weight, x_dbl);
263 x.AddVec(weight, x_row);
264 y.AddVec(weight, y_row);
265 XY.AddVecVec(weight, x_dbl, y_dbl);
272 S.AddVec2(-1.0, xbar);
280 P0.AddVecVec(-1.0, xbar, y);
288 KALDI_LOG <<
"Singular values of P are: " << l;
300 lvtln.
SetWarp(class_idx, warp);
303 Output ko(lvtln_wxfilename, binary);
304 lvtln.
Write(ko.Stream(), binary);
307 }
catch(
const std::exception &e) {
308 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
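Converts a string into an integer via strtoll and returns false if there was any kind of problem (i.e. the string was not an integer, contained extra non-whitespace junk, or the value was too large to fit into the output type).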
void SetTransform(int32 i, const MatrixBase< BaseFloat > &transform)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
void SetWarp(int32 i, BaseFloat warp)
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Write(std::ostream &os, bool binary) const
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of a and b.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...