27 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
33 "Set one of the transforms in lvtln to the minimum-squared-error solution\n" 34 "to mapping feats-untransformed to feats-transformed; posteriors may\n" 35 "optionally be used to downweight/remove silence.\n" 36 "Usage: gmm-train-lvtln-special [options] class-index <lvtln-in> <lvtln-out> " 37 " <feats-untransformed-rspecifier> <feats-transformed-rspecifier> [<posteriors-rspecifier>]\n" 39 " gmm-train-lvtln-special 5 5.lvtln 6.lvtln scp:train.scp scp:train_warp095.scp ark:nosil.post\n";
43 bool normalize_var =
false;
44 bool normalize_covar =
false;
45 std::string weights_rspecifier;
48 po.
Register(
"binary", &binary,
"Write output in binary mode");
49 po.
Register(
"warp", &warp,
"If supplied, can be used to set warp factor" 50 "for this transform");
51 po.
Register(
"normalize-var", &normalize_var,
"Normalize diagonal of variance " 52 "to be the same before and after transform.");
53 po.
Register(
"normalize-covar", &normalize_covar,
"Normalize (matrix-valued) " 54 "covariance to be the same before and after transform.");
55 po.
Register(
"weights-in", &weights_rspecifier,
56 "Can be used to take posteriors as an scp or ark file of weights " 57 "instead of giving <posteriors-rspecfier>");
66 std::string class_idx_str = po.
GetArg(1);
69 KALDI_ERR <<
"Expected integer first argument: got " << class_idx_str;
71 std::string lvtln_rxfilename = po.
GetArg(2),
72 lvtln_wxfilename = po.
GetArg(3),
73 feats_orig_rspecifier = po.
GetArg(4),
74 feats_transformed_rspecifier = po.
GetArg(5),
83 if (!normalize_covar) {
120 for (; !x_reader.
Done(); x_reader.
Next()) {
121 std::string utt = x_reader.
Key();
122 if (!y_reader.
HasKey(utt)) {
123 KALDI_WARN <<
"No transformed features for key " << utt;
131 KALDI_ERR <<
"Number of rows and/or columns differs in features, or features have different dim from lvtln object";
135 if (weights_rspecifier ==
"" && posteriors_rspecifier !=
"") {
136 if (!post_reader.
HasKey(utt)) {
137 KALDI_WARN <<
"No posteriors for utterance " << utt;
141 if (static_cast<int32>(post.size()) != x_feats.
NumRows())
142 KALDI_ERR <<
"Mismatch in size of posterior";
143 for (
size_t i = 0;
i < post.size();
i++)
144 for (
size_t j = 0;
j < post[
i].size();
j++)
145 weights(
i) += post[
i][
j].second;
146 }
else if (weights_rspecifier !=
"") {
147 if (!weights_reader.
HasKey(utt)) {
148 KALDI_WARN <<
"No weights for utterance " << utt;
151 weights.CopyFromVec(weights_reader.
Value(utt));
164 xplus_row_dbl(
j) = x_row(
j);
165 xplus_row_dbl(dim) = 1.0;
167 Q.
AddVec2(weight, xplus_row_dbl);
168 l.
AddVecVec(weight, y_row_dbl, xplus_row_dbl);
170 sum_xplus(dim) += weight;
172 sum_xplus(
j) += weight * x_row(
j);
173 sumsq_x(
j) += weight * x_row(
j)*x_row(
j);
174 sumsq_diff(
j) += weight * (x_row(
j)-y_row(
j)) * (x_row(
j)-y_row(
j));
175 c(
j) += weight * y_row(
j)*y_row(
j);
189 A.
Row(
i).CopyFromVec(a_i);
192 sqdiff = sumsq_diff(
i) / beta,
193 scatter = sumsq_x(
i) / beta;
195 KALDI_LOG <<
"For dimension " <<
i <<
", sum-squared error in linear approximation is " 196 << error <<
", versus feature-difference " << sqdiff <<
", orig-sumsq is " 199 double x_var = scatter - pow(sum_xplus(
i) / beta, 2.0);
200 double y_var =
VecSpVec(w_i, Q, w_i)/beta
201 - pow(
VecVec(w_i, sum_xplus)/beta, 2.0);
202 double scale = sqrt(x_var / y_var);
204 <<
", variance of original and transformed data is " << x_var
205 <<
" and " << y_var <<
" respectively; scaling matrix row by " 206 << scale <<
" to make them equal.";
207 A.
Row(
i).Scale(scale);
227 for (; !x_reader.
Done(); x_reader.
Next()) {
228 std::string utt = x_reader.
Key();
229 if (!y_reader.
HasKey(utt)) {
230 KALDI_WARN <<
"No transformed features for key " << utt;
238 KALDI_ERR <<
"Number of rows and/or columns differs in features, or features have different dim from lvtln object";
242 if (posteriors_rspecifier !=
"") {
243 if (!post_reader.
HasKey(utt)) {
244 KALDI_WARN <<
"No posteriors for utterance " << utt;
248 if (static_cast<int32>(post.size()) != x_feats.
NumRows())
249 KALDI_ERR <<
"Mismatch in size of posterior";
250 for (
size_t i = 0;
i < post.size();
i++)
251 for (
size_t j = 0;
j < post[
i].size();
j++)
252 weights(
i) += post[
i][
j].second;
253 }
else weights.Add(1.0);
288 KALDI_LOG <<
"Singular values of P are: " << l;
300 lvtln.
SetWarp(class_idx, warp);
303 Output ko(lvtln_wxfilename, binary);
307 }
catch(
const std::exception &e) {
308 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
void SetTransform(int32 i, const MatrixBase< BaseFloat > &transform)
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void AddSpVec(const Real alpha, const SpMatrix< Real > &M, const VectorBase< Real > &v, const Real beta)
Add symmetric positive definite matrix times vector: this <- beta*this + alpha*M*v.
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void AddVec2(const Real alpha, const VectorBase< OtherReal > &v)
Rank-one update: this <- this + alpha * v * v'
void Cholesky(const SpMatrix< Real > &orig)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Real VecSpVec(const VectorBase< Real > &v1, const SpMatrix< Real > &M, const VectorBase< Real > &v2)
Computes v1^T * M * v2.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
void SetWarp(int32 i, BaseFloat warp)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void AddVecVec(const Real alpha, const VectorBase< OtherReal > &a, const VectorBase< OtherReal > &b)
*this += alpha * a * b^T
void Write(std::ostream &os, bool binary) const
int main(int argc, char *argv[])
void Invert(Real *logdet=NULL, Real *det_sign=NULL, bool inverse_needed=true)
matrix inverse.
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns dot product between a and b.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector: *this = *this + alpha * v (with casting between floats and doubles) ...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
std::string GetOptArg(int param) const