206 using namespace kaldi;
210 "Compute an LDA matrix for iVector system. Reads in iVectors per utterance,\n" 211 "and an utt2spk file which it uses to help work out the within-speaker and\n" 212 "between-speaker covariance matrices. Outputs an LDA projection to a\n" 213 "specified dimension. By default it will normalize so that the projected\n" 214 "within-class covariance is unit, but if you set --normalize-total-covariance\n" 215 "to true, it will normalize the total covariance.\n" 216 "Note: the transform we produce is actually an affine transform which will\n" 217 "also set the global mean to zero.\n" 219 "Usage: ivector-compute-lda [options] <ivector-rspecifier> <utt2spk-rspecifier> " 222 " ivector-compute-lda ark:ivectors.ark ark:utt2spk lda.mat\n";
228 covariance_floor = 1.0e-06;
231 po.Register(
"dim", &lda_dim,
"Dimension we keep with the LDA transform");
232 po.Register(
"total-covariance-factor", &total_covariance_factor,
233 "If this is 0.0 we normalize to make the within-class covariance " 234 "unit; if 1.0, the total covariance; if between, we normalize " 235 "an interpolated matrix.");
236 po.Register(
"covariance-floor", &covariance_floor,
"Floor the eigenvalues " 237 "of the interpolated covariance matrix to the product of its " 238 "largest eigenvalue and this number.");
239 po.Register(
"binary", &binary,
"Write output in binary mode");
243 if (po.NumArgs() != 3) {
248 std::string ivector_rspecifier = po.GetArg(1),
249 utt2spk_rspecifier = po.GetArg(2),
250 lda_wxfilename = po.GetArg(3);
254 int32 num_done = 0, num_err = 0, dim = 0;
259 std::map<std::string, Vector<BaseFloat> *> utt2ivector;
260 std::map<std::string, std::vector<std::string> > spk2utt;
262 for (; !ivector_reader.Done(); ivector_reader.Next()) {
263 std::string utt = ivector_reader.Key();
265 if (utt2ivector.count(utt) != 0) {
266 KALDI_WARN <<
"Duplicate iVector found for utterance " << utt
271 if (!utt2spk_reader.HasKey(utt)) {
272 KALDI_WARN <<
"utt2spk has no entry for utterance " << utt
277 std::string spk = utt2spk_reader.Value(utt);
284 spk2utt[spk].push_back(utt);
288 KALDI_LOG <<
"Read " << num_done <<
" utterances, " 289 << num_err <<
" with errors.";
292 KALDI_ERR <<
"Did not read any utterances.";
294 KALDI_LOG <<
"Computing within-class covariance.";
306 total_covariance_factor,
310 offset.AddMatVec(-1.0, linear_part,
kNoTrans, mean, 0.0);
311 lda_mat.CopyColFromVec(offset, dim);
313 KALDI_VLOG(2) <<
"2-norm of transformed iVector mean is " 321 std::map<std::string, Vector<BaseFloat> *>::iterator iter;
322 for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter)
327 }
catch(
const std::exception &e) {
328 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Real Norm(Real p) const
Compute the p-th norm of the vector.
Allows random access to a collection of objects in an archive or script file; see The Table concept.
void ComputeLdaTransform(const std::map< std::string, Vector< BaseFloat > *> &utt2ivector, const std::map< std::string, std::vector< std::string > > &spk2utt, BaseFloat total_covariance_factor, BaseFloat covariance_floor, MatrixBase< BaseFloat > *lda_out)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more documentation.
A templated class for reading objects sequentially from an archive or script file; see The Table concept.
void ComputeAndSubtractMean(std::map< std::string, Vector< BaseFloat > *> utt2ivector, Vector< BaseFloat > *mean_out)
MatrixIndexT Dim() const
Returns the dimension of the vector.
A class representing a vector.
#define KALDI_ASSERT(cond)
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Sub-matrix representation.