Accumulate LDA statistics based on pdf-ids.
Inputs are the source models, which serve as the input (and may potentially contain the current transformation), the untransformed features, and the state posterior probabilities.
33 using namespace kaldi;
37 "Accumulate LDA statistics based on pdf-ids.\n" 38 "Usage: acc-lda [options] <transition-gmm/model> <features-rspecifier> <posteriors-rspecifier> <lda-acc-out>\n" 40 " ali-to-post ark:1.ali ark:- | acc-lda 1.mdl \"ark:splice-feats scp:train.scp|\" ark:- ldaacc.1\n";
45 po.Register(
"binary", &binary,
"Write accumulators in binary mode.");
46 po.Register(
"rand-prune", &rand_prune,
47 "Randomized pruning threshold for posteriors");
50 if (po.NumArgs() != 4) {
55 std::string model_rxfilename = po.GetArg(1);
56 std::string features_rspecifier = po.GetArg(2);
57 std::string posteriors_rspecifier = po.GetArg(3);
58 std::string acc_wxfilename = po.GetArg(4);
63 Input ki(model_rxfilename, &binary_read);
64 trans_model.
Read(ki.Stream(), binary_read);
73 int32 num_done = 0, num_fail = 0;
74 for (;!feature_reader.Done(); feature_reader.Next()) {
75 std::string utt = feature_reader.Key();
76 if (!posterior_reader.HasKey(utt)) {
77 KALDI_WARN <<
"No posteriors for utterance " << utt;
81 const Posterior &post (posterior_reader.Value(utt));
87 if (feats.NumRows() !=
static_cast<int32
>(post.size())) {
88 KALDI_WARN <<
"Posterior vs. feats size mismatch " 89 << post.size() <<
" vs. " << feats.NumRows();
93 if (lda.
Dim() != 0 && lda.
Dim() != feats.NumCols()) {
95 <<
" vs. " << feats.NumCols();
102 for (int32
i = 0;
i < feats.NumRows();
i++) {
104 for (
size_t j = 0;
j < pdf_post[
i].size();
j++) {
105 int32 pdf_id = pdf_post[
i][
j].first;
113 if (num_done % 100 == 0)
114 KALDI_LOG <<
"Done " << num_done <<
" utterances.";
117 KALDI_LOG <<
"Done " << num_done <<
" files, failed for " 120 Output ko(acc_wxfilename, binary);
121 lda.
Write(ko.Stream(), binary);
123 return (num_done != 0 ? 0 : 1);
124 }
catch(
const std::exception &e) {
125 std::cerr << e.what();
void Accumulate(const VectorBase< BaseFloat > &data, int32 class_id, BaseFloat weight=1.0)
Accumulates data.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Class for computing linear discriminant analysis (LDA) transform.
int32 Dim() const
Returns the dimensionality of the feature vectors.
Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState *state=NULL)
void Write(std::ostream &out_stream, bool binary) const
void Init(int32 num_classes, int32 dimension)
Allocates memory for accumulators.
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
Represents a non-allocating general vector which can be defined as a sub-vector of a higher-level vector...