31   using namespace kaldi;
    34         "Accumulate stats for SGMM training.\n"    35         "Usage: sgmm2-acc-stats [options] <model-in> <feature-rspecifier> "    36         "<posteriors-rspecifier> <stats-out>\n"    37         "e.g.: sgmm2-acc-stats --gselect=ark:gselect.ark 1.mdl scp:train.scp 'ark:ali-to-post 1.ali ark:-|' 1.acc\n"    38         "(note: gselect option is mandatory)\n";
    42     std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier;
    43     std::string update_flags_str = 
"vMNwcSt";
    46     po.Register(
"binary", &binary, 
"Write output in binary mode");
    47     po.Register(
"gselect", &gselect_rspecifier, 
"Precomputed Gaussian indices (rspecifier)");
    48     po.Register(
"spk-vecs", &spkvecs_rspecifier, 
"Speaker vectors (rspecifier)");
    49     po.Register(
"utt2spk", &utt2spk_rspecifier,
    50                 "rspecifier for utterance to speaker map");
    51     po.Register(
"rand-prune", &rand_prune, 
"Pruning threshold for posteriors");
    52     po.Register(
"update-flags", &update_flags_str, 
"Which SGMM parameters to accumulate "    53                 "stats for: subset of vMNwcSt.");
    59     if (po.NumArgs() != 4) {
    63     if (gselect_rspecifier == 
"")
    64       KALDI_ERR << 
"--gselect option is mandatory.";
    66     std::string model_filename = po.GetArg(1),
    67         feature_rspecifier = po.GetArg(2),
    68         posteriors_rspecifier = po.GetArg(3),
    69         accs_wxfilename = po.GetArg(4);
    71     using namespace kaldi;
    74     int32 num_done = 0, num_err = 0;
    96         Input ki(model_filename, &binary);
    97         trans_model.
Read(ki.Stream(), binary);
    98         am_sgmm.
Read(ki.Stream(), binary);
   103       sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags, (spkvecs_rspecifier!=
""));
   105       double tot_like = 0.0;
   112       for (; !feature_reader.Done(); feature_reader.Next()) {
   113         std::string utt = feature_reader.Key();
   114         std::string spk = utt;
   115         if (!utt2spk_rspecifier.empty()) {
   116           if (!utt2spk_map.HasKey(utt)) {
   117             KALDI_WARN << 
"utt2spk map does not have value for " << utt
   118                        << 
", ignoring this utterance.";
   120           } 
else { spk = utt2spk_map.Value(utt); }
   123         if (spk != cur_spk && cur_spk != 
"")
   124           sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);        
   126         if (spk != cur_spk || spk_vars.
Empty()) {
   128           if (spkvecs_reader.IsOpen()) {
   129             if (spkvecs_reader.HasKey(utt)) {
   133               KALDI_WARN << 
"Cannot find speaker vector for " << utt;
   143         if (!posteriors_reader.HasKey(utt) ||
   144             posteriors_reader.Value(utt).size() != features.
NumRows()) {
   145           KALDI_WARN << 
"No posterior info available for utterance "   146                      << utt << 
" (or wrong size)";
   150         const Posterior &posterior = posteriors_reader.Value(utt);
   152         if (!gselect_reader.HasKey(utt)
   153             || gselect_reader.Value(utt).size() != features.
NumRows()) {
   154           KALDI_WARN << 
"No Gaussian-selection info available for utterance "   155                      << utt << 
" (or wrong size)";
   158         const std::vector<std::vector<int32> > &gselect =
   159             gselect_reader.Value(utt);
   163         BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
   167         for (
size_t i = 0; 
i < posterior.size(); 
i++) {
   171           for (
size_t j = 0; 
j < pdf_posterior[
i].size(); 
j++) {
   172             int32 pdf_id = pdf_posterior[
i][
j].first;
   174             tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
   175                                                        pdf_id, weight, &spk_vars)
   177             tot_weight += weight;
   181           for (
size_t j = 0; 
j < posterior[
i].size(); 
j++) {
   182             int32 tid = posterior[
i][
j].first;
   184             trans_model.
Accumulate(weight, tid, &transition_accs);
   188         KALDI_VLOG(2) << 
"Average like for this file is "   189                       << (tot_like_this_file/tot_weight) << 
" over "   190                       << tot_weight <<
" frames.";
   191         tot_like += tot_like_this_file;
   193         if (num_done % 50 == 0) {
   194           KALDI_LOG << 
"Processed " << num_done << 
" utterances; for utterance "   195                     << utt << 
" avg. like is "   196                     << (tot_like_this_file/tot_weight)
   197                     << 
" over " << tot_weight <<
" frames.";
   200       sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars); 
   203       KALDI_LOG << 
"Overall like per frame (Gaussian only) = "   204                 << (tot_like/tot_t) << 
" over " << tot_t << 
" frames.";
   206       KALDI_LOG << 
"Done " << num_done << 
" files, " << num_err
   211       Output ko(accs_wxfilename, binary);
   212       transition_accs.
Write(ko.Stream(), binary);
   213       sgmm_accs.Write(ko.Stream(), binary);
   216     return (num_done != 0 ? 0 : 1);
   217   } 
catch(
const std::exception &e) {
   218     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
Class for definition of the subspace Gmm acoustic model. 
 
This class is for when you are reading something in random access, but it may actually be stored per-...
 
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary). 
 
void Read(std::istream &is, bool binary)
 
SgmmUpdateFlagsType StringToSgmmUpdateFlags(std::string str)
 
void ComputePerSpkDerivedVars(Sgmm2PerSpkDerivedVars *vars) const
Computes the per-speaker derived vars; assumes vars->v_s is already set up. 
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
 
void InitStats(Vector< double > *stats) const
 
uint16 SgmmUpdateFlagsType
Bitwise OR of the above flags. 
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const]. 
 
void Read(std::istream &is, bool binary)
 
void Accumulate(BaseFloat prob, int32 trans_id, Vector< double > *stats) const
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
void ComputePerFrameVars(const VectorBase< BaseFloat > &data, const std::vector< int32 > &gselect, const Sgmm2PerSpkDerivedVars &spk_vars, Sgmm2PerFrameDerivedVars *per_frame_vars) const
This needs to be called with each new frame of data, prior to accumulation or likelihood evaluation: ...
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
void SetSpeakerVector(const Vector< BaseFloat > &v_s_in)
 
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids. 
 
Class for the accumulators associated with the phonetic-subspace model parameters. 
 
Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and n_{i}(t) (cf...