#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
#include "transform/cmvn.h"

Include dependency graph for compute-cmvn-stats-two-channel.cc:

Namespaces
	kaldi
	This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for mispronunciation detection tasks; the reference:

Functions
void	GetUtterancePairs (const std::string &reco2file_and_channel_rxfilename, std::vector< std::vector< std::string > > *utt_pairs)

void	AccCmvnStatsForPair (const std::string &utt1, const std::string &utt2, const MatrixBase< BaseFloat > &feats1, const MatrixBase< BaseFloat > &feats2, BaseFloat quieter_channel_weight, MatrixBase< double > *cmvn_stats1, MatrixBase< double > *cmvn_stats2)

int	main (int argc, char *argv[])

Function Documentation

◆ main()

int main	(	int	argc,
		char *	argv[]
	)

Definition at line 100 of file compute-cmvn-stats-two-channel.cc.

References kaldi::AccCmvnStats(), kaldi::AccCmvnStatsForPair(), ParseOptions::GetArg(), kaldi::GetUtterancePairs(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, ParseOptions::NumArgs(), MatrixBase< Real >::NumCols(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

                                  {
   try {
     using namespace kaldi;
     using kaldi::int32;
 
     const char *usage =
         "Compute cepstral mean and variance normalization statistics\n"
         "Specialized for two-sided telephone data where we only accumulate\n"
         "the louder of the two channels at each frame (and add it to that\n"
         "side's stats).  Reads a 'reco2file_and_channel' file, normally like\n"
         "sw02001-A sw02001 A\n"
         "sw02001-B sw02001 B\n"
         "sw02005-A sw02005 A\n"
         "sw02005-B sw02005 B\n"
         "interpreted as <utterance-id> <call-id> <side> and for each <call-id>\n"
         "that has two sides, does the 'only-the-louder' computation, else doesn\n"
         "per-utterance stats in the normal way.\n"
         "Note: loudness is judged by the first feature component, either energy or c0;\n"
         "only applicable to MFCCs or PLPs (this code could be modified to handle filterbanks).\n"
         "\n"
         "Usage: compute-cmvn-stats-two-channel  [options] <reco2file-and-channel> <feats-rspecifier> <stats-wspecifier>\n"
         "e.g.: compute-cmvn-stats-two-channel data/train_unseg/reco2file_and_channel scp:data/train_unseg/feats.scp ark,t:-\n";
 
 
     ParseOptions po(usage);
     BaseFloat quieter_channel_weight = 0.01;
 
     po.Register("quieter-channel-weight", &quieter_channel_weight,
                 "For the quieter channel, apply this weight to the stats, so "
                 "that we still get stats if one channel always dominates.");
 
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {
       po.PrintUsage();
       exit(1);
     }
 
     int32 num_done = 0, num_err = 0;
 
     std::string reco2file_and_channel_rxfilename = po.GetArg(1),
         feats_rspecifier = po.GetArg(2),
         stats_wspecifier = po.GetArg(3);
 
 
     std::vector<std::vector<std::string> > utt_pairs;
     GetUtterancePairs(reco2file_and_channel_rxfilename, &utt_pairs);
 
     RandomAccessBaseFloatMatrixReader feat_reader(feats_rspecifier);
     DoubleMatrixWriter writer(stats_wspecifier);
 
     for (size_t i = 0; i < utt_pairs.size(); i++) {
       std::vector<std::string> this_pair(utt_pairs[i]);
 
       KALDI_ASSERT(this_pair.size() == 2 || this_pair.size() == 1);
       if (this_pair.size() == 2) {
         std::string utt1 = this_pair[0], utt2 = this_pair[1];
         if (!feat_reader.HasKey(utt1)) {
           KALDI_WARN << "No feature data for utterance " << utt1;
           num_err++;
           this_pair[0] = utt2;
           this_pair.pop_back();
           // and fall through to the singleton code below.
         } else if (!feat_reader.HasKey(utt2)) {
           KALDI_WARN << "No feature data for utterance " << utt2;
           num_err++;
           this_pair.pop_back();
           // and fall through to the singleton code below.
         } else {
           Matrix<BaseFloat> feats1 = feat_reader.Value(utt1),
               feats2 = feat_reader.Value(utt2);
           int32 dim = feats1.NumCols();
           Matrix<double> cmvn_stats1(2, dim + 1), cmvn_stats2(2, dim + 1);
           AccCmvnStatsForPair(utt1, utt2, feats1, feats2, quieter_channel_weight,
                               &cmvn_stats1, &cmvn_stats2);
           writer.Write(utt1, cmvn_stats1);
           writer.Write(utt2, cmvn_stats2);
           num_done += 2;
           continue; // continue so we don't go to the singleton-processing code
                     // below.
         }
       }
       // process singletons.
       std::string utt = this_pair[0];
       if (!feat_reader.HasKey(utt)) {
         KALDI_WARN << "No feature data for utterance " << utt;
         num_err++;
         continue;
       }
       const Matrix<BaseFloat> &feats = feat_reader.Value(utt);
       Matrix<double> cmvn_stats(2, feats.NumCols() + 1);
       AccCmvnStats(feats, NULL, &cmvn_stats);
       writer.Write(utt, cmvn_stats);
       num_done++;
     }
     KALDI_LOG << "Done accumulating CMVN stats for " << num_done
               << " utterances; " << num_err << " had errors.";
     return (num_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }

Namespaces

Functions

Function Documentation

◆ main()