35     using namespace kaldi;
    37     using fst::SymbolTable;
    42         "Align features given [SGMM-based] models.\n"    43         "Usage: sgmm2-align-compiled [options] <model-in> <graphs-rspecifier> "    44         "<feature-rspecifier> <alignments-wspecifier>\n"    45         "e.g.: sgmm2-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n";
    54     std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier;
    55     std::string per_frame_acwt_wspecifier;
    58     po.Register(
"binary", &binary, 
"Write output in binary mode");
    59     po.Register(
"log-prune", &log_prune, 
"Pruning beam used to reduce number "    60                 "of exp() evaluations.");
    61     po.Register(
"spk-vecs", &spkvecs_rspecifier, 
"Speaker vectors (rspecifier)");
    62     po.Register(
"utt2spk", &utt2spk_rspecifier,
    63                 "rspecifier for utterance to speaker map");
    64     po.Register(
"acoustic-scale", &acoustic_scale, 
"Scaling factor for acoustic "    66     po.Register(
"transition-scale", &transition_scale, 
"Scaling factor for "    67                 "some transition probabilities [see also self-loop-scale].");
    68     po.Register(
"self-loop-scale", &self_loop_scale, 
"Scaling factor for "    69                 "self-loop versus non-self-loop probability mass [controls "    70                 "most transition probabilities.]");
    71     po.Register(
"write-per-frame-acoustic-loglikes", &per_frame_acwt_wspecifier,
    72                 "Wspecifier for table of vectors containing the acoustic log-likelihoods "    73                 "per frame for each utterance. E.g. ark:foo/per_frame_logprobs.1.ark");
    74     po.Register(
"gselect", &gselect_rspecifier, 
"Precomputed Gaussian indices "    79     if (po.NumArgs() != 4) {
    84     if (gselect_rspecifier == 
"")
    85       KALDI_ERR << 
"--gselect option is mandatory.";
    87     std::string model_in_filename = po.GetArg(1),
    88         fst_rspecifier = po.GetArg(2),
    89         feature_rspecifier = po.GetArg(3),
    90         alignment_wspecifier = po.GetArg(4);
    96       Input ki(model_in_filename, &binary);
    97       trans_model.
Read(ki.Stream(), binary);
    98       am_sgmm.
Read(ki.Stream(), binary);
   111     int num_done = 0, num_err = 0, num_retry = 0;
   112     double tot_like = 0.0;
   113     kaldi::int64 frame_count = 0;
   115     for (; !fst_reader.Done(); fst_reader.Next()) {
   116       std::string utt = fst_reader.Key();
   117       if (!feature_reader.HasKey(utt)) {
   118         KALDI_WARN << 
"No feature found for utterance " << utt;
   122       VectorFst<StdArc> decode_fst(fst_reader.Value());
   125       fst_reader.FreeCurrent();
   129         KALDI_WARN << 
"Zero-length utterance: " << utt;
   135       if (spkvecs_reader.IsOpen()) {
   136         if (spkvecs_reader.HasKey(utt)) {
   140           KALDI_WARN << 
"Cannot find speaker vector for " << utt;
   146       if (!gselect_reader.HasKey(utt)
   147           && gselect_reader.Value(utt).size() != features.
NumRows()) {
   148         KALDI_WARN << 
"No Gaussian-selection info available for utterance "   149                    << utt << 
" (or wrong size)";
   152       const std::vector<std::vector<int32> > &gselect =
   153           gselect_reader.Value(utt);
   156         std::vector<int32> disambig_syms;  
   158                            transition_scale, self_loop_scale,
   163                                             log_prune, acoustic_scale, &spk_vars);
   166                             acoustic_scale, &decode_fst, &sgmm_decodable,
   167                             &alignment_writer, NULL,
   168                             &num_done, &num_err, &num_retry,
   169                             &tot_like, &frame_count, &per_frame_acwt_writer);
   173     KALDI_LOG << 
"Overall log-likelihood per frame is " << (tot_like/frame_count)
   174               << 
" over " << frame_count<< 
" frames.";
   175     KALDI_LOG << 
"Retried " << num_retry << 
" out of "   176               << (num_done + num_err) << 
" utterances.";
   177     KALDI_LOG << 
"Done " << num_done << 
", errors on " << num_err;
   178     return (num_done != 0 ? 0 : 1);
   179   } 
catch(
const std::exception &e) {
   180     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
Class for the definition of the subspace GMM acoustic model. 
 
void Register(OptionsItf *opts)
 
This class is for when you are reading something in random access, but it may actually be stored per-...
 
void Read(std::istream &is, bool binary)
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
void ComputePerSpkDerivedVars(Sgmm2PerSpkDerivedVars *vars) const
Computes the per-speaker derived vars; assumes vars->v_s is already set up. 
 
Allows random access to a collection of objects in an archive or script file; see The Table concept...
 
void AddTransitionProbs(const TransitionModel &trans_model, const std::vector< int32 > &disambig_syms, BaseFloat transition_scale, BaseFloat self_loop_scale, fst::VectorFst< fst::StdArc > *fst)
Adds transition-probs, with the supplied scales (see Scaling of transition and acoustic probabilities...
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
void Read(std::istream &is, bool binary)
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
void SetSpeakerVector(const Vector< BaseFloat > &v_s_in)
 
void AlignUtteranceWrapper(const AlignConfig &config, const std::string &utt, BaseFloat acoustic_scale, fst::VectorFst< fst::StdArc > *fst, DecodableInterface *decodable, Int32VectorWriter *alignment_writer, BaseFloatWriter *scores_writer, int32 *num_done, int32 *num_error, int32 *num_retried, double *tot_like, int64 *frame_count, BaseFloatVectorWriter *per_frame_acwt_writer)
AlignUtteranceWrapper is a wrapper for alignment code used in training, that is called from many diffe...