32 int main(
int argc,
char *argv[]) {
35 using namespace kaldi;
37 "Compute MAP estimates per-utterance (default) or per-speaker for\n" 38 "the supplied set of speakers (spk2utt option). This will typically\n" 39 "be piped into gmm-latgen-map\n" 41 "Usage: gmm-adapt-map [options] <model-in> <feature-rspecifier> " 42 "<posteriors-rspecifier> <map-am-wspecifier>\n";
45 std::string spk2utt_rspecifier;
48 std::string update_flags_str =
"mw";
50 po.
Register(
"spk2utt", &spk2utt_rspecifier,
"rspecifier for speaker to " 51 "utterance-list map");
52 po.
Register(
"binary", &binary,
"Write output in binary mode");
53 po.
Register(
"update-flags", &update_flags_str,
"Which GMM parameters will be " 54 "updated: subset of mvw.");
64 std::string model_filename = po.
GetArg(1),
65 feature_rspecifier = po.
GetArg(2),
66 posteriors_rspecifier = po.
GetArg(3),
67 map_am_wspecifier = po.
GetArg(4);
78 Input is(model_filename, &binary);
83 double tot_like = 0.0, tot_like_change = 0.0, tot_t = 0.0,
85 int32 num_done = 0, num_err = 0;
87 if (spk2utt_rspecifier !=
"") {
90 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
91 std::string spk = spk2utt_reader.
Key();
95 map_accs.
Init(am_gmm, update_flags);
97 const std::vector<std::string> &uttlist = spk2utt_reader.
Value();
100 std::vector<std::string>::const_iterator iter = uttlist.begin(),
102 for (; iter != end; ++iter) {
103 std::string utt = *iter;
104 if (!feature_reader.
HasKey(utt)) {
105 KALDI_WARN <<
"Did not find features for utterance " << utt;
108 if (!posteriors_reader.
HasKey(utt)) {
109 KALDI_WARN <<
"Did not find posteriors for utterance " << utt;
115 if (posterior.size() != feats.
NumRows()) {
116 KALDI_WARN <<
"Posteriors has wrong size " << (posterior.size())
117 <<
" vs. " << (feats.
NumRows());
125 for (
size_t i = 0;
i < posterior.size();
i++ ) {
126 for (
size_t j = 0;
j < pdf_posterior[
i].size();
j++ ) {
127 int32 pdf_id = pdf_posterior[
i][
j].first;
136 KALDI_VLOG(2) <<
"Average like for utterance " << utt <<
" is " 137 << (file_like/file_t) <<
" over " << file_t <<
" frames.";
139 tot_like += file_like;
143 if (num_done % 10 == 0)
144 KALDI_VLOG(1) <<
"Avg like per frame so far is " 145 << (tot_like / tot_t);
149 BaseFloat spk_objf_change = 0.0, spk_frames = 0.0;
151 &spk_objf_change, &spk_frames);
152 KALDI_LOG <<
"For speaker " << spk <<
", objective function change " 153 <<
"from MAP was " << (spk_objf_change / spk_frames)
154 <<
" over " << spk_frames <<
" frames.";
155 tot_like_change += spk_objf_change;
156 tot_t_check += spk_frames;
159 map_am_writer.
Write(spk,copy_am_gmm);
163 for ( ; !feature_reader.
Done(); feature_reader.
Next() ) {
164 std::string utt = feature_reader.
Key();
168 map_accs.
Init(am_gmm, update_flags);
169 map_accs.
SetZero(update_flags);
171 if ( !posteriors_reader.
HasKey(utt) ) {
172 KALDI_WARN <<
"Did not find aligned transcription for utterance " 180 if ( posterior.size() != feats.
NumRows() ) {
181 KALDI_WARN <<
"Posteriors has wrong size " << (posterior.size())
182 <<
" vs. " << (feats.
NumRows());
190 for (
size_t i = 0;
i < posterior.size();
i++ ) {
191 for (
size_t j = 0;
j < pdf_posterior[
i].size();
j++ ) {
192 int32 pdf_id = pdf_posterior[
i][
j].first;
199 KALDI_VLOG(2) <<
"Average like for utterance " << utt <<
" is " 200 << (file_like/file_t) <<
" over " << file_t <<
" frames.";
201 tot_like += file_like;
203 if ( num_done % 10 == 0 )
204 KALDI_VLOG(1) <<
"Avg like per frame so far is " 205 << (tot_like / tot_t);
208 BaseFloat utt_objf_change = 0.0, utt_frames = 0.0;
210 &utt_objf_change, &utt_frames);
211 KALDI_LOG <<
"For utterance " << utt <<
", objective function change " 212 <<
"from MAP was " << (utt_objf_change / utt_frames)
213 <<
" over " << utt_frames <<
" frames.";
214 tot_like_change += utt_objf_change;
215 tot_t_check += utt_frames;
218 map_am_writer.
Write(feature_reader.
Key(), copy_am_gmm);
222 KALDI_LOG <<
"Done " << num_done <<
" files, " << num_err
224 KALDI_LOG <<
"Overall acoustic likelihood was " << (tot_like / tot_t)
225 <<
" and change in likelihod per frame was " 226 << (tot_like_change / tot_t) <<
" over " << tot_t <<
" frames.";
227 return (num_done != 0 ? 0 : 1);
228 }
catch(
const std::exception& e) {
229 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
void CopyFromAmDiagGmm(const AmDiagGmm &other)
Copies the parameters from another model. Allocates necessary memory.
void MapAmDiagGmmUpdate(const MapDiagGmmOptions &config, const AccumAmDiagGmm &am_diag_gmm_acc, GmmFlagsType flags, AmDiagGmm *am_gmm, BaseFloat *obj_change_out, BaseFloat *count_out)
Maximum A Posteriori update.
GmmFlagsType StringToGmmFlags(std::string str)
Convert string which is some subset of "mSwa" to flags.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void SetZero(GmmFlagsType flags)
BaseFloat AccumulateForGmm(const AmDiagGmm &model, const VectorBase< BaseFloat > &data, int32 gmm_index, BaseFloat weight)
Accumulate stats for a single GMM in the model; returns log likelihood.
A templated class for writing objects to an archive or script file; see The Table concept...
uint16 GmmFlagsType
Bitwise OR of the above flags.
void Register(OptionsItf *opts)
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
int main(int argc, char *argv[])
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
void Read(std::istream &in_stream, bool binary)
void Init(const AmDiagGmm &model, GmmFlagsType flags)
Initializes accumulators for each GMM based on the number of components and dimension.
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
Returns true if abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
Configuration variables for Maximum A Posteriori (MAP) update.