97 using namespace kaldi;
101 "Compute Goodness Of Pronunciation (GOP) from a matrix of " 102 "probabilities (e.g. from nnet3-compute).\n" 103 "Usage: compute-gop [options] <model> <alignments-rspecifier> " 104 "<prob-matrix-rspecifier> <gop-wspecifier> " 105 "[<phone-feature-wspecifier>]\n" 107 " nnet3-compute [args] | compute-gop 1.mdl ark:ali-phone.1 ark:-" 108 " ark:gop.1 ark:phone-feat.1\n";
112 bool log_applied =
true;
113 std::string phone_map_rxfilename;
115 po.Register(
"log-applied", &log_applied,
116 "If true, assume the input probabilities have been applied log.");
117 po.Register(
"phone-map", &phone_map_rxfilename,
118 "File name containing old->new phone mapping (each line is: " 119 "old-integer-id new-integer-id)");
123 if (po.NumArgs() != 4 && po.NumArgs() != 5) {
128 std::string model_filename = po.GetArg(1),
129 alignments_rspecifier = po.GetArg(2),
130 prob_rspecifier = po.GetArg(3),
131 gop_wspecifier = po.GetArg(4),
132 feat_wspecifier = po.GetArg(5);
137 Input ki(model_filename, &binary);
138 trans_model.
Read(ki.Stream(), binary);
140 std::vector<std::set<int32> > pdf2phones;
142 int32 phone_num = trans_model.
NumPhones();
144 std::vector<int32> phone_map;
145 if (phone_map_rxfilename !=
"") {
147 phone_num = phone_map[phone_map.size() - 1];
156 for (; !prob_reader.Done(); prob_reader.Next()) {
157 std::string key = prob_reader.Key();
158 if (!alignment_reader.HasKey(key)) {
159 KALDI_WARN <<
"No alignment for utterance " << key;
162 auto alignment = alignment_reader.Value(key);
166 int32 frame_num = alignment.size();
167 if (alignment.size() != probs.
NumRows()) {
168 KALDI_WARN <<
"The frame numbers of alignment and prob are not equal.";
173 int32 cur_phone_id = alignment[0] - 1;
177 std::vector<Vector<BaseFloat> > phone_level_feat_stdvector;
179 for (int32
i = 0;
i < frame_num;
i++) {
183 (phone_map_rxfilename !=
"") ? &phone_map : NULL,
187 lpp_part.AddVec(1, frame_level_lpp);
190 int32 next_phone_id = (
i < frame_num - 1) ? alignment[
i + 1] - 1: -1;
191 if (next_phone_id != cur_phone_id) {
193 lpp_part.Scale(1.0 / duration);
196 for (
int k = 0; k < phone_num; k++)
197 phone_level_feat(phone_num + k) = lpp_part(cur_phone_id) - lpp_part(k);
198 phone_level_feat_stdvector.push_back(phone_level_feat);
202 BaseFloat gop = lpp_part(cur_phone_id) - lpp_part.Max();
203 std::vector<std::pair<int32, BaseFloat> > posterior_item;
204 posterior_item.push_back(std::make_pair(cur_phone_id + 1, gop));
205 posterior_gop.push_back(posterior_item);
208 phone_level_feat.Set(0);
211 cur_phone_id = next_phone_id;
216 for (int32
i = 0;
i < phone_level_feat_stdvector.size();
i++) {
218 row.AddVec(1.0, phone_level_feat_stdvector[
i]);
220 feat_writer.Write(key, feats);
221 gop_writer.Write(key, posterior_gop);
225 KALDI_LOG <<
"Processed " << num_done <<
" prob matrices.";
226 return (num_done != 0 ? 0 : 1);
227 }
catch (
const std::exception &e) {
228 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
A templated class for writing objects to an archive or script file; see The Table concept...
Allows random access to a collection of objects in an archive or script file; see The Table concept...
void ReadPhoneMap(std::string phone_map_rxfilename, std::vector< int32 > *phone_map)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an utterance...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void GetPdfToPhonesMap(const TransitionModel &trans_model, std::vector< std::set< int32 > > *pdf2phones)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void FrameLevelLpp(const SubVector< BaseFloat > &prob_row, const std::vector< std::set< int32 > > &pdf2phones, const std::vector< int32 > *phone_map, Vector< BaseFloat > *out_frame_level_lpp)
FrameLevelLpp computes a log posterior for pure-phones by summing the posteriors of the states belonging to...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...