77 const std::vector<std::set<int32> > &pdf2phones,
78 const std::vector<int32> *phone_map,
81 std::set<int32> dest_idxs;
82 for (
int32 ph : pdf2phones.at(
i)) {
83 dest_idxs.insert((phone_map != NULL) ? (*phone_map)[ph] - 1 : ph - 1);
86 for (
int32 idx : dest_idxs) {
88 (*out_frame_level_lpp)(idx) += prob_row(
i);
96 int main(
int argc,
char *argv[]) {
97 using namespace kaldi;
101 "Compute Goodness Of Pronunciation (GOP) from a matrix of " 102 "probabilities (e.g. from nnet3-compute).\n" 103 "Usage: compute-gop [options] <model> <alignments-rspecifier> " 104 "<prob-matrix-rspecifier> <gop-wspecifier> " 105 "[<phone-feature-wspecifier>]\n" 107 " nnet3-compute [args] | compute-gop 1.mdl ark:ali-phone.1 ark:-" 108 " ark:gop.1 ark:phone-feat.1\n";
112 bool log_applied =
true;
113 std::string phone_map_rxfilename;
115 po.
Register(
"log-applied", &log_applied,
116 "If true, assume the input probabilities have been applied log.");
117 po.
Register(
"phone-map", &phone_map_rxfilename,
118 "File name containing old->new phone mapping (each line is: " 119 "old-integer-id new-integer-id)");
128 std::string model_filename = po.
GetArg(1),
129 alignments_rspecifier = po.
GetArg(2),
130 prob_rspecifier = po.
GetArg(3),
131 gop_wspecifier = po.
GetArg(4),
132 feat_wspecifier = po.
GetArg(5);
137 Input ki(model_filename, &binary);
140 std::vector<std::set<int32> > pdf2phones;
142 int32 phone_num = trans_model.
NumPhones();
144 std::vector<int32> phone_map;
145 if (phone_map_rxfilename !=
"") {
147 phone_num = phone_map[phone_map.size() - 1];
156 for (; !prob_reader.
Done(); prob_reader.
Next()) {
157 std::string key = prob_reader.
Key();
158 if (!alignment_reader.
HasKey(key)) {
159 KALDI_WARN <<
"No alignment for utterance " << key;
162 auto alignment = alignment_reader.
Value(key);
166 int32 frame_num = alignment.size();
167 if (alignment.size() != probs.
NumRows()) {
168 KALDI_WARN <<
"The frame numbers of alignment and prob are not equal.";
173 int32 cur_phone_id = alignment[0] - 1;
177 std::vector<Vector<BaseFloat> > phone_level_feat_stdvector;
179 for (int32
i = 0;
i < frame_num;
i++) {
183 (phone_map_rxfilename !=
"") ? &phone_map : NULL,
187 lpp_part.
AddVec(1, frame_level_lpp);
190 int32 next_phone_id = (
i < frame_num - 1) ? alignment[
i + 1] - 1: -1;
191 if (next_phone_id != cur_phone_id) {
193 lpp_part.
Scale(1.0 / duration);
196 for (
int k = 0; k < phone_num; k++)
197 phone_level_feat(phone_num + k) = lpp_part(cur_phone_id) - lpp_part(k);
198 phone_level_feat_stdvector.push_back(phone_level_feat);
202 BaseFloat gop = lpp_part(cur_phone_id) - lpp_part.
Max();
203 std::vector<std::pair<int32, BaseFloat> > posterior_item;
204 posterior_item.push_back(std::make_pair(cur_phone_id + 1, gop));
205 posterior_gop.push_back(posterior_item);
208 phone_level_feat.
Set(0);
211 cur_phone_id = next_phone_id;
216 for (int32
i = 0;
i < phone_level_feat_stdvector.size();
i++) {
218 row.
AddVec(1.0, phone_level_feat_stdvector[
i]);
220 feat_writer.
Write(key, feats);
221 gop_writer.
Write(key, posterior_gop);
225 KALDI_LOG <<
"Processed " << num_done <<
" prob matrices.";
226 return (num_done != 0 ? 0 : 1);
227 }
catch (
const std::exception &e) {
228 std::cerr << e.what() <<
'\n';
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void ApplyLog()
Apply natural log to all elements.
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
int main(int argc, char *argv[])
void ReadPhoneMap(std::string phone_map_rxfilename, std::vector< int32 > *phone_map)
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void GetPdfToPhonesMap(const TransitionModel &trans_model, std::vector< std::set< int32 > > *pdf2phones)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
const T & Value(const std::string &key)
void Read(std::istream &is, bool binary)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
A class representing a vector.
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void Set(Real f)
Set all members of a vector to a specified value.
void FrameLevelLpp(const SubVector< BaseFloat > &prob_row, const std::vector< std::set< int32 > > &pdf2phones, const std::vector< int32 > *phone_map, Vector< BaseFloat > *out_frame_level_lpp)
FrameLevelLpp computes a log posterior for pure-phones by summing the posteriors of the states belonging to...
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector : *this = *this + alpha * v (with casting between floats and doubles) ...
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...