33 phone2num_pdf_classes->clear();
34 phone2num_pdf_classes->resize(
phones_.back() + 1, -1);
46 while ( ! (is >> token).fail() ) {
47 if (token ==
"</Topology>") {
break; }
48 else if (token !=
"<TopologyEntry>") {
49 KALDI_ERR <<
"Reading HmmTopology object, expected </Topology> or <TopologyEntry>, got "<<token;
52 std::vector<int32> phones;
56 if (is.fail())
KALDI_ERR <<
"Reading HmmTopology object, unexpected end of file while expecting phones.";
57 if (s ==
"</ForPhones>")
break;
61 KALDI_ERR <<
"Reading HmmTopology object, expected " 62 <<
"integer, got instead " << s;
63 phones.push_back(phone);
67 std::vector<HmmState> this_entry;
70 while (token !=
"</TopologyEntry>") {
71 if (token !=
"<State>")
72 KALDI_ERR <<
"Expected </TopologyEntry> or <State>, got instead " << token;
75 if (state != static_cast<int32>(this_entry.size()))
76 KALDI_ERR <<
"States are expected to be in order from zero, expected " 77 << this_entry.size() <<
", got " << state;
80 if (token ==
"<PdfClass>") {
82 this_entry.push_back(
HmmState(forward_pdf_class));
84 if (token ==
"<SelfLoopPdfClass>")
85 KALDI_ERR <<
"pdf classes should be defined using <PdfClass> " 86 <<
"or <ForwardPdfClass>/<SelfLoopPdfClass> pair";
87 }
else if (token ==
"<ForwardPdfClass>") {
91 if (token !=
"<SelfLoopPdfClass>")
92 KALDI_ERR <<
"Expected <SelfLoopPdfClass>, got instead " << token;
94 this_entry.push_back(
HmmState(forward_pdf_class, self_loop_pdf_class));
97 this_entry.push_back(
HmmState(forward_pdf_class));
98 while (token ==
"<Transition>") {
103 this_entry.back().transitions.push_back(std::make_pair(dst_state, trans_prob));
106 if (token ==
"<Final>")
107 KALDI_ERR <<
"You are trying to read old-format topology with new Kaldi.";
108 if (token !=
"</State>")
109 KALDI_ERR <<
"Expected </State>, got instead " << token;
115 for (
size_t i = 0;
i < phones.size();
i++) {
117 if (static_cast<int32>(
phone2idx_.size()) <= phone)
121 KALDI_ERR <<
"Phone with index "<<(
i)<<
" appears in multiple topology entries.";
144 for (
int32 j = 0 ;
j < thist_sz;
j++) {
153 for (
int32 k = 0; k < thiss_sz; k++) {
166 bool is_hmm =
IsHmm();
197 for (
size_t k = 0; k <
entries_[
i][
j].transitions.size(); k++) {
221 for (
size_t k = 0; k <
entries_[
i][
j].transitions.size(); k++) {
229 if (!binary) os <<
"\n";
234 KALDI_ERR <<
"HmmTopology::Check(), empty object.";
235 std::vector<bool> is_seen(
entries_.size(),
false);
238 if (static_cast<size_t>(phone) >=
phone2idx_.size() ||
240 KALDI_ERR <<
"HmmTopology::Check(), phone has no valid index.";
245 KALDI_ERR <<
"HmmTopoloy::Check(), entry with no corresponding phones.";
248 KALDI_ERR <<
"HmmTopology::Check(), cannot only have one state (i.e., must " 249 "have at least one emitting state).";
250 if (!
entries_[i][num_states-1].transitions.empty())
251 KALDI_ERR <<
"HmmTopology::Check(), last state must have no transitions.";
254 KALDI_ERR <<
"HmmTopology::Check(), last state must not be emitting.";
256 std::vector<bool> has_trans_in(num_states,
false);
257 std::vector<int32> seen_pdf_classes;
259 for (
int32 j = 0;
j < num_states;
j++) {
262 seen_pdf_classes.push_back(
entries_[i][
j].forward_pdf_class);
263 seen_pdf_classes.push_back(
entries_[i][
j].self_loop_pdf_class);
265 std::set<int32> seen_transition;
267 static_cast<size_t>(k) <
entries_[i][
j].transitions.size();
269 tot_prob +=
entries_[
i][
j].transitions[k].second;
270 if (
entries_[i][
j].transitions[k].second <= 0.0)
271 KALDI_ERR <<
"HmmTopology::Check(), negative or zero transition prob.";
277 if (dst_state == num_states-1
279 KALDI_ERR <<
"We do not allow any state to be " 280 "nonemitting and have a transition to the final-state (this would " 281 "stop the SplitToPhones function from identifying the last state " 283 if (dst_state < 0 || dst_state >= num_states)
284 KALDI_ERR <<
"HmmTopology::Check(), invalid dest state " << (dst_state);
285 if (seen_transition.count(dst_state) != 0)
286 KALDI_ERR <<
"HmmTopology::Check(), duplicate transition found.";
287 if (dst_state == k) {
289 "Nonemitting states cannot have self-loops.");
291 seen_transition.insert(dst_state);
292 has_trans_in[dst_state] =
true;
294 if (
j+1 < num_states) {
295 KALDI_ASSERT(tot_prob > 0.0 &&
"Non-final state must have transitions out." 296 "(with nonzero probability)");
297 if (fabs(tot_prob - 1.0) > 0.01)
298 KALDI_WARN <<
"Total probability for state " <<
j <<
299 " in topology entry is " << tot_prob;
304 for (
int32 j = 1;
j < num_states;
j++)
305 if (!has_trans_in[
j])
306 KALDI_ERR <<
"HmmTopology::Check, state "<<(
j)<<
" has no input transitions.";
308 if (seen_pdf_classes.front() != 0 ||
309 seen_pdf_classes.back() !=
static_cast<int32>(seen_pdf_classes.size()) - 1) {
310 KALDI_ERR <<
"HmmTopology::Check(), pdf_classes are expected to be " 311 "contiguous and start from zero.";
317 const std::vector<int32> &phones =
GetPhones();
319 for (
size_t i = 0;
i < phones.size();
i++) {
322 for (
int32 j = 0; j < static_cast<int32>(entry.size());
j++) {
323 int32 forward_pdf_class = entry[
j].forward_pdf_class,
324 self_loop_pdf_class = entry[
j].self_loop_pdf_class;
325 if (forward_pdf_class != self_loop_pdf_class)
334 KALDI_ERR <<
"TopologyForPhone(), phone "<<(phone)<<
" not covered.";
342 int32 max_pdf_class = 0;
343 for (
size_t i = 0;
i < entry.size();
i++) {
344 max_pdf_class = std::max(max_pdf_class, entry[
i].forward_pdf_class);
345 max_pdf_class = std::max(max_pdf_class, entry[
i].self_loop_pdf_class);
347 return max_pdf_class+1;
354 std::vector<int32> min_length(entry.size(),
355 std::numeric_limits<int32>::max());
358 min_length[0] = (entry[0].forward_pdf_class == -1 ? 0 : 1);
359 int32 num_states = min_length.size();
363 for (
int32 s = 0; s < num_states; s++) {
364 const HmmState &this_state = entry[s];
365 std::vector<std::pair<int32, BaseFloat> >::const_iterator
368 for (; iter != end; ++iter) {
369 int32 next_state = iter->first;
371 int32 next_state_min_length = min_length[s] +
372 (entry[next_state].forward_pdf_class == -1 ? 0 : 1);
373 if (next_state_min_length < min_length[next_state]) {
374 min_length[next_state] = next_state_min_length;
382 KALDI_ASSERT(min_length.back() != std::numeric_limits<int32>::max());
384 return min_length.back();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
A structure defined inside HmmTopology to represent a HMM state.
bool IsHmm() const
Returns true if this HmmTopology is really 'hmm-like', i.e. every state of every covered phone has the same forward and self-loop pdf-class.
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
void Read(std::istream &is, bool binary)
std::vector< HmmState > TopologyEntry
TopologyEntry is a typedef that represents the topology of a single (prototype) state.
void SortAndUniq(std::vector< T > *vec)
Sorts a vector and removes duplicate elements from it.
static const int32 kNoPdf
A constant used in the HmmTopology class as the pdf-class kNoPdf, which is used when an HMM-state is n...
std::vector< int32 > phones_
int32 NumPdfClasses(int32 phone) const
Returns the number of pdf-classes for this phone; throws exception if phone not covered by this topol...
void Write(std::ostream &os, bool binary) const
void ReadIntegerVector(std::istream &is, bool binary, std::vector< T > *v)
Function for reading STL vector of integer types.
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
const TopologyEntry & TopologyForPhone(int32 phone) const
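Returns the topology entry (i.e. the vector of HmmState) for this phone; raises an error if the phone is not covered by the topology.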
std::vector< std::pair< int32, BaseFloat > > transitions
A list of transitions, indexed by what we call a 'transition-index'.
std::vector< TopologyEntry > entries_
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
const std::vector< int32 > & GetPhones() const
Returns a reference to a sorted, unique list of phones covered by the topology (these phones will be ...
std::vector< int32 > phone2idx_
#define KALDI_ASSERT(cond)
void WriteIntegerVector(std::ostream &os, bool binary, const std::vector< T > &v)
Function for writing STL vectors of integer types.
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
int32 MinLength(int32 phone) const
void GetPhoneToNumPdfClasses(std::vector< int32 > *phone2num_pdf_classes) const
Outputs a vector of int32, indexed by phone, that gives the number of pdf-classes for the...
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.