tree-accu.cc
Go to the documentation of this file.
1 // hmm/tree-accu.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 
12 // http://www.apache.org/licenses/LICENSE-2.0
13 
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 #include "util/kaldi-io.h"
21 #include "hmm/tree-accu.h"
22 #include "hmm/hmm-utils.h"
23 
24 namespace kaldi {
25 
26 static int32 MapPhone(const std::vector<int32> &phone_map,
27  int32 phone) {
28  if (phone == 0 || phone_map.empty()) return phone;
29  else if (phone < 0 || phone >= phone_map.size()) {
30  KALDI_ERR << "Out-of-range phone " << phone << " bad --phone-map option?";
31  }
32  return phone_map[phone];
33 }
34 
35 
36 void AccumulateTreeStats(const TransitionModel &trans_model,
37  const AccumulateTreeStatsInfo &info,
38  const std::vector<int32> &alignment,
39  const Matrix<BaseFloat> &features,
40  std::map<EventType, GaussClusterable*> *stats) {
41  std::vector<std::vector<int32> > split_alignment;
42  bool ans = SplitToPhones(trans_model, alignment, &split_alignment);
43  if (!ans) {
44  KALDI_WARN << "AccumulateTreeStats: alignment appears to be bad, not using it";
45  return;
46  }
47  int32 cur_pos = 0;
48  int32 dim = features.NumCols();
49  KALDI_ASSERT(features.NumRows() == static_cast<int32>(alignment.size()));
50  for (int32 i = -info.context_width; i < static_cast<int32>(split_alignment.size()); i++) {
51  // consider window starting at i, only if i+info.central_position is within
52  // list of phones.
53  if (i + info.central_position >= 0 &&
54  i + info.central_position < static_cast<int32>(split_alignment.size())) {
55  int32 central_phone =
56  MapPhone(info.phone_map,
57  trans_model.TransitionIdToPhone(
58  split_alignment[i+info.central_position][0]));
59  bool is_ctx_dep = !std::binary_search(info.ci_phones.begin(),
60  info.ci_phones.end(),
61  central_phone);
62  EventType evec;
63  for (int32 j = 0; j < info.context_width; j++) {
64  int32 phone;
65  if (i + j >= 0 && i + j < static_cast<int32>(split_alignment.size()))
66  phone =
67  MapPhone(info.phone_map,
68  trans_model.TransitionIdToPhone(split_alignment[i+j][0]));
69  else
70  phone = 0; // ContextDependency class uses 0 to mean "out of window";
71  // we also set the phone arbitrarily to 0
72 
73  // Don't add stuff to the event that we don't "allow" to be asked, due
74  // to the central phone being context-independent: check "is_ctx_dep".
75  // Why not just set the value to zero in this
76  // case? It's for safety. By omitting the key from the event, we
77  // ensure that there is no way a question can ever be asked that might
78  // give an inconsistent answer in tree-training versus graph-building.
79  // [setting it to zero would have the same effect given the "normal"
80  // recipe but might be less robust to changes in tree-building recipe].
81  if (is_ctx_dep || j == info.central_position)
82  evec.push_back(std::make_pair(static_cast<EventKeyType>(j), static_cast<EventValueType>(phone)));
83  }
84  for (int32 j = 0; j < static_cast<int32>(split_alignment[i+info.central_position].size());j++) {
85  // for central phone of this window...
86  EventType evec_more(evec);
87  int32 pdf_class = trans_model.TransitionIdToPdfClass(
88  split_alignment[i+info.central_position][j]);
89  // pdf_class will normally by 0, 1 or 2 for 3-state HMM.
90  std::pair<EventKeyType, EventValueType> pr(kPdfClass, pdf_class);
91  evec_more.push_back(pr);
92  std::sort(evec_more.begin(), evec_more.end()); // these must be sorted!
93  if (stats->count(evec_more) == 0)
94  (*stats)[evec_more] = new GaussClusterable(dim, info.var_floor);
95 
96  BaseFloat weight = 1.0;
97  (*stats)[evec_more]->AddStats(features.Row(cur_pos), weight);
98  cur_pos++;
99  }
100  }
101  }
102  KALDI_ASSERT(cur_pos == static_cast<int32>(alignment.size()));
103 }
104 
105 
106 void ReadPhoneMap(std::string phone_map_rxfilename,
107  std::vector<int32> *phone_map) {
108  phone_map->clear();
109  // phone map file has format e.g.:
110  // 1 1
111  // 2 1
112  // 3 2
113  // 4 2
114  std::vector<std::vector<int32> > vec; // vector of vectors, each with two elements
115  // (if file has right format). first is old phone, second is new phone
116  if (!ReadIntegerVectorVectorSimple(phone_map_rxfilename, &vec))
117  KALDI_ERR << "Error reading phone map from " <<
118  PrintableRxfilename(phone_map_rxfilename);
119  for (size_t i = 0; i < vec.size(); i++) {
120  if (vec[i].size() != 2 || vec[i][0]<=0 || vec[i][1]<=0 ||
121  (vec[i][0]<static_cast<int32>(phone_map->size()) &&
122  (*phone_map)[vec[i][0]] != -1))
123  KALDI_ERR << "Error reading phone map from "
124  << PrintableRxfilename(phone_map_rxfilename)
125  << " (bad line " << i << ")";
126  if (vec[i][0]>=static_cast<int32>(phone_map->size()))
127  phone_map->resize(vec[i][0]+1, -1);
128  KALDI_ASSERT((*phone_map)[vec[i][0]] == -1);
129  (*phone_map)[vec[i][0]] = vec[i][1];
130  }
131  if (phone_map->empty()) {
132  KALDI_ERR << "Read empty phone map from "
133  << PrintableRxfilename(phone_map_rxfilename);
134  }
135 }
136 
138  const AccumulateTreeStatsOptions &opts):
139  var_floor(opts.var_floor),
140  context_width(opts.context_width),
141  central_position(opts.central_position) {
// The initializer list above copies var_floor, context_width and
// central_position from opts.
// NOTE(review): original lines 137 (start of this constructor's signature),
// 142 and 146 are absent from this extract.  The condition guarding the
// KALDI_ERR below, and the statement controlled by the
// phone_map_rxfilename check, are therefore not visible here -- confirm
// against the real file before editing.
143  KALDI_ERR << "Invalid options: --central-position=" << central_position
144  << ", --context-width=" << context_width;
145  if (!opts.phone_map_rxfilename.empty())
147 
// Parse the ":"-separated --ci-phones string into ci_phones and sort it.
// After sorting, the list is rejected if it contains a duplicate, is empty,
// or its smallest element is phone 0.
148  if (!opts.ci_phones_str.empty()) {
149  SplitStringToIntegers(opts.ci_phones_str, ":", false, &ci_phones);
150  std::sort(ci_phones.begin(), ci_phones.end());
151  if (!IsSortedAndUniq(ci_phones) || ci_phones.empty() || ci_phones[0] == 0)
152  KALDI_ERR << "Invalid --ci-phones option: " << opts.ci_phones_str;
153  }
154 }
155 
156 } // end namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
bool SplitStringToIntegers(const std::string &full, const char *delim, bool omit_empty_strings, std::vector< I > *out)
Split a string (e.g.
Definition: text-utils.h:68
std::vector< int32 > phone_map
Definition: tree-accu.h:67
int32 TransitionIdToPdfClass(int32 trans_id) const
kaldi::int32 int32
static int32 MapPhone(const std::vector< int32 > &phone_map, int32 phone)
Definition: tree-accu.cc:26
void AccumulateTreeStats(const TransitionModel &trans_model, const AccumulateTreeStatsInfo &info, const std::vector< int32 > &alignment, const Matrix< BaseFloat > &features, std::map< EventType, GaussClusterable *> *stats)
Accumulates the stats needed for training context-dependency trees (in the "normal" way)...
Definition: tree-accu.cc:36
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
Definition: hmm-utils.cc:723
static const EventKeyType kPdfClass
Definition: context-dep.h:39
void ReadPhoneMap(std::string phone_map_rxfilename, std::vector< int32 > *phone_map)
Definition: tree-accu.cc:106
std::vector< std::pair< EventKeyType, EventValueType > > EventType
Definition: event-map.h:58
float BaseFloat
Definition: kaldi-types.h:29
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, std::vector< std::vector< int32 > > *list)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::vector< int32 > ci_phones
Definition: tree-accu.h:65
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
int32 TransitionIdToPhone(int32 trans_id) const
AccumulateTreeStatsInfo(const AccumulateTreeStatsOptions &opts)
Definition: tree-accu.cc:137