context-dep.cc
Go to the documentation of this file.
1 // tree/context-dep.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "tree/context-dep.h"
21 #include "base/kaldi-math.h"
22 #include "tree/build-tree.h"
23 
24 namespace kaldi {
25 
26 bool ContextDependency::Compute(const std::vector<int32> &phoneseq,
27  int32 pdf_class,
28  int32 *pdf_id) const {
29  KALDI_ASSERT(static_cast<int32>(phoneseq.size()) == N_);
30  EventType event_vec;
31  event_vec.reserve(N_+1);
32  event_vec.push_back(std::make_pair
33  (static_cast<EventKeyType>(kPdfClass), // -1
34  static_cast<EventValueType>(pdf_class)));
35  KALDI_COMPILE_TIME_ASSERT(kPdfClass < 0); // or it would not be sorted.
36  for (int32 i = 0;i < N_;i++) {
37  event_vec.push_back(std::make_pair
38  (static_cast<EventKeyType>(i),
39  static_cast<EventValueType>(phoneseq[i])));
40  KALDI_ASSERT(static_cast<EventAnswerType>(phoneseq[i]) >= 0);
41  }
42  KALDI_ASSERT(pdf_id != NULL);
43  return to_pdf_->Map(event_vec, pdf_id);
44 }
45 
46 ContextDependency *GenRandContextDependency(const std::vector<int32> &phone_ids,
47  bool ensure_all_covered,
48  std::vector<int32> *hmm_lengths) {
49  KALDI_ASSERT(IsSortedAndUniq(phone_ids));
50  int32 num_phones = phone_ids.size();
51  int32 num_stats = 1 + (Rand() % 15) * (Rand() % 15); // up to 14^2 + 1 separate stats.
52  int32 N = 2 + Rand() % 3; // 2, 3 or 4.
53  int32 P = Rand() % N;
54  float ctx_dep_prob = 0.7 + 0.3*RandUniform();
55  int32 max_phone = *std::max_element(phone_ids.begin(), phone_ids.end());
56  hmm_lengths->clear();
57  hmm_lengths->resize(max_phone + 1, -1);
58  std::vector<bool> is_ctx_dep(max_phone + 1);
59 
60  for (int32 i = 0; i <= max_phone; i++) {
61  (*hmm_lengths)[i] = 1 + Rand() % 3;
62  is_ctx_dep[i] = (RandUniform() < ctx_dep_prob); // true w.p. ctx_dep_prob.
63  }
64  for (size_t i = 0; i < (size_t) num_phones; i++)
65  KALDI_VLOG(2) << "For idx = " << i
66  << ", (phone_id, hmm_length, is_ctx_dep) == "
67  << (phone_ids[i]) << " " << ((*hmm_lengths)[phone_ids[i]])
68  << " " << (is_ctx_dep[phone_ids[i]]);
69  // Generate rand stats.
70  BuildTreeStatsType stats;
71  size_t dim = 3 + Rand() % 20;
72  GenRandStats(dim, num_stats, N, P, phone_ids, *hmm_lengths,
73  is_ctx_dep, ensure_all_covered, &stats);
74 
75  // Now build the tree.
76 
77  Questions qopts;
78  int32 num_quest = Rand() % 10, num_iters = rand () % 5;
79  qopts.InitRand(stats, num_quest, num_iters, kAllKeysUnion); // This was tested in build-tree-utils-test.cc
80 
81  float thresh = 100.0 * RandUniform();
82 
83  EventMap *tree = NULL;
84  std::vector<std::vector<int32> > phone_sets(phone_ids.size());
85  for (size_t i = 0; i < phone_ids.size(); i++)
86  phone_sets[i].push_back(phone_ids[i]);
87  std::vector<bool> share_roots(phone_sets.size(), true),
88  do_split(phone_sets.size(), true);
89 
90  tree = BuildTree(qopts, phone_sets, *hmm_lengths, share_roots,
91  do_split, stats, thresh, 1000, 0.0, P);
92  DeleteBuildTreeStats(&stats);
93  return new ContextDependency(N, P, tree);
94 }
95 
96 
97 ContextDependency *GenRandContextDependencyLarge(const std::vector<int32> &phone_ids,
98  int N, int P,
99  bool ensure_all_covered,
100  std::vector<int32> *hmm_lengths) {
101  KALDI_ASSERT(IsSortedAndUniq(phone_ids));
102  int32 num_phones = phone_ids.size();
103  int32 num_stats = 3000; // each is a separate context.
104  float ctx_dep_prob = 0.9;
105  KALDI_ASSERT(num_phones > 0);
106  hmm_lengths->clear();
107  int32 max_phone = *std::max_element(phone_ids.begin(), phone_ids.end());
108  hmm_lengths->resize(max_phone + 1, -1);
109  std::vector<bool> is_ctx_dep(max_phone + 1);
110 
111  for (int32 i = 0; i <= max_phone; i++) {
112  (*hmm_lengths)[i] = 1 + Rand() % 3;
113  is_ctx_dep[i] = (RandUniform() < ctx_dep_prob); // true w.p. ctx_dep_prob.
114  }
115  for (size_t i = 0; i < (size_t) num_phones; i++) {
116  KALDI_VLOG(2) << "For idx = "<< i << ", (phone_id, hmm_length, is_ctx_dep) == " << (phone_ids[i]) << " " << ((*hmm_lengths)[phone_ids[i]]) << " " << (is_ctx_dep[phone_ids[i]]);
117  }
118  // Generate rand stats.
119  BuildTreeStatsType stats;
120  size_t dim = 3 + Rand() % 20;
121  GenRandStats(dim, num_stats, N, P, phone_ids, *hmm_lengths, is_ctx_dep, ensure_all_covered, &stats);
122 
123  // Now build the tree.
124 
125  Questions qopts;
126  int32 num_quest = 40, num_iters = 0;
127  qopts.InitRand(stats, num_quest, num_iters, kAllKeysUnion); // This was tested in build-tree-utils-test.cc
128 
129  float thresh = 100.0 * RandUniform();
130 
131  EventMap *tree = NULL;
132  std::vector<std::vector<int32> > phone_sets(phone_ids.size());
133  for (size_t i = 0; i < phone_ids.size(); i++)
134  phone_sets[i].push_back(phone_ids[i]);
135  std::vector<bool> share_roots(phone_sets.size(), true),
136  do_split(phone_sets.size(), true);
137 
138  tree = BuildTree(qopts, phone_sets, *hmm_lengths, share_roots,
139  do_split, stats, thresh, 1000, 0.0, P);
140  DeleteBuildTreeStats(&stats);
141  return new ContextDependency(N, P, tree);
142 }
143 
144 
145 void ContextDependency::Write (std::ostream &os, bool binary) const {
146  WriteToken(os, binary, "ContextDependency");
147  WriteBasicType(os, binary, N_);
148  WriteBasicType(os, binary, P_);
149  WriteToken(os, binary, "ToPdf");
150  to_pdf_->Write(os, binary);
151  WriteToken(os, binary, "EndContextDependency");
152 }
153 
154 
155 void ContextDependency::Read (std::istream &is, bool binary) {
156  if (to_pdf_) {
157  delete to_pdf_;
158  to_pdf_ = NULL;
159  }
160  ExpectToken(is, binary, "ContextDependency");
161  ReadBasicType(is, binary, &N_);
162  ReadBasicType(is, binary, &P_);
163  EventMap *to_pdf = NULL;
164  std::string token;
165  ReadToken(is, binary, &token);
166  if (token == "ToLength") { // back-compat.
167  EventMap *to_num_pdf_classes = EventMap::Read(is, binary);
168  delete to_num_pdf_classes;
169  ReadToken(is, binary, &token);
170  }
171  if (token == "ToPdf") {
172  to_pdf = EventMap::Read(is , binary);
173  } else {
174  KALDI_ERR << "Got unexpected token " << token
175  << " reading context-dependency object.";
176  }
177  ExpectToken(is, binary, "EndContextDependency");
178  to_pdf_ = to_pdf;
179 }
180 
182  const std::vector<int32> &phones,
183  int32 self_loop_pdf_class, int32 forward_pdf_class,
184  const std::vector<int32> &phone_window,
185  unordered_set<std::pair<int32, int32>, PairHasher<int32> > *pairs) const {
186  std::vector<int32> new_phone_window(phone_window);
187  EventType vec;
188 
189  std::vector<EventAnswerType> forward_pdfs, self_loop_pdfs;
190 
191  // get list of possible forward pdfs
192  vec.clear();
193  for (size_t i = 0; i < N_; i++)
194  if (phone_window[i] >= 0)
195  vec.push_back(std::make_pair(static_cast<EventKeyType>(i),
196  static_cast<EventValueType>(phone_window[i])));
197  vec.push_back(std::make_pair(kPdfClass, static_cast<EventValueType>(forward_pdf_class)));
198  std::sort(vec.begin(), vec.end());
199  to_pdf_->MultiMap(vec, &forward_pdfs);
200  SortAndUniq(&forward_pdfs);
201 
202  if (self_loop_pdf_class < 0) {
203  // Invalid pdf-class because there was no self-loop. Return pairs
204  // where the self-loop pdf-id is -1.
205  for (int32 forward_pdf: forward_pdfs) {
206  pairs->insert(std::pair<int32,int32>(forward_pdf, -1));
207  }
208  return;
209  }
210 
211  // get list of possible self-loop pdfs
212  vec.clear();
213  for (size_t i = 0; i < N_; i++)
214  if (phone_window[i] >= 0)
215  vec.push_back(std::make_pair(static_cast<EventKeyType>(i),
216  static_cast<EventValueType>(phone_window[i])));
217  vec.push_back(std::make_pair(kPdfClass, static_cast<EventValueType>(self_loop_pdf_class)));
218  std::sort(vec.begin(), vec.end());
219  to_pdf_->MultiMap(vec, &self_loop_pdfs);
220  SortAndUniq(&self_loop_pdfs);
221 
222  if (forward_pdfs.size() == 1 || self_loop_pdfs.size() == 1) {
223  for (size_t m = 0; m < forward_pdfs.size(); m++)
224  for (size_t n = 0; n < self_loop_pdfs.size(); n++)
225  pairs->insert(std::make_pair(forward_pdfs[m], self_loop_pdfs[n]));
226  } else {
227  // Choose 'position' as a phone position in 'context' that's currently
228  // -1, and that is as close as possible to the central position P.
229  int32 position = 0;
230  int32 min_dist = N_ - 1;
231  for (int32 i = 0; i < N_; i++) {
232  int32 dist = (P_ - i > 0) ? (P_ - i) : (i - P_);
233  if (phone_window[i] == -1 && dist < min_dist) {
234  position = i;
235  min_dist = dist;
236  }
237  }
238  KALDI_ASSERT(min_dist < N_);
239  KALDI_ASSERT(position != P_);
240 
241  // The next two lines have to do with how BOS/EOS effects are handled in
242  // phone context. Zero phone value in a non-central position (i.e. not
243  // position P_... and 'position' will never equal P_) means 'there is no
244  // phone here because we're at BOS or EOS'.
245  new_phone_window[position] = 0;
246  EnumeratePairs(phones, self_loop_pdf_class, forward_pdf_class,
247  new_phone_window, pairs);
248 
249  for (size_t i = 0 ; i < phones.size(); i++) {
250  new_phone_window[position] = phones[i];
251  EnumeratePairs(phones, self_loop_pdf_class, forward_pdf_class,
252  new_phone_window, pairs);
253  }
254  }
255 }
256 
258  const std::vector<int32> &phones,
259  const std::vector<std::vector<std::pair<int32, int32> > > &pdf_class_pairs,
260  std::vector<std::vector<std::vector<std::pair<int32, int32> > > > *pdf_info) const {
261 
262  KALDI_ASSERT(pdf_info != NULL);
263  pdf_info->resize(1 + *std::max_element(phones.begin(), phones.end()));
264  std::vector<int32> phone_window(N_, -1);
265  EventType vec;
266  for (size_t i = 0 ; i < phones.size(); i++) {
267  // loop over phones
268  int32 phone = phones[i];
269  (*pdf_info)[phone].resize(pdf_class_pairs[phone].size());
270  for (size_t j = 0; j < pdf_class_pairs[phone].size(); j++) {
271  // loop over pdf_class pairs
272  int32 pdf_class = pdf_class_pairs[phone][j].first,
273  self_loop_pdf_class = pdf_class_pairs[phone][j].second;
274  phone_window[P_] = phone;
275 
276  unordered_set<std::pair<int32, int32>, PairHasher<int32> > pairs;
277  EnumeratePairs(phones, self_loop_pdf_class, pdf_class, phone_window, &pairs);
278  unordered_set<std::pair<int32, int32>, PairHasher<int32> >::iterator iter = pairs.begin(),
279  end = pairs.end();
280  for (; iter != end; ++iter)
281  (*pdf_info)[phone][j].push_back(*iter);
282  std::sort( ((*pdf_info)[phone][j]).begin(), ((*pdf_info)[phone][j]).end());
283  }
284  }
285 }
286 
288  const std::vector<int32> &phones,
289  const std::vector<int32> &num_pdf_classes, // indexed by phone,
290  std::vector<std::vector<std::pair<int32, int32> > > *pdf_info) const {
291 
292  EventType vec;
293  KALDI_ASSERT(pdf_info != NULL);
294  pdf_info->resize(NumPdfs());
295  for (size_t i = 0 ; i < phones.size(); i++) {
296  int32 phone = phones[i];
297  vec.clear();
298  vec.push_back(std::make_pair(static_cast<EventKeyType>(P_),
299  static_cast<EventValueType>(phone)));
300  // Now get length.
301  KALDI_ASSERT(static_cast<size_t>(phone) < num_pdf_classes.size());
302  EventAnswerType len = num_pdf_classes[phone];
303 
304  for (int32 pos = 0; pos < len; pos++) {
305  vec.resize(2);
306  vec[0] = std::make_pair(static_cast<EventKeyType>(P_),
307  static_cast<EventValueType>(phone));
308  vec[1] = std::make_pair(kPdfClass, static_cast<EventValueType>(pos));
309  std::sort(vec.begin(), vec.end());
310  std::vector<EventAnswerType> pdfs; // pdfs that can be at this pos as this phone.
311  to_pdf_->MultiMap(vec, &pdfs);
312  SortAndUniq(&pdfs);
313  if (pdfs.empty()) {
314  KALDI_WARN << "ContextDependency::GetPdfInfo, no pdfs returned for position "<< pos << " of phone " << phone << ". Continuing but this is a serious error.";
315  }
316  for (size_t j = 0; j < pdfs.size(); j++) {
317  KALDI_ASSERT(static_cast<size_t>(pdfs[j]) < pdf_info->size());
318  (*pdf_info)[pdfs[j]].push_back(std::make_pair(phone, pos));
319  }
320  }
321  }
322  for (size_t i = 0; i < pdf_info->size(); i++) {
323  std::sort( ((*pdf_info)[i]).begin(), ((*pdf_info)[i]).end());
324  KALDI_ASSERT(IsSortedAndUniq( ((*pdf_info)[i]))); // should have no dups.
325  }
326 }
327 
328 
329 
331 MonophoneContextDependency(const std::vector<int32> &phones,
332  const std::vector<int32> &phone2num_pdf_classes) {
333  std::vector<std::vector<int32> > phone_sets(phones.size());
334  for (size_t i = 0; i < phones.size(); i++) phone_sets[i].push_back(phones[i]);
335  std::vector<bool> share_roots(phones.size(), false); // don't share roots.
336  // N is context size, P = position of central phone (must be 0).
337  int32 num_leaves = 0, P = 0, N = 1;
338  EventMap *pdf_map = GetStubMap(P, phone_sets, phone2num_pdf_classes, share_roots, &num_leaves);
339  return new ContextDependency(N, P, pdf_map);
340 }
341 
343 MonophoneContextDependencyShared(const std::vector<std::vector<int32> > &phone_sets,
344  const std::vector<int32> &phone2num_pdf_classes) {
345  std::vector<bool> share_roots(phone_sets.size(), false); // don't share roots.
346  // N is context size, P = position of central phone (must be 0).
347  int32 num_leaves = 0, P = 0, N = 1;
348  EventMap *pdf_map = GetStubMap(P, phone_sets, phone2num_pdf_classes, share_roots, &num_leaves);
349  return new ContextDependency(N, P, pdf_map);
350 }
351 
352 
353 
354 
355 
356 } // end namespace kaldi.
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void GetPdfInfo(const std::vector< int32 > &phones, const std::vector< int32 > &num_pdf_classes, std::vector< std::vector< std::pair< int32, int32 > > > *pdf_info) const
GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which pairs of (phone...
Definition: context-dep.cc:287
ContextDependency * GenRandContextDependencyLarge(const std::vector< int32 > &phone_ids, int N, int P, bool ensure_all_covered, std::vector< int32 > *hmm_lengths)
GenRandContextDependencyLarge is like GenRandContextDependency but generates a larger tree with speci...
Definition: context-dep.cc:97
This class defines, for each EventKeyType, a set of initial questions that it tries and also a number...
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
virtual bool Compute(const std::vector< int32 > &phoneseq, int32 pdf_class, int32 *pdf_id) const
returns success or failure; outputs pdf to pdf_id For positions that were outside the sequence (due t...
Definition: context-dep.cc:26
ContextDependency * MonophoneContextDependency(const std::vector< int32 > &phones, const std::vector< int32 > &phone2num_pdf_classes)
Definition: context-dep.cc:331
ContextDependency * MonophoneContextDependencyShared(const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes)
Definition: context-dep.cc:343
kaldi::int32 int32
void ReadToken(std::istream &is, bool binary, std::string *str)
ReadToken gets the next token and puts it in str (exception on failure).
Definition: io-funcs.cc:154
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq's (removes duplicates) from a vector.
Definition: stl-utils.h:39
void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P, const std::vector< int32 > &phone_ids, const std::vector< int32 > &phone2hmm_length, const std::vector< bool > &is_ctx_dep, bool ensure_all_phones_covered, BuildTreeStatsType *stats_out)
GenRandStats generates random statistics of the form used by BuildTree.
Definition: build-tree.cc:30
virtual bool Map(const EventType &event, EventAnswerType *ans) const =0
void EnumeratePairs(const std::vector< int32 > &phones, int32 self_loop_pdf_class, int32 forward_pdf_class, const std::vector< int32 > &context, unordered_set< std::pair< int32, int32 >, PairHasher< int32 > > *pairs) const
Definition: context-dep.cc:181
static EventMap * Read(std::istream &is, bool binary)
a Read function that reads an arbitrary EventMap; also works for NULL pointers.
Definition: event-map.cc:36
void DeleteBuildTreeStats(BuildTreeStatsType *stats)
This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL...
static const EventKeyType kPdfClass
Definition: context-dep.h:39
virtual int32 NumPdfs() const
NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
Definition: context-dep.h:71
std::vector< std::pair< EventKeyType, EventValueType > > EventType
Definition: event-map.h:58
void Write(std::ostream &os, bool binary) const
Definition: context-dep.cc:145
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type)
InitRand attempts to generate "reasonable" random questions.
struct rnnlm::@11::@12 n
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
EventMap * GetStubMap(int32 P, const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< bool > &share_roots, int32 *num_leaves_out)
GetStubMap is used in tree-building functions to get the initial to-states map, before the decision-t...
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
Definition: event-map.h:86
void Read(std::istream &is, bool binary)
Read context-dependency object from disk; throws on error.
Definition: context-dep.cc:155
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
EventMap * BuildTree(Questions &qopts, const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< bool > &share_roots, const std::vector< bool > &do_split, const BuildTreeStatsType &stats, BaseFloat thresh, int32 max_leaves, BaseFloat cluster_thresh, int32 P, bool round_num_leaves)
BuildTree is the normal way to build a set of decision trees.
Definition: build-tree.cc:136
#define KALDI_VLOG(v)
Definition: kaldi-error.h:156
ContextDependency * GenRandContextDependency(const std::vector< int32 > &phone_ids, bool ensure_all_covered, std::vector< int32 > *hmm_lengths)
GenRandContextDependency is mainly of use for debugging.
Definition: context-dep.cc:46
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
int32 EventAnswerType
As far as the event-map code itself is concerned, things of type EventAnswerType may take any value e...
Definition: event-map.h:56
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
virtual void MultiMap(const EventType &event, std::vector< EventAnswerType > *ans) const =0
bool IsSortedAndUniq(const std::vector< T > &vec)
Returns true if the vector is sorted and contains each element only once.
Definition: stl-utils.h:63
virtual void Write(std::ostream &os, bool binary)=0
Write to stream.
#define KALDI_COMPILE_TIME_ASSERT(b)
Definition: kaldi-utils.h:131
A hashing function-object for pairs of ints.
Definition: stl-utils.h:235