event-map.cc
Go to the documentation of this file.
1 // tree/event-map.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include <set>
22 #include <string>
23 #include "tree/event-map.h"
24 
25 namespace kaldi {
26 
27 
28 void EventMap::Write(std::ostream &os, bool binary, EventMap *emap) {
29  if (emap == NULL) {
30  WriteToken(os, binary, "NULL");
31  } else {
32  emap->Write(os, binary);
33  }
34 }
35 
36 EventMap *EventMap::Read(std::istream &is, bool binary) {
37  char c = Peek(is, binary);
38  if (c == 'N') {
39  ExpectToken(is, binary, "NULL");
40  return NULL;
41  } else if (c == 'C') {
42  return ConstantEventMap::Read(is, binary);
43  } else if (c == 'T') {
44  return TableEventMap::Read(is, binary);
45  } else if (c == 'S') {
46  return SplitEventMap::Read(is, binary);
47  } else {
48  KALDI_ERR << "EventMap::read, was not expecting character " << CharToString(c)
49  << ", at file position " << is.tellg();
50  return NULL; // suppress warning.
51  }
52 }
53 
54 
55 void ConstantEventMap::Write(std::ostream &os, bool binary) {
56  WriteToken(os, binary, "CE");
57  WriteBasicType(os, binary, answer_);
58  if (os.fail()) {
59  KALDI_ERR << "ConstantEventMap::Write(), could not write to stream.";
60  }
61 }
62 
63 // static member function.
64 ConstantEventMap* ConstantEventMap::Read(std::istream &is, bool binary) {
65  ExpectToken(is, binary, "CE");
66  EventAnswerType answer;
67  ReadBasicType(is, binary, &answer);
68  return new ConstantEventMap(answer);
69 }
70 
72  std::vector<EventMap*> table;
73  table.reserve(table_.size());
74  EventValueType size = table_.size();
75  for (EventKeyType value = 0; value < size; value++) {
76  if (table_[value] != NULL) {
77  EventMap *pruned_map = table_[value]->Prune();
78  if (pruned_map != NULL) {
79  table.resize(value + 1, NULL);
80  table[value] = pruned_map;
81  }
82  }
83  }
84  if (table.empty()) return NULL;
85  else return new TableEventMap(key_, table);
86 }
87 
89  const unordered_set<EventKeyType> &keys_to_map,
90  const unordered_map<EventValueType,EventValueType> &value_map) const {
91  std::vector<EventMap*> table;
92  table.reserve(table_.size());
93  EventValueType size = table_.size();
94  for (EventValueType value = 0; value < size; value++) {
95  if (table_[value] != NULL) {
96  EventMap *this_map = table_[value]->MapValues(keys_to_map, value_map);
97  EventValueType mapped_value;
98 
99  if (keys_to_map.count(key_) == 0) mapped_value = value;
100  else {
101  unordered_map<EventValueType,EventValueType>::const_iterator
102  iter = value_map.find(value);
103  if (iter == value_map.end()) {
104  KALDI_ERR << "Could not map value " << value
105  << " for key " << key_;
106  }
107  mapped_value = iter->second;
108  }
109  KALDI_ASSERT(mapped_value >= 0);
110  if (static_cast<EventValueType>(table.size()) <= mapped_value)
111  table.resize(mapped_value + 1, NULL);
112  if (table[mapped_value] != NULL)
113  KALDI_ERR << "Multiple values map to the same point: this code cannot "
114  << "handle this case.";
115  table[mapped_value] = this_map;
116  }
117  }
118  return new TableEventMap(key_, table);
119 }
120 
121 
122 void TableEventMap::Write(std::ostream &os, bool binary) {
123  WriteToken(os, binary, "TE");
124  WriteBasicType(os, binary, key_);
125  uint32 size = table_.size();
126  WriteBasicType(os, binary, size);
127  WriteToken(os, binary, "(");
128  for (size_t t = 0; t < size; t++) {
129  // This Write function works for NULL pointers.
130  EventMap::Write(os, binary, table_[t]);
131  }
132  WriteToken(os, binary, ")");
133  if (!binary) os << '\n';
134  if (os.fail()) {
135  KALDI_ERR << "TableEventMap::Write(), could not write to stream.";
136  }
137 }
138 
139 // static member function.
140 TableEventMap* TableEventMap::Read(std::istream &is, bool binary) {
141  ExpectToken(is, binary, "TE");
142  EventKeyType key;
143  ReadBasicType(is, binary, &key);
144  uint32 size;
145  ReadBasicType(is, binary, &size);
146  std::vector<EventMap*> table(size);
147  ExpectToken(is, binary, "(");
148  for (size_t t = 0; t < size; t++) {
149  // This Read function works for NULL pointers.
150  table[t] = EventMap::Read(is, binary);
151  }
152  ExpectToken(is, binary, ")");
153  return new TableEventMap(key, table);
154 }
155 
157  EventMap *yes = yes_->Prune(),
158  *no = no_->Prune();
159  if (yes == NULL && no == NULL) return NULL;
160  else if (yes == NULL) return no;
161  else if (no == NULL) return yes;
162  else return new SplitEventMap(key_, yes_set_, yes, no);
163 }
164 
166  const unordered_set<EventKeyType> &keys_to_map,
167  const unordered_map<EventValueType,EventValueType> &value_map) const {
168  EventMap *yes = yes_->MapValues(keys_to_map, value_map),
169  *no = no_->MapValues(keys_to_map, value_map);
170 
171  if (keys_to_map.count(key_) == 0) {
172  return new SplitEventMap(key_, yes_set_, yes, no);
173  } else {
174  std::vector<EventValueType> yes_set;
175  for (ConstIntegerSet<EventValueType>::iterator iter = yes_set_.begin();
176  iter != yes_set_.end();
177  ++iter) {
178  EventValueType value = *iter;
179  unordered_map<EventValueType, EventValueType>::const_iterator
180  map_iter = value_map.find(value);
181  if (map_iter == value_map.end())
182  KALDI_ERR << "Value " << value << ", for key "
183  << key_ << ", cannot be mapped.";
184  EventValueType mapped_value = map_iter->second;
185  yes_set.push_back(mapped_value);
186  }
187  SortAndUniq(&yes_set);
188  return new SplitEventMap(key_, yes_set, yes, no);
189  }
190 }
191 
192 void SplitEventMap::Write(std::ostream &os, bool binary) {
193  WriteToken(os, binary, "SE");
194  WriteBasicType(os, binary, key_);
195  // WriteIntegerVector(os, binary, yes_set_);
196  yes_set_.Write(os, binary);
197  KALDI_ASSERT(yes_ != NULL && no_ != NULL);
198  WriteToken(os, binary, "{");
199  yes_->Write(os, binary);
200  no_->Write(os, binary);
201  WriteToken(os, binary, "}");
202  if (!binary) os << '\n';
203  if (os.fail()) {
204  KALDI_ERR << "SplitEventMap::Write(), could not write to stream.";
205  }
206 }
207 
208 // static member function.
209 SplitEventMap* SplitEventMap::Read(std::istream &is, bool binary) {
210  ExpectToken(is, binary, "SE");
211  EventKeyType key;
212  ReadBasicType(is, binary, &key);
213  // std::vector<EventValueType> yes_set;
214  // ReadIntegerVector(is, binary, &yes_set);
216  yes_set.Read(is, binary);
217  ExpectToken(is, binary, "{");
218  EventMap *yes = EventMap::Read(is, binary);
219  EventMap *no = EventMap::Read(is, binary);
220  ExpectToken(is, binary, "}");
221  // yes and no should be non-NULL because NULL values are not valid for SplitEventMap;
222  // the constructor checks this. Therefore this is an unlikely error.
223  if (yes == NULL || no == NULL) KALDI_ERR << "SplitEventMap::Read, NULL pointers.";
224  return new SplitEventMap(key, yes_set, yes, no);
225 }
226 
227 
228 void WriteEventType(std::ostream &os, bool binary, const EventType &evec) {
229  WriteToken(os, binary, "EV");
230  uint32 size = evec.size();
231  WriteBasicType(os, binary, size);
232  for (size_t i = 0; i < size; i++) {
233  WriteBasicType(os, binary, evec[i].first);
234  WriteBasicType(os, binary, evec[i].second);
235  }
236  if (!binary) os << '\n';
237 }
238 
239 void ReadEventType(std::istream &is, bool binary, EventType *evec) {
240  KALDI_ASSERT(evec != NULL);
241  ExpectToken(is, binary, "EV");
242  uint32 size;
243  ReadBasicType(is, binary, &size);
244  evec->resize(size);
245  for (size_t i = 0; i < size; i++) {
246  ReadBasicType(is, binary, &( (*evec)[i].first ));
247  ReadBasicType(is, binary, &( (*evec)[i].second ));
248  }
249 }
250 
251 
252 
253 std::string EventTypeToString(const EventType &evec) {
254  std::stringstream ss;
255  EventType::const_iterator iter = evec.begin(), end = evec.end();
256  std::string sep = "";
257  for (; iter != end; ++iter) {
258  ss << sep << iter->first <<":"<<iter->second;
259  sep = " ";
260  }
261  return ss.str();
262 }
263 
265  EventType::const_iterator iter = vec.begin(), end = vec.end();
266  size_t ans = 0;
267  const size_t kPrime1=47087, kPrime2=1321;
268  for (; iter != end; ++iter) {
269 #ifdef KALDI_PARANOID // Check names are distinct and increasing.
270  EventType::const_iterator iter2=iter; iter2++;
271  if (iter2 != end) { KALDI_ASSERT(iter->first < iter2->first); }
272 #endif
273  ans += iter->first + kPrime1*iter->second;
274  ans *= kPrime2;
275  }
276  return ans;
277 }
278 
279 
280 // static member of EventMap.
281 void EventMap::Check(const std::vector<std::pair<EventKeyType, EventValueType> > &event) {
282  // will crash if not sorted or has duplicates
283  size_t sz = event.size();
284  for (size_t i = 0;i+1 < sz;i++)
285  KALDI_ASSERT(event[i].first < event[i+1].first);
286 }
287 
288 
289 // static member of EventMap.
290 bool EventMap::Lookup(const EventType &event,
291  EventKeyType key, EventValueType *ans) {
292  // this assumes that the "event" array is sorted (e.g. on the KeyType value;
293  // just doing std::sort will do this) and has no duplicate values with the same
294  // key. call Check() to verify this.
295 #ifdef KALDI_PARANOID
296  Check(event);
297 #endif
298  std::vector<std::pair<EventKeyType, EventValueType> >::const_iterator
299  begin = event.begin(),
300  end = event.end(),
301  middle; // "middle" is used as a temporary variable in the algorithm.
302  // begin and sz store the current region where the first instance of
303  // "value" might appear.
304  // This is like this stl algorithm "lower_bound".
305  size_t sz = end-begin, half;
306  while (sz > 0) {
307  half = sz >> 1;
308  middle = begin + half; // "end" here is now reallly the middle.
309  if (middle->first < key) {
310  begin = middle;
311  ++begin;
312  sz = sz - half - 1;
313  } else {
314  sz = half;
315  }
316  }
317  if (begin != end && begin->first == key) {
318  *ans = begin->second;
319  return true;
320  } else {
321  return false;
322  }
323 }
324 
325 TableEventMap::TableEventMap(EventKeyType key, const std::map<EventValueType, EventMap*> &map_in): key_(key) {
326  if (map_in.size() == 0)
327  return; // empty table.
328  else {
329  EventValueType highest_val = map_in.rbegin()->first;
330  table_.resize(highest_val+1, NULL);
331  std::map<EventValueType, EventMap*>::const_iterator iter = map_in.begin(), end = map_in.end();
332  for (; iter != end; ++iter) {
333  KALDI_ASSERT(iter->first >= 0 && iter->first <= highest_val);
334  table_[iter->first] = iter->second;
335  }
336  }
337 }
338 
339 TableEventMap::TableEventMap(EventKeyType key, const std::map<EventValueType, EventAnswerType> &map_in): key_(key) {
340  if (map_in.size() == 0)
341  return; // empty table.
342  else {
343  EventValueType highest_val = map_in.rbegin()->first;
344  table_.resize(highest_val+1, NULL);
345  std::map<EventValueType, EventAnswerType>::const_iterator iter = map_in.begin(), end = map_in.end();
346  for (; iter != end; ++iter) {
347  KALDI_ASSERT(iter->first >= 0 && iter->first <= highest_val);
348  table_[iter->first] = new ConstantEventMap(iter->second);
349  }
350  }
351 }
352 
353 // This function is only used inside this .cc file so make it static.
354 static bool IsLeafNode(const EventMap *e) {
355  std::vector<EventMap*> children;
356  e->GetChildren(&children);
357  return children.empty();
358 }
359 
360 
361 // This helper function called from GetTreeStructure outputs the tree structure
362 // of the EventMap in a more convenient form. At input, the objects pointed to
363 // by last three pointers should be empty. The function will return false if
364 // the EventMap "map" doesn't have the required structure (see the comments in
365 // the header for GetTreeStructure). If it returns true, then at output,
366 // "nonleaf_nodes" will be a vector of pointers to the EventMap* values
367 // corresponding to nonleaf nodes, in an order where the root node comes first
368 // and child nodes are after their parents; "nonleaf_parents" will be a map
369 // from each nonleaf node to its parent, and the root node points to itself;
370 // and "leaf_parents" will be a map from the numeric id of each leaf node
371 // (corresponding to the value returned by the EventMap) to its parent node;
372 // leaf_parents will contain no NULL pointers, otherwise we would have returned
373 // false as the EventMap would not have had the required structure.
374 
376  const EventMap &map,
377  std::vector<const EventMap*> *nonleaf_nodes,
378  std::map<const EventMap*, const EventMap*> *nonleaf_parents,
379  std::vector<const EventMap*> *leaf_parents) {
380 
381  std::vector<const EventMap*> queue; // parents to be processed.
382 
383  const EventMap *top_node = &map;
384 
385  queue.push_back(top_node);
386  nonleaf_nodes->push_back(top_node);
387  (*nonleaf_parents)[top_node] = top_node;
388 
389  while (!queue.empty()) {
390  const EventMap *parent = queue.back();
391  queue.pop_back();
392  std::vector<EventMap*> children;
393  parent->GetChildren(&children);
394  KALDI_ASSERT(!children.empty());
395  for (size_t i = 0; i < children.size(); i++) {
396  EventMap *child = children[i];
397  if (IsLeafNode(child)) {
398  int32 leaf;
399  if (!child->Map(EventType(), &leaf)
400  || leaf < 0) return false;
401  if (static_cast<int32>(leaf_parents->size()) <= leaf)
402  leaf_parents->resize(leaf+1, NULL);
403  if ((*leaf_parents)[leaf] != NULL) {
404  KALDI_WARN << "Repeated leaf! Did you suppress leaf clustering when building the tree?";
405  return false; // repeated leaf.
406  }
407  (*leaf_parents)[leaf] = parent;
408  } else {
409  nonleaf_nodes->push_back(child);
410  (*nonleaf_parents)[child] = parent;
411  queue.push_back(child);
412  }
413  }
414  }
415 
416  for (size_t i = 0; i < leaf_parents->size(); i++)
417  if ((*leaf_parents)[i] == NULL) {
418  KALDI_WARN << "non-consecutively numbered leaves";
419  return false;
420  }
421  // non-consecutively numbered leaves.
422 
423  KALDI_ASSERT(!leaf_parents->empty()); // or no leaves.
424 
425  return true;
426 }
427 
428 // See the header for a description of what this function does.
429 bool GetTreeStructure(const EventMap &map,
430  int32 *num_leaves,
431  std::vector<int32> *parents) {
432  KALDI_ASSERT (num_leaves != NULL && parents != NULL);
433 
434  if (IsLeafNode(&map)) { // handle degenerate case where root is a leaf.
435  int32 leaf;
436  if (!map.Map(EventType(), &leaf)
437  || leaf != 0) return false;
438  *num_leaves = 1;
439  parents->resize(1);
440  (*parents)[0] = 0;
441  return true;
442  }
443 
444 
445  // This vector gives the address of nonleaf nodes in the tree,
446  // in a numbering where 0 is the root and children always come
447  // after parents.
448  std::vector<const EventMap*> nonleaf_nodes;
449 
450  // Map from each nonleaf node to its parent node
451  // (or to itself for the root node).
452  std::map<const EventMap*, const EventMap*> nonleaf_parents;
453 
454  // Map from leaf nodes to their parent nodes.
455  std::vector<const EventMap*> leaf_parents;
456 
457  if (!GetTreeStructureInternal(map, &nonleaf_nodes,
458  &nonleaf_parents,
459  &leaf_parents)) return false;
460 
461  *num_leaves = leaf_parents.size();
462  int32 num_nodes = leaf_parents.size() + nonleaf_nodes.size();
463 
464  std::map<const EventMap*, int32> nonleaf_indices;
465 
466  // number the nonleaf indices so they come after the leaf
467  // indices and the root is last.
468  for (size_t i = 0; i < nonleaf_nodes.size(); i++)
469  nonleaf_indices[nonleaf_nodes[i]] = num_nodes - i - 1;
470 
471  parents->resize(num_nodes);
472  for (size_t i = 0; i < leaf_parents.size(); i++) {
473  KALDI_ASSERT(nonleaf_indices.count(leaf_parents[i]) != 0);
474  (*parents)[i] = nonleaf_indices[leaf_parents[i]];
475  }
476  for (size_t i = 0; i < nonleaf_nodes.size(); i++) {
477  KALDI_ASSERT(nonleaf_indices.count(nonleaf_nodes[i]) != 0);
478  KALDI_ASSERT(nonleaf_parents.count(nonleaf_nodes[i]) != 0);
479  KALDI_ASSERT(nonleaf_indices.count(nonleaf_parents[nonleaf_nodes[i]]) != 0);
480  int32 index = nonleaf_indices[nonleaf_nodes[i]],
481  parent_index = nonleaf_indices[nonleaf_parents[nonleaf_nodes[i]]];
482  KALDI_ASSERT(index > 0 && parent_index >= index);
483  (*parents)[index] = parent_index;
484  }
485  for (int32 i = 0; i < num_nodes; i++)
486  KALDI_ASSERT ((*parents)[i] > i || (i+1==num_nodes && (*parents)[i] == i));
487  return true;
488 }
489 
490 
491 
492 } // end namespace kaldi
bool GetTreeStructure(const EventMap &map, int32 *num_leaves, std::vector< int32 > *parents)
This function gets the tree structure of the EventMap "map" in a convenient form. ...
Definition: event-map.cc:429
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual EventMap * MapValues(const unordered_set< EventKeyType > &keys_to_map, const unordered_map< EventValueType, EventValueType > &value_map) const
Definition: event-map.cc:165
virtual void Write(std::ostream &os, bool binary)
Write to stream.
Definition: event-map.cc:122
void ReadBasicType(std::istream &is, bool binary, T *t)
ReadBasicType is the name of the read function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:55
static TableEventMap * Read(std::istream &is, bool binary)
Definition: event-map.cc:140
std::string EventTypeToString(const EventType &evec)
Definition: event-map.cc:253
kaldi::int32 int32
void SortAndUniq(std::vector< T > *vec)
Sorts and uniq&#39;s (removes duplicates) from a vector.
Definition: stl-utils.h:39
EventKeyType key_
Definition: event-map.h:261
int Peek(std::istream &is, bool binary)
Peek consumes whitespace (if binary == false) and then returns the peek() value of the stream...
Definition: io-funcs.cc:145
virtual bool Map(const EventType &event, EventAnswerType *ans) const =0
static EventMap * Read(std::istream &is, bool binary)
a Read function that reads an arbitrary EventMap; also works for NULL pointers.
Definition: event-map.cc:36
std::vector< EventMap * > table_
Definition: event-map.h:262
void WriteEventType(std::ostream &os, bool binary, const EventType &evec)
Definition: event-map.cc:228
virtual void Write(std::ostream &os, bool binary)
Write to stream.
Definition: event-map.cc:55
static void Check(const EventType &event)
Definition: event-map.cc:281
std::vector< std::pair< EventKeyType, EventValueType > > EventType
Definition: event-map.h:58
void ExpectToken(std::istream &is, bool binary, const char *token)
ExpectToken tries to read in the given token, and throws an exception on failure. ...
Definition: io-funcs.cc:191
size_t operator()(const EventType &vec)
Definition: event-map.cc:264
virtual EventMap * MapValues(const unordered_set< EventKeyType > &keys_to_map, const unordered_map< EventValueType, EventValueType > &value_map) const
Definition: event-map.cc:88
int32 EventKeyType
Things of type EventKeyType can take any value.
Definition: event-map.h:45
void ReadEventType(std::istream &is, bool binary, EventType *evec)
Definition: event-map.cc:239
#define KALDI_ERR
Definition: kaldi-error.h:147
virtual EventMap * MapValues(const unordered_set< EventKeyType > &keys_to_map, const unordered_map< EventValueType, EventValueType > &value_map) const =0
#define KALDI_WARN
Definition: kaldi-error.h:150
void WriteToken(std::ostream &os, bool binary, const char *token)
The WriteToken functions are for writing nonempty sequences of non-space characters.
Definition: io-funcs.cc:134
static bool GetTreeStructureInternal(const EventMap &map, std::vector< const EventMap *> *nonleaf_nodes, std::map< const EventMap *, const EventMap *> *nonleaf_parents, std::vector< const EventMap *> *leaf_parents)
Definition: event-map.cc:375
std::string CharToString(const char &c)
Definition: kaldi-utils.cc:36
void Read(std::istream &is, bool binary)
virtual void Write(std::ostream &os, bool binary)
Write to stream.
Definition: event-map.cc:192
static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans)
Definition: event-map.cc:290
A class that is capable of representing a generic mapping from EventType (which is a vector of (key...
Definition: event-map.h:86
static bool IsLeafNode(const EventMap *e)
Definition: event-map.cc:354
static SplitEventMap * Read(std::istream &is, bool binary)
Definition: event-map.cc:209
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
iterator begin() const
virtual void GetChildren(std::vector< EventMap *> *out) const =0
TableEventMap(EventKeyType key, const std::vector< EventMap *> &table)
Takes ownership of pointers.
Definition: event-map.h:243
void WriteBasicType(std::ostream &os, bool binary, T t)
WriteBasicType is the name of the write function for bool, integer types, and floating-point types...
Definition: io-funcs-inl.h:34
virtual EventMap * Prune() const
Definition: event-map.cc:71
static ConstantEventMap * Read(std::istream &is, bool binary)
Definition: event-map.cc:64
int32 EventAnswerType
As far as the event-map code itself is concerned, things of type EventAnswerType may take any value e...
Definition: event-map.h:56
int32 EventValueType
Given current code, things of type EventValueType should generally be nonnegative and in a reasonably...
Definition: event-map.h:51
virtual EventMap * Prune() const =0
virtual EventMap * Prune() const
Definition: event-map.cc:156
virtual void Write(std::ostream &os, bool binary)=0
Write to stream.