build-tree.h
Go to the documentation of this file.
1 // tree/build-tree.h
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #ifndef KALDI_TREE_BUILD_TREE_H_
21 #define KALDI_TREE_BUILD_TREE_H_
22 
23 // The file build-tree.h contains outer-level routines used in tree-building
24 // and related tasks, that are directly called by the command-line tools.
25 
26 #include "tree/build-tree-utils.h"
27 #include "tree/context-dep.h"
28 namespace kaldi {
29 
34 
35 // Note, in tree_group_top we also include AccumulateTreeStats, in
36 // ../hmm/tree-accu.h (it has some extra dependencies so we didn't
37 // want to include it here).
38 
/// BuildTree is the normal way to build a set of decision trees.
///
/// Returns a pointer to an EventMap.  This declaration does not state who
/// owns the returned object.
/// NOTE(review): presumably the caller takes ownership and must delete it --
/// confirm against build-tree.cc.
///
/// Parameter notes (hedged where this header does not establish semantics):
///  @param qopts  Questions object, passed by non-const reference.
///  @param phone_sets  Sets of phones, one inner vector per set.
///  @param phone2num_pdf_classes  Presumably indexed by phone id and giving
///              the number of pdf-classes for that phone -- TODO confirm.
///  @param share_roots  Presumably parallel to phone_sets -- TODO confirm.
///  @param do_split  Presumably parallel to phone_sets -- TODO confirm.
///  @param stats  Tree statistics (read-only).
///  @param thresh  Threshold used when building the tree (exact meaning is
///              defined by the implementation).
///  @param max_leaves  Limit on the number of leaves.
///  @param cluster_thresh  Typically equal to thresh; if negative, the
///              smallest split is used (see the inline comment below).
///  @param P  NOTE(review): looks like a context-position index -- confirm.
///  @param round_num_leaves  Defaults to true.
87 EventMap *BuildTree(Questions &qopts,
88  const std::vector<std::vector<int32> > &phone_sets,
89  const std::vector<int32> &phone2num_pdf_classes,
90  const std::vector<bool> &share_roots,
91  const std::vector<bool> &do_split,
92  const BuildTreeStatsType &stats,
93  BaseFloat thresh,
94  int32 max_leaves,
95  BaseFloat cluster_thresh, // typically == thresh. If negative, use smallest split.
96  int32 P,
97  bool round_num_leaves = true);
98 
99 
/// BuildTreeTwoLevel builds a two-level tree, useful for example in building
/// tied mixture systems.
///
/// The first six parameters have the same names and types as the first six
/// parameters of BuildTree().  Returns a pointer to an EventMap; ownership is
/// not stated by this declaration.
/// NOTE(review): presumably the caller must delete the result -- confirm.
///
///  @param max_leaves_first  Leaf limit for the first level (by name --
///              TODO confirm exact semantics in build-tree.cc).
///  @param max_leaves_second  Leaf limit for the second level (by name --
///              TODO confirm).
///  @param cluster_leaves  Boolean switch; presumably enables a leaf
///              clustering step -- TODO confirm.
///  @param P  NOTE(review): looks like a context-position index -- confirm.
///  @param leaf_map  Output written through the pointer; presumably relates
///              leaves of the two levels -- TODO confirm direction and
///              indexing.
151 EventMap *BuildTreeTwoLevel(Questions &qopts,
152  const std::vector<std::vector<int32> > &phone_sets,
153  const std::vector<int32> &phone2num_pdf_classes,
154  const std::vector<bool> &share_roots,
155  const std::vector<bool> &do_split,
156  const BuildTreeStatsType &stats,
157  int32 max_leaves_first,
158  int32 max_leaves_second,
159  bool cluster_leaves,
160  int32 P,
161  std::vector<int32> *leaf_map);
162 
163 
177 
189 
/// GenRandStats generates random statistics of the form used by BuildTree.
///
/// The result is written to *stats_out.
///
///  @param dim  NOTE(review): presumably the feature dimension of the
///              generated statistics -- confirm.
///  @param num_stats  Presumably the number of statistics to generate --
///              TODO confirm.
///  @param N  NOTE(review): looks like the context-window width -- confirm.
///  @param P  NOTE(review): looks like a position within that window --
///              confirm.
///  @param phone_ids  List of phone ids to draw from (by name).
///  @param hmm_lengths  Note: the definition in build-tree.cc names this
///              parameter phone2hmm_length, which suggests it is indexed by
///              phone id -- TODO confirm.
///  @param is_ctx_dep  Per-phone boolean flags (by name; indexing not shown
///              here).
///  @param ensure_all_phones_covered  Boolean switch; see build-tree.cc for
///              the exact effect.
///  @param stats_out  Output statistics.
190 void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P,
191  const std::vector<int32> &phone_ids,
192  const std::vector<int32> &hmm_lengths,
193  const std::vector<bool> &is_ctx_dep,
194  bool ensure_all_phones_covered,
195  BuildTreeStatsType *stats_out);
196 
197 
/// Included here because it's used in some tree-building calling code.
///
/// Output is written to *syms.
/// NOTE(review): by name, this reads the symbol table stored in `filename`
/// and outputs the integer ids it contains; `include_eps` presumably
/// controls whether the epsilon symbol is included -- confirm against
/// build-tree.cc.
/// Note that `filename` is taken by value.
201 void ReadSymbolTableAsIntegers(std::string filename,
202  bool include_eps,
203  std::vector<int32> *syms);
204 
205 
206 
/// Outputs sets of phones that are reasonable for questions to ask in the
/// tree-building algorithm.
///
/// The result is written to *questions_out as a vector of phone sets.
///
///  @param stats  Tree statistics, passed by non-const reference.
///  @param phone_sets_in  Initial sets of phones.
///  @param all_pdf_classes_in  NOTE(review): presumably the pdf-classes to
///              consider -- confirm against build-tree.cc.
///  @param P  NOTE(review): looks like a context-position index -- confirm.
///  @param questions_out  Output sets of phones.
225 void AutomaticallyObtainQuestions(BuildTreeStatsType &stats,
226  const std::vector<std::vector<int32> > &phone_sets_in,
227  const std::vector<int32> &all_pdf_classes_in,
228  int32 P,
229  std::vector<std::vector<int32> > *questions_out);
230 
234 
/// This function clusters the phones (or some initially specified sets of
/// phones) into sets of phones.
///
/// The result is written to *sets_out.
///
///  @param stats  Tree statistics, passed by non-const reference.
///  @param phone_sets_in  Initial sets of phones.
///  @param all_pdf_classes_in  NOTE(review): presumably the pdf-classes to
///              consider -- confirm against build-tree.cc.
///  @param P  NOTE(review): looks like a context-position index -- confirm.
///  @param num_classes  Presumably the number of clusters requested --
///              TODO confirm.
///  @param sets_out  Output sets of phones.
235 void KMeansClusterPhones(BuildTreeStatsType &stats,
236  const std::vector<std::vector<int32> > &phone_sets_in,
237  const std::vector<int32> &all_pdf_classes_in,
238  int32 P,
239  int32 num_classes,
240  std::vector<std::vector<int32> > *sets_out);
241 
/// Reads the roots file (throws on error).
///
/// Input is read from the stream `is`; results are written through the three
/// output pointers.
///
///  @param is  Input stream to read from.
///  @param phone_sets  Output sets of phones, one inner vector per set.
///  @param is_shared_root  Output flags; presumably one per entry of
///              *phone_sets -- TODO confirm.
///  @param is_split_root  Output flags; presumably one per entry of
///              *phone_sets -- TODO confirm.
246 void ReadRootsFile(std::istream &is,
247  std::vector<std::vector<int32> > *phone_sets,
248  std::vector<bool> *is_shared_root,
249  std::vector<bool> *is_split_root);
250 
251 
253 
254 }// end namespace kaldi
255 
256 #endif
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
EventMap * BuildTreeTwoLevel(Questions &qopts, const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< bool > &share_roots, const std::vector< bool > &do_split, const BuildTreeStatsType &stats, int32 max_leaves_first, int32 max_leaves_second, bool cluster_leaves, int32 P, std::vector< int32 > *leaf_map)
BuildTreeTwoLevel builds a two-level tree, useful for example in building tied mixture systems with m...
Definition: build-tree.cc:387
void AutomaticallyObtainQuestions(BuildTreeStatsType &stats, const std::vector< std::vector< int32 > > &phone_sets_in, const std::vector< int32 > &all_pdf_classes_in, int32 P, std::vector< std::vector< int32 > > *questions_out)
Outputs sets of phones that are reasonable for questions to ask in the tree-building algorithm...
Definition: build-tree.cc:615
kaldi::int32 int32
void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P, const std::vector< int32 > &phone_ids, const std::vector< int32 > &phone2hmm_length, const std::vector< bool > &is_ctx_dep, bool ensure_all_phones_covered, BuildTreeStatsType *stats_out)
GenRandStats generates random statistics of the form used by BuildTree.
Definition: build-tree.cc:30
float BaseFloat
Definition: kaldi-types.h:29
void ReadSymbolTableAsIntegers(std::string filename, bool include_eps, std::vector< int32 > *syms)
included here because it's used in some tree-building calling code.
Definition: build-tree.cc:502
EventMap * BuildTree(Questions &qopts, const std::vector< std::vector< int32 > > &phone_sets, const std::vector< int32 > &phone2num_pdf_classes, const std::vector< bool > &share_roots, const std::vector< bool > &do_split, const BuildTreeStatsType &stats, BaseFloat thresh, int32 max_leaves, BaseFloat cluster_thresh, int32 P, bool round_num_leaves)
BuildTree is the normal way to build a set of decision trees.
Definition: build-tree.cc:136
void ReadRootsFile(std::istream &is, std::vector< std::vector< int32 > > *phone_sets, std::vector< bool > *is_shared_root, std::vector< bool > *is_split_root)
Reads the roots file (throws on error).
Definition: build-tree.cc:857
std::vector< std::pair< EventType, Clusterable * > > BuildTreeStatsType
void KMeansClusterPhones(BuildTreeStatsType &stats, const std::vector< std::vector< int32 > > &phone_sets_in, const std::vector< int32 > &all_pdf_classes_in, int32 P, int32 num_classes, std::vector< std::vector< int32 > > *sets_out)
This function clusters the phones (or some initially specified sets of phones) into sets of phones...
Definition: build-tree.cc:748