regression-tree-test.cc
Go to the documentation of this file.
1 // transform/regression-tree-test.cc
2 
3 // Copyright 2009-2011 Jan Silovsky; Saarland University
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
21 #include "util/common-utils.h"
22 
23 using namespace kaldi;
24 
25 void
26 test_io(const RegressionTree &regtree,
27  const AmDiagGmm &acmodel,
28  bool binary) {
29  std::cout << "Testing I/O, binary = " << binary << '\n';
30 
31  regtree.Write(Output("tmp_regtree", binary).Stream(),
32  binary);
33 
34  bool binary_in;
35  RegressionTree regtree2;
36 
37  Input ki("tmp_regtree", &binary_in);
38  regtree2.Read(ki.Stream(),
39  binary_in, acmodel);
40 
41  std::ostringstream s1, s2;
42  regtree.Write(s1, false);
43  regtree2.Write(s2, false);
44  KALDI_ASSERT(s1.str() == s2.str());
45 
46  unlink("tmp_regtree");
47 }
48 
49 // void
50 // join_gmm(const DiagGmm &gmm1, const DiagGmm &gmm2, DiagGmm *gmm) {
51 // KALDI_ASSERT(gmm1.Dimension() == gmm2.Dimension());
52 // size_t num_comp = gmm1.NumGauss() + gmm2.NumGauss();
53 // size_t dim = gmm1.Dimension();
54 //
55 // Matrix<BaseFloat> means1(gmm1.NumGauss());
56 // size_t num_comp
57 // }
58 
59 void
60 rand_diag_gmm(size_t num_comp, size_t dim, DiagGmm *gmm) {
61  Vector<BaseFloat> weights(num_comp);
62  Matrix<BaseFloat> means(num_comp, dim);
63  Matrix<BaseFloat> vars(num_comp, dim);
64 
65  BaseFloat tot_weight = 0.0;
66  for (size_t m = 0; m < num_comp; m++) {
67  weights(m) = kaldi::RandUniform();
68  for (size_t d= 0; d < dim; d++) {
69  means(m, d) = kaldi::RandGauss();
70  vars(m, d) = Exp(kaldi::RandGauss()) + 1e-5;
71  }
72  tot_weight += weights(m);
73  }
74  weights.Scale(1.0/tot_weight);
75 
76  vars.InvertElements();
77  gmm->SetWeights(weights);
78  gmm->SetInvVarsAndMeans(vars, means);
79  gmm->ComputeGconsts();
80 }
81 
82 void
84  // using namespace kaldi;
85 
86  // dimension of the gmm
87  // size_t dim = kaldi::RandInt(5, 20);
88  size_t dim = 2;
89 
90  // number of mixtures in the data
91  size_t num_comp = kaldi::RandInt(2, 2);;
92 
93  std::cout << "Running test with " << num_comp << " components and "
94  << dim << " dimensional vectors" << '\n';
95 
96  // generate random gmm
97  DiagGmm gmm1;
98  gmm1.Resize(num_comp, dim);
99  rand_diag_gmm(num_comp, dim, &gmm1);
100 
101  // shift means for components
102  Matrix<BaseFloat> means2(num_comp, dim);
103  Vector<BaseFloat> tmp_vec(dim);
104  gmm1.GetMeans(&means2);
105  for (int32 c = 0; c < static_cast<int32>(num_comp); c++) {
106  // tmp_vec.SetRandn();
107  // tmp_vec.Scale(0.01);
108  tmp_vec.Set(0.001 * means2.Row(c).Max());
109  means2.Row(c).AddVec(1.0, tmp_vec);
110  }
111 
112  // let's have another gmm with shifted means
113  DiagGmm gmm2;
114  gmm2.CopyFromDiagGmm(gmm1);
115  gmm2.SetMeans(means2);
116 
117  AmDiagGmm acmodel;
118  acmodel.AddPdf(gmm1);
119  acmodel.AddPdf(gmm2);
120 
121  // let's have uniform occupancies
122  size_t num_pdfs = 2;
123  Vector<BaseFloat> occs(num_pdfs);
124  for (int32 i = 0; i < static_cast<int32>(num_pdfs); i++) {
125  occs(i) = 1.0/static_cast<BaseFloat>(num_pdfs*num_comp);
126  }
127 
128  for (int32 i = 0; i < gmm1.NumGauss(); i++) {
129  gmm1.GetComponentMean(i, &tmp_vec);
130  tmp_vec.Write(std::cout, false);
131  gmm2.GetComponentMean(i, &tmp_vec);
132  tmp_vec.Write(std::cout, false);
133  }
134 
135  RegressionTree regtree;
136  std::vector<int32> sil_pdfs;
137  if (Rand() % 2 == 0)
138  sil_pdfs.push_back(Rand() % 2);
139  regtree.BuildTree(occs, sil_pdfs, acmodel, 2);
140 
141  // test I/O
142  test_io(regtree, acmodel, false);
143  // test_io(regtree, acmodel, true);
144 }
145 
146 int
147 main() {
148  // repeat the test X times
149  for (int i = 0; i < 4; i++)
151  std::cout << "Test OK.\n";
152 }
void Read(std::istream &in, bool binary, const AmDiagGmm &am)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
double Exp(double x)
Definition: kaldi-math.h:83
void AddPdf(const DiagGmm &gmm)
Adds a GMM to the model, and increments the total number of PDFs.
Definition: am-diag-gmm.cc:57
void CopyFromDiagGmm(const DiagGmm &diaggmm)
Copies from given DiagGmm.
Definition: diag-gmm.cc:83
void SetInvVarsAndMeans(const MatrixBase< Real > &invvars, const MatrixBase< Real > &means)
Use SetInvVarsAndMeans if updating both means and (inverse) variances.
Definition: diag-gmm-inl.h:63
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1.
Definition: kaldi-math.h:151
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
void rand_diag_gmm(size_t num_comp, size_t dim, DiagGmm *gmm)
void GetComponentMean(int32 gauss, VectorBase< Real > *out) const
Accessor for single component mean.
Definition: diag-gmm-inl.h:135
void Resize(int32 nMix, int32 dim)
Resizes arrays to this dim. Does not initialize data.
Definition: diag-gmm.cc:66
int32 ComputeGconsts()
Sets the gconsts.
Definition: diag-gmm.cc:114
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
void SetMeans(const MatrixBase< Real > &m)
Use SetMeans to update only the Gaussian means (and not variances)
Definition: diag-gmm-inl.h:43
void Write(std::ostream &out, bool binary) const
std::istream & Stream()
Definition: kaldi-io.cc:826
float BaseFloat
Definition: kaldi-types.h:29
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
Definition: kaldi-matrix.h:188
void BuildTree(const Vector< BaseFloat > &state_occs, const std::vector< int32 > &sil_indices, const AmDiagGmm &am, int32 max_clusters)
Top-down clustering of the Gaussians in a model based on their means.
A regression tree is a clustering of Gaussian densities in an acoustic model, such that the group of ...
void test_io(const RegressionTree &regtree, const AmDiagGmm &acmodel, bool binary)
void GetMeans(Matrix< Real > *m) const
Accessor for means.
Definition: diag-gmm-inl.h:123
int32 NumGauss() const
Returns the number of mixture components in the GMM.
Definition: diag-gmm.h:72
void Scale(Real alpha)
Multiplies all elements by this constant.
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
int main()
void InvertElements()
Inverts all the elements of the matrix.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void Set(Real f)
Set all members of a vector to a specified value.
Definition for Gaussian Mixture Model with diagonal covariances.
Definition: diag-gmm.h:42
void SetWeights(const VectorBase< Real > &w)
Mutators for both float or double.
Definition: diag-gmm-inl.h:28
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Definition: kaldi-math.cc:95
void UnitTestRegressionTree()