context-dep-test.cc
Go to the documentation of this file.
1 // tree/context-dep-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "tree/context-dep.h"
22 #include "util/kaldi-io.h"
23 
24 namespace kaldi {
// NOTE(review): the function header line is missing from this listing (the
// embedded numbering skips 25); the cross-reference list suggests this is the
// body of `void TestContextDep()` -- confirm against the real file.
//
// Purpose: builds a random set of GaussClusterable objects, fills them with
// randomly weighted random-Gaussian vectors, then checks for each adjacent
// pair that pooling their statistics does not raise the per-count objective.
// Variance floor, feature dimension and number of objects; the "1..N" ranges
// below assume Rand() returns a non-negative int -- TODO confirm.
26  BaseFloat varFloor = 0.1;
27  size_t dim = 1 + Rand() % 20;
28  size_t nGauss = 1 + Rand() % 10;
// Raw owning pointers: every element allocated here is released by the
// delete loop at the end of the function.
29  std::vector< GaussClusterable * > v(nGauss);
30  for (size_t i = 0;i < nGauss;i++) {
31  v[i] = new GaussClusterable(dim, varFloor);
32  }
// Accumulate a random number of points into each object.
33  for (size_t i = 0;i < nGauss;i++) {
34  size_t nPoints = 1 + Rand() % 30;
35  for (size_t j = 0;j < nPoints;j++) {
// Weight is one of 0, 0.5 or 1.0, so some points are added with zero weight.
36  BaseFloat post = 0.5 *(Rand()%3);
37  Vector<BaseFloat> vec(dim);
38  for (size_t k = 0;k < dim;k++) vec(k) = RandGauss();
39  v[i]->AddStats(vec, post);
40  }
41  }
// Compare each adjacent pair (i, i+1) before and after merging.
42  for (size_t i = 0;i+1 < nGauss;i++) {
// Objective of the two objects kept separate, divided by their combined
// normalizer.
43  BaseFloat like_before = (v[i]->Objf() + v[i+1]->Objf()) / (v[i]->Normalizer() + v[i+1]->Normalizer());
// Merge into a copy so that v[i] itself stays unmodified for the next pair.
44  Clusterable *tmp = v[i]->Copy();
45  tmp->Add(*(v[i+1]));
46  BaseFloat like_after = tmp->Objf() / tmp->Normalizer();
47  std::cout << "Like_before = " << like_before <<", after = "<<like_after <<" over "<<tmp->Normalizer()<<" frames.\n";
// The check is skipped when the merged normalizer is 0.1 or less; presumably
// this covers the case where all weights were zero and the ratios above are
// not meaningful -- TODO confirm.
48  if (tmp->Normalizer() > 0.1)
49  KALDI_ASSERT(like_after <= like_before); // should get worse after combining stats.
50  delete tmp;
51  }
// Release the objects allocated at the top of the function.
52  for (size_t i = 0;i < nGauss;i++)
53  delete v[i];
54 }
55 
// NOTE(review): the function header line is missing from this listing (the
// embedded numbering skips 56); the cross-reference list suggests this is the
// body of `void TestMonophoneContextDependency()` -- confirm against the real
// file.
//
// Purpose: builds a monophone context-dependency object over a random phone
// set with 3 classes per phone and checks the size and shape of what
// GetPdfInfo() reports for it.
// Twenty draws from 1..30 (assuming Rand() is non-negative -- TODO confirm);
// the set removes duplicates, so it holds between 1 and 20 distinct phones.
57  std::set<int32> phones_set;
58  for (size_t i = 1; i <= 20; i++) phones_set.insert(1 + Rand() % 30);
59  std::vector<int32> phones;
60  CopySetToVector(phones_set, &phones);
// Table indexed by phone id, sized to the largest id + 1. Entries for ids
// that are not in `phones` keep their value-initialized 0.
61  std::vector<int32> phone2num_classes(1 + *std::max_element(phones.begin(), phones.end()));
62  for (size_t i = 0; i < phones.size(); i++)
63  phone2num_classes[phones[i]] = 3;
// NOTE(review): listing line 64 is missing here. The next line is the tail of
// a call whose result is used below as `cd`; presumably
// `ContextDependency *cd = MonophoneContextDependency(phones,` -- confirm.
65  phone2num_classes);
66 
67  std::vector<std::vector<std::pair<int32, int32> > > pdf_info;
68  cd->GetPdfInfo(phones, phone2num_classes, &pdf_info);
// Expect 3 entries per phone, and a randomly sampled entry to hold exactly
// one pair.
69  KALDI_ASSERT(pdf_info.size() == phones.size() * 3 &&
70  pdf_info[Rand() % pdf_info.size()].size() == 1);
71  delete cd;
72 }
73 // Also tests I/O of ContextDependency
// NOTE(review): the function header line is missing from this listing (the
// embedded numbering skips 74); the cross-reference list suggests this is the
// body of `void TestGenRandContextDependency()` -- confirm against the real
// file.
//
// Purpose: obtains a context-dependency object `dep` for a random phone set,
// checks GetPdfInfo() on it, then writes it to a temporary file, reads it
// back and verifies that both objects serialize to the same text.
// Randomly choose binary or text mode for the on-disk write.
75  bool binary = (Rand()%2 == 0);
76  size_t num_phones = 1 + Rand() % 10;
// Keep drawing ids in [0, num_phones + 5) until num_phones distinct ones have
// been collected (assuming Rand() is non-negative -- TODO confirm).
77  std::set<int32> phones_set;
78  while (phones_set.size() < num_phones) phones_set.insert(Rand() % (num_phones + 5));
79  std::vector<int32> phones;
80  CopySetToVector(phones_set, &phones);
81  bool ensure_all_covered = (Rand() % 2 == 0);
82  std::vector<int32> phone2num_pdf_classes;
// NOTE(review): listing line 83 is missing here. The next two lines are the
// tail of a call whose result is used below as `dep`; presumably
// `ContextDependency *dep = GenRandContextDependency(phones,` -- confirm.
84  ensure_all_covered, // false == don't ensure all phones covered.
85  &phone2num_pdf_classes);
86  // Round-trip check: write dep to a temporary file, read it back, compare.
87  const char *filename = "tmpf";
88  {
89  Output ko(filename, binary);
90  std::ostream &outfile = ko.Stream();
// This inner scope does not touch `outfile`; it only inspects `dep`.
91  { // Test GetPdfInfo
92  std::vector<std::vector<std::pair<int32, int32> > > pdf_info;
93  dep->GetPdfInfo(phones, phone2num_pdf_classes, &pdf_info);
// Using phones.back() as the largest id relies on `phones` being in ascending
// order (it was copied from a std::set).
94  std::vector<bool> all_phones(phones.back()+1, false); // making sure all covered.
95  for (size_t i = 0; i < pdf_info.size(); i++) {
96  KALDI_ASSERT(!pdf_info[i].empty()); // make sure pdf seen.
97  for (size_t j = 0; j < pdf_info[i].size(); j++) {
// `.first` of each pair is used as an index into the per-phone flags, so it
// must be within range; a negative value casts to a huge size_t and fails too.
98  int32 idx = pdf_info[i][j].first;
99  KALDI_ASSERT(static_cast<size_t>(idx) < all_phones.size());
100  all_phones[pdf_info[i][j].first] = true;
101  }
102  }
// Only when coverage was requested must every phone have been marked above.
103  if (ensure_all_covered)
104  for (size_t k = 0; k < phones.size(); k++) KALDI_ASSERT(all_phones[phones[k]]);
105  }
106 
107  dep->Write(outfile, binary);
108  ko.Close();
109  }
110  {
// The mode (binary or text) is reported back by Input via binary_in rather
// than being passed in from `binary`.
111  bool binary_in;
112  Input ki(filename, &binary_in);
113  std::istream &infile = ki.Stream();
114  ContextDependency dep2;
115  dep2.Read(infile, binary_in);
116 
// Compare the original and the re-read object through their text-mode
// serialization (binary == false), whichever mode was used on disk.
117  std::ostringstream ostr1, ostr2;
118  dep->Write(ostr1, false);
119  dep2.Write(ostr2, false);
120  KALDI_ASSERT(ostr1.str() == ostr2.str());
121  }
122 
123  delete dep;
124 
// Removes the temporary file; the literal duplicates the value of `filename`.
125  unlink("tmpf");
126 
127  std::cout << "Note: any \"serious error\" warnings preceding this line are OK.\n";
128 }
129 
130 } // end namespace kaldi
131 
// Test driver: repeats the randomized tests 10 times.
// NOTE(review): listing lines 134 and 136 are missing from this view
// (the embedded numbering jumps 133 -> 135 -> 137); presumably they call the
// other two test functions -- confirm against the real file.
132 int main() {
133  for (size_t i = 0;i < 10;i++) {
135  kaldi::TestGenRandContextDependency(); // Also tests I/O of ContextDependency
137  }
138 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
virtual void GetPdfInfo(const std::vector< int32 > &phones, const std::vector< int32 > &num_pdf_classes, std::vector< std::vector< std::pair< int32, int32 > > > *pdf_info) const
GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which pairs of (phone...
Definition: context-dep.cc:287
void CopySetToVector(const std::set< T > &s, std::vector< T > *v)
Copies the elements of a set to a vector.
Definition: stl-utils.h:86
virtual void Add(const Clusterable &other)=0
Add other stats.
virtual BaseFloat Objf() const =0
Return the objective function associated with the stats [assuming ML estimation]. ...
ContextDependency * MonophoneContextDependency(const std::vector< int32 > &phones, const std::vector< int32 > &phone2num_pdf_classes)
Definition: context-dep.cc:331
float RandGauss(struct RandomState *state=NULL)
Definition: kaldi-math.h:155
kaldi::int32 int32
virtual Clusterable * Copy() const =0
Return a copy of this object.
void TestContextDep()
std::istream & Stream()
Definition: kaldi-io.cc:826
void Write(std::ostream &os, bool binary) const
Definition: context-dep.cc:145
std::ostream & Stream()
Definition: kaldi-io.cc:701
int main()
void TestMonophoneContextDependency()
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
virtual BaseFloat Normalizer() const =0
Return the normalizer (typically, count) associated with the stats.
A class representing a vector.
Definition: kaldi-vector.h:406
void Read(std::istream &is, bool binary)
Read context-dependency object from disk; throws on error.
Definition: context-dep.cc:155
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
ContextDependency * GenRandContextDependency(const std::vector< int32 > &phone_ids, bool ensure_all_covered, std::vector< int32 > *hmm_lengths)
GenRandContextDependency is mainly of use for debugging.
Definition: context-dep.cc:46
GaussClusterable wraps Gaussian statistics in a form accessible to generic clustering algorithms...
void TestGenRandContextDependency()
bool Close()
Definition: kaldi-io.cc:677