online-nnet2-decodable-test.cc
Go to the documentation of this file.
1 // nnet2/online-nnet2-decodable-test.cc
2 
3 // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "hmm/transition-model.h"
21 #include "nnet2/nnet-component.h"
24 #include "feat/online-feature.h"
25 #include "hmm/hmm-test-utils.h"
26 
27 namespace kaldi {
28 namespace nnet2 {
29 
30 
32  std::vector<int32> phones;
33  phones.push_back(1);
34  for (int32 i = 2; i < 20; i++)
35  if (rand() % 2 == 0)
36  phones.push_back(i);
37  int32 N = 2 + rand() % 2, // context-size N is 2 or 3.
38  P = rand() % N; // Central-phone is random on [0, N)
39 
40  std::vector<int32> num_pdf_classes;
41 
42  ContextDependency *ctx_dep =
43  GenRandContextDependencyLarge(phones, N, P,
44  true, &num_pdf_classes);
45 
46  HmmTopology topo = GetDefaultTopology(phones);
47 
48  TransitionModel trans_model(*ctx_dep, topo);
49 
50  delete ctx_dep; // We won't need this further.
51  ctx_dep = NULL;
52 
53  int32 input_dim = 40, output_dim = trans_model.NumPdfs();
54  Nnet *nnet = GenRandomNnet(input_dim, output_dim);
55 
56  AmNnet am_nnet(*nnet);
57  delete nnet;
58  nnet = NULL;
59  Vector<BaseFloat> priors(output_dim);
60  priors.SetRandn();
61  priors.ApplyExp();
62  priors.Scale(1.0 / priors.Sum());
63 
64  am_nnet.SetPriors(priors);
65 
67  opts.max_nnet_batch_size = 20;
68  opts.acoustic_scale = 0.1;
69 
70  opts.pad_input = (rand() % 2 == 0);
71 
72  int32 num_input_frames = 400;
73  Matrix<BaseFloat> input_feats(num_input_frames, input_dim);
74  input_feats.SetRandn();
75 
76  OnlineMatrixFeature matrix_feature(input_feats);
77 
78  DecodableNnet2Online online_decodable(am_nnet, trans_model,
79  opts, &matrix_feature);
80 
81  DecodableAmNnet offline_decodable(trans_model, am_nnet,
82  CuMatrix<BaseFloat>(input_feats),
83  opts.pad_input,
84  opts.acoustic_scale);
85 
86  KALDI_ASSERT(online_decodable.NumFramesReady() ==
87  offline_decodable.NumFramesReady());
88  int32 num_frames = online_decodable.NumFramesReady(),
89  num_tids = trans_model.NumTransitionIds();
90 
91  for (int32 i = 0; i < 50; i++) {
92 
93  int32 t = rand() % num_frames, tid = 1 + rand() % num_tids;
94  BaseFloat l1 = online_decodable.LogLikelihood(t, tid),
95  l2 = offline_decodable.LogLikelihood(t, tid);
96  KALDI_ASSERT(ApproxEqual(l1, l2));
97  }
98 }
99 
100 } // namespace nnet2
101 } // namespace kaldi
102 
103 
// Entry point of the test program: brings the kaldi and kaldi::nnet2
// namespaces (and kaldi::int32) into scope, then reaches a three-iteration
// `for` statement followed by `return 0;`.
// NOTE(review): the embedded listing numbers jump from 109 to 111, so the
// statement that formed the loop body (listing line 110) was presumably lost
// when this page was extracted. As the text stands, `return 0;` is the loop
// body and the program exits on the first iteration without invoking any
// test. Confirm against the original .cc file and restore the missing call.
104 int main() {
105  using namespace kaldi;
106  using namespace kaldi::nnet2;
107  using kaldi::int32;
108 
109  for (int32 i = 0; i < 3; i++)
111  return 0;
112 }
113 
114 
This class takes a Matrix<BaseFloat> and wraps it as an OnlineFeatureInterface: this can be useful wh...
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void ApplyExp()
Apply exponential to each value in vector.
HmmTopology GetDefaultTopology(const std::vector< int32 > &phones_in)
This function returns a HmmTopology object giving a normal 3-state topology, covering all phones in t...
ContextDependency * GenRandContextDependencyLarge(const std::vector< int32 > &phone_ids, int N, int P, bool ensure_all_covered, std::vector< int32 > *hmm_lengths)
GenRandContextDependencyLarge is like GenRandContextDependency but generates a larger tree with speci...
Definition: context-dep.cc:97
A class for storing topology information for phones.
Definition: hmm-topology.h:93
Nnet * GenRandomNnet(int32 input_dim, int32 output_dim)
This function generates a random neural net, for testing purposes.
Definition: nnet-nnet.cc:772
kaldi::int32 int32
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
int32 NumTransitionIds() const
Returns the total number of transition-ids (note, these are one-based).
void SetRandn()
Sets to random values of a normal distribution.
virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id)
Returns the log likelihood, which will be negated in the decoder.
This Decodable object for class nnet2::AmNnet takes feature input from class OnlineFeatureInterface, unlike, say, class DecodableAmNnet which takes feature input from a matrix.
void Scale(Real alpha)
Multiplies all elements by this constant.
virtual int32 NumFramesReady() const
The call NumFramesReady() will return the number of frames currently available for this decodable obj...
Real Sum() const
Returns sum of the elements.
void SetRandn()
Set vector to random normally-distributed noise.
virtual BaseFloat LogLikelihood(int32 frame, int32 index)
Returns the scaled log likelihood.
A class representing a vector.
Definition: kaldi-vector.h:406
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
DecodableAmNnet is a decodable object that decodes with a neural net acoustic model of type AmNnet...
void SetPriors(const VectorBase< BaseFloat > &priors)
Definition: am-nnet.cc:44
static bool ApproxEqual(float a, float b, float relative_tolerance=0.001)
return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
Definition: kaldi-math.h:265