nnet-am-switch-preconditioning.cc
Go to the documentation of this file.
1 // nnet2bin/nnet-am-switch-preconditioning.cc
2 
3 // Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "base/kaldi-common.h"
21 #include "util/common-utils.h"
22 #include "nnet2/am-nnet.h"
23 #include "hmm/transition-model.h"
24 #include "tree/context-dep.h"
25 
26 int main(int argc, char *argv[]) {
27  try {
28  using namespace kaldi;
29  using namespace kaldi::nnet2;
30  typedef kaldi::int32 int32;
31 
32  const char *usage =
33  "Copy a (cpu-based) neural net and its associated transition model,\n"
34  "and switch it to online preconditioning, i.e. change any components\n"
35  "derived from AffineComponent to components of type\n"
36  "AffineComponentPreconditionedOnline.\n"
37  "\n"
38  "Usage: nnet-am-switch-preconditioning [options] <nnet-in> <nnet-out>\n"
39  "e.g.:\n"
40  " nnet-am-switch-preconditioning --binary=false 1.mdl text.mdl\n";
41 
42  int32 rank_in = 20, rank_out = 80, update_period = 4;
43  BaseFloat num_samples_history = 2000.0;
44  BaseFloat alpha = 4.0;
45  bool binary_write = true;
46 
47  ParseOptions po(usage);
48  po.Register("binary", &binary_write, "Write output in binary mode");
49  po.Register("rank-in", &rank_in,
50  "Rank used in online-preconditioning on input side of each layer");
51  po.Register("rank-out", &rank_out,
52  "Rank used in online-preconditioning on output side of each layer");
53  po.Register("update-period", &update_period,
54  "Affects how frequently we update the Fisher-matrix estimate (every "
55  "this-many minibatches).");
56  po.Register("num-samples-history", &num_samples_history,
57  "Number of samples of history to use in online preconditioning "
58  "(affects speed vs accuracy of update of Fisher matrix)");
59  po.Register("alpha", &alpha,
60  "Parameter that affects amount of smoothing with unit matrix "
61  "in online preconditioning (larger -> more smoothing)");
62 
63  po.Read(argc, argv);
64 
65  if (po.NumArgs() != 2) {
66  po.PrintUsage();
67  exit(1);
68  }
69 
70  std::string nnet_rxfilename = po.GetArg(1),
71  nnet_wxfilename = po.GetArg(2);
72 
73  TransitionModel trans_model;
74  AmNnet am_nnet;
75  {
76  bool binary;
77  Input ki(nnet_rxfilename, &binary);
78  trans_model.Read(ki.Stream(), binary);
79  am_nnet.Read(ki.Stream(), binary);
80  }
81 
82  am_nnet.GetNnet().SwitchToOnlinePreconditioning(rank_in, rank_out, update_period,
83  num_samples_history, alpha);
84 
85  {
86  Output ko(nnet_wxfilename, binary_write);
87  trans_model.Write(ko.Stream(), binary_write);
88  am_nnet.Write(ko.Stream(), binary_write);
89  }
90  KALDI_LOG << "Copied neural net from " << nnet_rxfilename
91  << " to " << nnet_wxfilename;
92  return 0;
93  } catch(const std::exception &e) {
94  std::cerr << e.what() << '\n';
95  return -1;
96  }
97 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Read(std::istream &is, bool binary)
Definition: am-nnet.cc:39
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
int main(int argc, char *argv[])
void SwitchToOnlinePreconditioning(int32 rank_in, int32 rank_out, int32 update_period, BaseFloat num_samples_history, BaseFloat alpha)
Replaces any components of type AffineComponent or derived classes, with components of type AffineCom...
Definition: nnet-nnet.cc:551
std::istream & Stream()
Definition: kaldi-io.cc:826
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::ostream & Stream()
Definition: kaldi-io.cc:701
void Read(std::istream &is, bool binary)
void Write(std::ostream &os, bool binary) const
Definition: am-nnet.cc:31
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void Write(std::ostream &os, bool binary) const
#define KALDI_LOG
Definition: kaldi-error.h:153
const Nnet & GetNnet() const
Definition: am-nnet.h:61