All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
get-silence-probs.cc File Reference
Include dependency graph for get-silence-probs.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 

Function Documentation

int main ( int  argc,
char *  argv[] 
)

Definition at line 29 of file get-silence-probs.cc.

References VectorBase< Real >::Add(), VectorBase< Real >::Dim(), SequentialTableReader< Holder >::Done(), kaldi::Exp(), ParseOptions::GetArg(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ASSERT, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), kaldi::Log(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), ParseOptions::Register(), VectorBase< Real >::Scale(), VectorBase< Real >::Sum(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), and TableWriter< Holder >::Write().

29  {
30  using namespace kaldi;
31  typedef kaldi::int32 int32;
32  typedef kaldi::int64 int64;
33  try {
34  const char *usage =
35  "This program takes two archives of Vector<BaseFloat>, representing\n"
36  "per-frame log-likelihoods for silence and non-silence models respectively.\n"
37  "It outputs per-frame silence probabilities in the same format.\n"
38  "To get non-silence probs instead, use --write-nonsil-probs "
39  "Usage: get-silence-probs [options] <silence-loglikes-rspecifier> "
40  " <nonsilence-loglikes-rspecifier> <silence-probs-wspecifier>\n"
41  "e.g.: get-silence-probs --silence-prior=0.9 --quantize=0.25 ark:sil.likes "
42  "ark:nonsil.likes ark:sil.probs\n";
43 
44 
45  ParseOptions po(usage);
46 
47  BaseFloat sil_prior = 0.5;
48  BaseFloat quantize = 0.0;
49  bool write_nonsil_probs = false;
50  po.Register("sil-prior", &sil_prior,
51  "Prior probability of silence, must be strictly between 0 and 1.");
52  po.Register("quantize", &quantize,
53  "If nonzero, quantize probs to this level (to improve "
54  "compressibility).");
55  po.Register("write-nonsil-probs", &write_nonsil_probs,
56  "If true, write non-silence probs instead of silence probs");
57 
58  po.Read(argc, argv);
59 
60  if (po.NumArgs() != 3) {
61  po.PrintUsage();
62  exit(1);
63  }
64 
65  KALDI_ASSERT(sil_prior > 0.0 && sil_prior < 1.0);
66  KALDI_ASSERT(quantize >= 0.0 && quantize <= 1.0);
67 
68 
69  double sil_log_bias = Log(sil_prior / (1.0 - sil_prior));
70 
71  std::string silence_likes_rspecifier = po.GetArg(1),
72  nonsilence_likes_rspecifier = po.GetArg(2),
73  silence_probs_wspecifier = po.GetArg(3);
74 
75  SequentialBaseFloatVectorReader silence_likes_reader(silence_likes_rspecifier);
76  RandomAccessBaseFloatVectorReader nonsilence_likes_reader(nonsilence_likes_rspecifier);
77  BaseFloatVectorWriter silence_probs_writer(silence_probs_wspecifier);
78 
79  int num_done = 0, num_err = 0;
80  double tot_frames = 0.0, tot_sil_prob = 0.0;
81 
82  for (; !silence_likes_reader.Done(); silence_likes_reader.Next()) {
83  std::string key = silence_likes_reader.Key();
84  if (!nonsilence_likes_reader.HasKey(key)) {
85  KALDI_WARN << "No non-silence likes available for utterance " << key;
86  num_err++;
87  continue;
88  }
89  const Vector<BaseFloat> &sil_likes = silence_likes_reader.Value();
90  const Vector<BaseFloat> &nonsil_likes = nonsilence_likes_reader.Value(key);
91  if (sil_likes.Dim() != nonsil_likes.Dim()) {
92  KALDI_WARN << "Dimension mismatch between sil and non-sil likes";
93  num_err++;
94  continue;
95  }
96  int32 num_frames = sil_likes.Dim();
97  Vector<BaseFloat> sil_probs(num_frames);
98  for (int32 f = 0; f < num_frames; f++) {
99  // We're basically just applying Bayes' rule here to get the
100  // posterior prob of silence.
101  BaseFloat sil_loglike = sil_likes(f), nonsil_loglike = nonsil_likes(f);
102  sil_loglike -= nonsil_loglike; nonsil_loglike = 0; // improve floating-point range.
103  sil_loglike += sil_log_bias; // relates to prior. Zero if prior==0.5.
104  if (sil_loglike > 10) {
105  sil_probs(f) = 1.0; // because the exp below might fail.
106  } else {
107  BaseFloat e_sil_loglike = Exp(sil_loglike);
108  BaseFloat sil_prob = e_sil_loglike / (1.0 + e_sil_loglike);
109  if ( !(sil_prob >= 0.0 && sil_prob <= 1.0)) {
110  KALDI_WARN << "Bad silence prob (NaNs found?), setting to 0.5";
111  sil_prob = 0.5;
112  }
113  sil_probs(f) = sil_prob;
114  }
115  if (quantize != 0.0) {
116  int64 i = static_cast<int64>(0.5 + (sil_probs(f) / quantize));
117  sil_probs(f) = quantize * i;
118  }
119  }
120  tot_frames += num_frames;
121  tot_sil_prob += sil_probs.Sum();
122  if (write_nonsil_probs) { // sil_prob <-- 1.0 - sil_prob
123  sil_probs.Scale(-1.0);
124  sil_probs.Add(1.0);
125  }
126  silence_probs_writer.Write(key, sil_probs);
127  num_done++;
128  }
129  KALDI_LOG << "Done " << num_done << " utterances, " << num_err << " with errors.";
130  KALDI_LOG << "Average silence prob is " << (tot_sil_prob/tot_frames)
131  << " over " << tot_frames << " frames.";
132  return (num_done != 0 ? 0 : 1);
133  } catch(const std::exception &e) {
134  std::cerr << e.what();
135  return -1;
136  }
137 }
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
double Exp(double x)
Definition: kaldi-math.h:83
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:366
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
float BaseFloat
Definition: kaldi-types.h:29
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
double Log(double x)
Definition: kaldi-math.h:100
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
#define KALDI_WARN
Definition: kaldi-error.h:130
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:169
#define KALDI_LOG
Definition: kaldi-error.h:133
MatrixIndexT Dim() const
Returns the dimension of the vector.
Definition: kaldi-vector.h:62