kws-index-union.cc
Go to the documentation of this file.
1 // kwsbin/kws-index-union.cc
2 
3 // Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 // Lucas Ondel
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "fstext/fstext-utils.h"
24 #include "kws/kaldi-kws.h"
25 #include "kws/kws-functions.h"
26 
27 int main(int argc, char *argv[]) {
28  try {
29  using namespace kaldi;
30  using namespace fst;
31  typedef kaldi::int32 int32;
32  typedef kaldi::uint64 uint64;
33 
34  const char *usage =
35  "Take a union of the indexed lattices. The input index is in "
36  " the T*T*T semiring and\n"
37  "the output index is also in the T*T*T semiring. At the end of "
38  "this program, encoded\n"
39  "epsilon removal, determinization and minimization will be applied.\n"
40  "\n"
41  "Usage: kws-index-union [options] index-rspecifier index-wspecifier\n"
42  " e.g.: kws-index-union ark:input.idx ark:global.idx\n";
43 
44  ParseOptions po(usage);
45 
46  bool strict = true;
47  bool skip_opt = false;
48  int32 max_states = -1;
49  po.Register("strict", &strict,
50  "Will allow 0 lattice if it is set to false.");
51  po.Register("skip-optimization", &skip_opt,
52  "Skip optimization if it's set to true.");
53  po.Register("max-states", &max_states,
54  "Maximum states for DeterminizeStar.");
55 
56  po.Read(argc, argv);
57 
58  if (po.NumArgs() < 2 || po.NumArgs() > 3) {
59  po.PrintUsage();
60  exit(1);
61  }
62 
63  std::string index_rspecifier = po.GetArg(1),
64  index_wspecifier = po.GetOptArg(2);
65 
67  index_reader(index_rspecifier);
69  index_writer(index_wspecifier);
70 
71  int32 n_done = 0;
72  KwsLexicographicFst global_index;
73  for (; !index_reader.Done(); index_reader.Next()) {
74  std::string key = index_reader.Key();
75  KwsLexicographicFst index = index_reader.Value();
76  index_reader.FreeCurrent();
77 
78  Union(&global_index, index);
79 
80  n_done++;
81  }
82 
83  if (skip_opt == false) {
84  // Do the encoded epsilon removal, determinization and minimization
85  KwsLexicographicFst ifst = global_index;
86  EncodeMapper<KwsLexicographicArc> encoder(kEncodeLabels, ENCODE);
87  Encode(&ifst, &encoder);
88  try {
89  DeterminizeStar(ifst, &global_index, kDelta, NULL, max_states);
90  } catch(const std::exception &e) {
91  KALDI_WARN << e.what()
92  << " (should affect speed of search but not results)";
93  global_index = ifst;
94  }
95  Minimize(&global_index, static_cast<KwsLexicographicFst*>(NULL), kDelta, true);
96  Decode(&global_index, encoder);
97  } else {
98  KALDI_LOG << "Skipping index optimization...";
99  }
100 
101  // Write the result
102  index_writer.Write("global", global_index);
103 
104  KALDI_LOG << "Done " << n_done << " indices";
105  if (strict == true)
106  return (n_done != 0 ? 0 : 1);
107  else
108  return 0;
109  } catch(const std::exception &e) {
110  std::cerr << e.what();
111  return -1;
112  }
113 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
fst::VectorFst< KwsLexicographicArc > KwsLexicographicFst
Definition: kaldi-kws.h:46
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_LOG
Definition: kaldi-error.h:153
int main(int argc, char *argv[])
bool DeterminizeStar(F &ifst, MutableFst< typename F::Arc > *ofst, float delta, bool *debug_ptr, int max_states, bool allow_partial)
This function implements the normal version of DeterminizeStar, in which the output strings are repre...
std::string GetOptArg(int param) const