print-proxy-keywords.cc
Go to the documentation of this file.
1 // kwsbin/print-proxy-keywords.cc
2 //
3 // Copyright 2014-2016 Johns Hopkins University (Author: Guoguo Chen,
4 // Yenda Trmal)
5 //
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABLITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "fstext/fstext-utils.h"
25 #include "fstext/kaldi-fst-io.h"
26 
27 namespace fst {
28 using std::vector;
29 
30 bool PrintProxyFstPath(const VectorFst<StdArc> &proxy,
31  vector<vector<StdArc::Label> > *path,
32  vector<StdArc::Weight> *cost,
33  StdArc::StateId cur_state,
34  vector<StdArc::Label> cur_path,
35  StdArc::Weight cur_cost) {
36 
37  if (proxy.Final(cur_state) != StdArc::Weight::Zero()) {
38  cur_cost = Times(proxy.Final(cur_state), cur_cost);
39  path->push_back(cur_path);
40  cost->push_back(cur_cost);
41  // even final state can have outgoing args, so no return here
42  }
43 
44  for (ArcIterator<StdFst> aiter(proxy, cur_state);
45  !aiter.Done(); aiter.Next()) {
46  const StdArc &arc = aiter.Value();
47  StdArc::Weight temp_cost = Times(arc.weight, cur_cost);
48  cur_path.push_back(arc.ilabel);
49  PrintProxyFstPath(proxy, path, cost,
50  arc.nextstate, cur_path, temp_cost);
51  cur_path.pop_back();
52  }
53 
54  return true;
55 }
56 } // namespace fst
57 
58 int main(int argc, char *argv[]) {
59  try {
60  using namespace kaldi;
61  using namespace fst;
62  typedef kaldi::int32 int32;
63  typedef kaldi::uint64 uint64;
64  typedef StdArc::StateId StateId;
65  typedef StdArc::Weight Weight;
66 
67  const char *usage =
68  "Reads in the proxy keywords FSTs and print them to a file where each\n"
69  "line is \"kwid w1 w2 .. 2n\"\n"
70  "\n"
71  "Usage: \n"
72  " print-proxy-keywords [options] <proxy-rspecifier> "
73  " <kwlist-wspecifier> [<cost-wspecifier>]]\n"
74  "e.g.:\n"
75  " print-proxy-keywords ark:proxy.fsts ark,t:kwlist.txt"
76  " ark,t:costs.txt\n";
77 
78  ParseOptions po(usage);
79 
80  po.Read(argc, argv);
81 
82  if (po.NumArgs() < 2 || po.NumArgs() > 3) {
83  po.PrintUsage();
84  exit(1);
85  }
86 
87  std::string proxy_rspecifier = po.GetArg(1),
88  kwlist_wspecifier = po.GetArg(2),
89  cost_wspecifier = po.GetOptArg(3);
90 
91 
92  SequentialTableReader<VectorFstHolder> proxy_reader(proxy_rspecifier);
93  TableWriter<BasicVectorHolder<int32> > kwlist_writer(kwlist_wspecifier);
94  TableWriter<BasicVectorHolder<double> > cost_writer(cost_wspecifier);
95 
96  // Start processing the keywords
97  int32 n_done = 0;
98  for (; !proxy_reader.Done(); proxy_reader.Next()) {
99  std::string key = proxy_reader.Key();
100  VectorFst<StdArc> proxy = proxy_reader.Value();
101  proxy_reader.FreeCurrent();
102 
103  if (proxy.Properties(kAcyclic, true) == 0) {
104  KALDI_WARN << "Proxy FST has cycles, skip printing paths for " << key;
105  continue;
106  }
107 
108  vector<vector<StdArc::Label> > paths;
109  vector<StdArc::Weight> costs;
110  PrintProxyFstPath(proxy, &paths, &costs, proxy.Start(),
111  vector<StdArc::Label>(), StdArc::Weight::One());
112  KALDI_ASSERT(paths.size() == costs.size());
113  for (int32 i = 0; i < paths.size(); i++) {
114  vector<int32> kwlist;
115  vector<double> cost;
116  cost.push_back(costs[i].Value());
117  for (int32 j = 0; j < paths[i].size(); j++) {
118  kwlist.push_back(paths[i][j]);
119  }
120  kwlist_writer.Write(key, kwlist);
121  if (cost_wspecifier != "")
122  cost_writer.Write(key, cost);
123  }
124  n_done++;
125  }
126 
127  KALDI_LOG << "Done " << n_done << " keywords";
128  return (n_done != 0 ? 0 : 1);
129  } catch(const std::exception &e) {
130  std::cerr << e.what();
131  return -1;
132  }
133 }
134 
135 
fst::StdArc::StateId StateId
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
LatticeWeightTpl< FloatType > Times(const LatticeWeightTpl< FloatType > &w1, const LatticeWeightTpl< FloatType > &w2)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
bool PrintProxyFstPath(const VectorFst< StdArc > &proxy, vector< vector< StdArc::Label > > *path, vector< StdArc::Weight > *weight, StdArc::StateId cur_state, vector< StdArc::Label > cur_path, StdArc::Weight cur_weight)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::StdArc::Weight Weight
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
#define KALDI_LOG
Definition: kaldi-error.h:153
std::string GetOptArg(int param) const