fstcomposecontext.cc
Go to the documentation of this file.
1 // fstbin/fstcomposecontext.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/common-utils.h"
23 #include "fst/fstlib.h"
24 #include "fstext/context-fst.h"
26 #include "fstext/fstext-utils.h"
27 #include "fstext/kaldi-fst-io.h"
28 
29 /*
30  A couple of test examples:
31 
32  pushd ~/tmpdir
33  # (1) with no disambig syms.
34  ( echo "0 1 1 1"; echo "1 2 2 2"; echo "2 3 3 3"; echo "3 0" ) | fstcompile | fstcomposecontext ilabels.sym > tmp.fst
35  ( echo "<eps> 0"; echo "a 1"; echo "b 2"; echo "c 3" ) > phones.txt
36  fstmakecontextsyms phones.txt ilabels.sym > context.txt
37  fstprint --isymbols=context.txt --osymbols=phones.txt tmp.fst
38  # and the result is:
39 
40 WARNING (fstcomposecontext[5.4]:main():fstcomposecontext.cc:130) Disambiguation symbols list is empty; this likely indicates an error in data preparation.
41 0 1 <eps> a
42 1 2 <eps>/a/b b
43 2 3 a/b/c c
44 3 4 b/c/<eps> <eps>
45 4
46 
47 
48  # (2) with disambig syms:
49  ( echo 4; echo 5) > disambig.list
50  ( echo "<eps> 0"; echo "a 1"; echo "b 2"; echo "c 3"; echo "#0 4"; echo "#1 5") > phones.txt
51  ( echo "0 1 1 1"; echo "1 2 2 2"; echo " 2 3 4 4"; echo "3 4 3 3"; echo "4 5 5 5"; echo "5 0" ) | fstcompile > in.fst
52  fstcomposecontext --read-disambig-syms=disambig.list ilabels.sym in.fst tmp.fst
53  fstmakecontextsyms phones.txt ilabels.sym > context.txt
54  cp phones.txt phones_disambig.txt; ( echo "#0 4"; echo "#1 5" ) >> phones_disambig.txt
55  fstprint --isymbols=context.txt --osymbols=phones_disambig.txt tmp.fst
56 
57 0 1 #-1 a
58 1 2 <eps>/a/b b
59 2 3 #0 #0
60 3 4 a/b/c c
61 4 5 #1 #1
62 5 6 b/c/<eps> <eps>
63 
64 */
65 
66 int main(int argc, char *argv[]) {
67  try {
68  using namespace kaldi;
69  using namespace fst;
70  using kaldi::int32;
71  /*
72  # fstcomposecontext composes efficiently with a context fst
73  # that it generates. Without --disambig-syms specified, it
74  # assumes that all input symbols of in.fst are phones.
75  # It adds the subsequential symbol itself (it does not
76  # appear in the output so doesn't need to be specified by the user).
77  # the disambig.list is a list of disambiguation symbols on the LHS
78  # of in.fst. The symbols on the LHS of out.fst are indexes into
79  # the ilabels.list file, which is a kaldi-format file containing a
80  # vector<vector<int32> >, which specifies what the labels mean in
81  # terms of windows of symbols.
82  fstcomposecontext ilabels.sym [ in.fst [ out.fst ] ]
83  --disambig-syms=disambig.list
84  --context-size=3
85  --central-position=1
86  --binary=false
87  */
88 
89  const char *usage =
90  "Composes on the left with a dynamically created context FST\n"
91  "\n"
92  "Usage: fstcomposecontext <ilabels-output-file> [<in.fst> [<out.fst>] ]\n"
93  "E.g: fstcomposecontext ilabels.sym < LG.fst > CLG.fst\n";
94 
95 
96  ParseOptions po(usage);
97  bool binary = true;
98  std::string disambig_rxfilename,
99  disambig_wxfilename;
100  int32 context_width = 3, central_position = 1;
101  int32 nonterm_phones_offset = -1;
102  po.Register("binary", &binary,
103  "If true, output ilabels-output-file in binary format");
104  po.Register("read-disambig-syms", &disambig_rxfilename,
105  "List of disambiguation symbols on input of in.fst");
106  po.Register("write-disambig-syms", &disambig_wxfilename,
107  "List of disambiguation symbols on input of out.fst");
108  po.Register("context-size", &context_width, "Size of phone context window");
109  po.Register("central-position", &central_position,
110  "Designated central position in context window");
111  po.Register("nonterm-phones-offset", &nonterm_phones_offset,
112  "The integer id of #nonterm_bos in your phones.txt, if present "
113  "(only relevant for grammar-FST construction, see "
114  "doc/grammar.dox");
115 
116  po.Read(argc, argv);
117 
118  if (po.NumArgs() < 1 || po.NumArgs() > 3) {
119  po.PrintUsage();
120  exit(1);
121  }
122 
123  std::string ilabels_out_filename = po.GetArg(1),
124  fst_in_filename = po.GetOptArg(2),
125  fst_out_filename = po.GetOptArg(3);
126 
127  VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);
128 
129  if ( (disambig_wxfilename != "") && (disambig_rxfilename == "") )
130  KALDI_ERR << "fstcomposecontext: cannot specify --write-disambig-syms if "
131  "not specifying --read-disambig-syms\n";
132 
133  std::vector<int32> disambig_in;
134  if (disambig_rxfilename != "")
135  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_in))
136  KALDI_ERR << "fstcomposecontext: Could not read disambiguation symbols from "
137  << PrintableRxfilename(disambig_rxfilename);
138 
139  if (disambig_in.empty()) {
140  KALDI_WARN << "Disambiguation symbols list is empty; this likely "
141  << "indicates an error in data preparation.";
142  }
143 
144  std::vector<std::vector<int32> > ilabels;
145  VectorFst<StdArc> composed_fst;
146 
147  // Work gets done here (see context-fst.h)
148  if (nonterm_phones_offset < 0) {
149  // The normal case.
150  ComposeContext(disambig_in, context_width, central_position,
151  fst, &composed_fst, &ilabels);
152  } else {
153  // The grammar-FST case. See ../doc/grammar.dox for an intro.
154  if (context_width != 2 || central_position != 1) {
155  KALDI_ERR << "Grammar-fst graph creation only supports models with left-"
156  "biphone context. (--nonterm-phones-offset option was supplied).";
157  }
158  ComposeContextLeftBiphone(nonterm_phones_offset, disambig_in,
159  *fst, &composed_fst, &ilabels);
160  }
161  WriteILabelInfo(Output(ilabels_out_filename, binary).Stream(),
162  binary, ilabels);
163 
164  if (disambig_wxfilename != "") {
165  std::vector<int32> disambig_out;
166  for (size_t i = 0; i < ilabels.size(); i++)
167  if (ilabels[i].size() == 1 && ilabels[i][0] <= 0)
168  disambig_out.push_back(static_cast<int32>(i));
169  if (!WriteIntegerVectorSimple(disambig_wxfilename, disambig_out)) {
170  std::cerr << "fstcomposecontext: Could not write disambiguation symbols to "
171  << PrintableWxfilename(disambig_wxfilename) << '\n';
172  return 1;
173  }
174  }
175 
176  WriteFstKaldi(composed_fst, fst_out_filename);
177  delete fst;
178  return 0;
179  } catch(const std::exception &e) {
180  std::cerr << e.what();
181  return -1;
182  }
183 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void WriteILabelInfo(std::ostream &os, bool binary, const vector< vector< int32 > > &info)
Utility function for writing ilabel-info vectors to disk.
Definition: context-fst.cc:325
int main(int argc, char *argv[])
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
void Register(const std::string &name, bool *ptr, const std::string &doc)
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
void ComposeContextLeftBiphone(int32 nonterm_phones_offset, const vector< int32 > &disambig_syms_in, const VectorFst< StdArc > &ifst, VectorFst< StdArc > *ofst, std::vector< std::vector< int32 > > *ilabels)
This is a variant of the function ComposeContext() which is to be used with our "grammar FST" framewo...
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
void ComposeContext(const vector< int32 > &disambig_syms_in, int32 context_width, int32 central_position, VectorFst< StdArc > *ifst, VectorFst< StdArc > *ofst, vector< vector< int32 > > *ilabels_out, bool project_ifst)
Used in the command-line tool fstcomposecontext.
Definition: context-fst.cc:246
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst< Arc > &t)
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool WriteIntegerVectorSimple(const std::string &wxfilename, const std::vector< int32 > &list)
WriteToList attempts to write this list of integers, one per line, to the given file, in text format.
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
std::string PrintableWxfilename(const std::string &wxfilename)
PrintableWxfilename turns the wxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:73
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadFromList attempts to read this list of integers, one per line, from the given file...
std::string GetOptArg(int param) const