fstmakecontextsyms.cc
Go to the documentation of this file.
1 // fstbin/fstmakecontextsyms.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "tree/context-dep.h"
21 #include "util/common-utils.h"
22 #include "fst/fstlib.h"
23 #include "fstext/fstext-utils.h"
24 #include "fstext/context-fst.h"
25 
26 
27 /*
28  Test for this program and fstmakecontextfst:
29  mkdir -p ~/tmpdir
30  pushd ~/tmpdir
31  (echo "<eps> 0"; echo "a 1"; echo "b 2"; echo "#0 3"; echo "#1 4"; echo "#$ 5" ) > phones.txt
32  ( echo 3; echo 4 ) > disambig.list
33  fstmakecontextfst --read-disambig-syms=disambig.list <(grep -v '#' phones.txt) 5 ilabels.int > C.fst
34  fstmakecontextsyms phones.txt ilabels.int > context_syms.txt
35  fstprint --isymbols=context_syms.txt --osymbols=phones.txt C.fst > C.txt
36 
37  fstrandgen C.fst | fstprint --isymbols=context_syms.txt --osymbols=phones.txt
38 
39  Example output:
40 
41  fstrandgen C.fst | fstprint --isymbols=context_syms.txt --osymbols=phones.txt
42 0 1 #-1 b
43 1 2 <eps>/b/<eps> #$
44 2 3 #1 #1
45 3 4 #0 #0
46 4 5 #0 #0
47 5 6 #0 #0
48 6 7 #0 #0
49 7 8 #0 #0
50 8 9 #1 #1
51 9
52 */
53 
54 
55 int main(int argc, char *argv[]) {
56  try {
57  using namespace kaldi;
58  using namespace fst;
59  typedef fst::StdArc::Label Label;
60  const char *usage = "Create input symbols for CLG\n"
61  "Usage: fstmakecontextsyms phones-symtab ilabels_input_file [output-symtab.txt]\n"
62  "E.g.: fstmakecontextsyms phones.txt ilabels.sym > context_symbols.txt\n";
63 
64  ParseOptions po(usage);
65 
66  std::string disambig_list_file = "",
67  phone_separator = "/",
68  initial_disambig = "#-1";
69 
70  po.Register("phone-separator", &phone_separator,
71  "Separator for phones in phone-in-context symbols.");
72  po.Register("initial-disambig", &initial_disambig,
73  "Name for special disambiguation symbol that occurs at start "
74  "of context-dependent phone sequences");
75 
76  po.Read(argc, argv);
77 
78  if (po.NumArgs() < 2 || po.NumArgs() > 3) {
79  po.PrintUsage();
80  exit(1);
81  }
82 
83  std::string phones_symtab_filename = po.GetArg(1),
84  ilabel_info_filename = po.GetArg(2),
85  clg_symtab_filename = po.GetOptArg(3);
86 
87  std::vector<std::vector<kaldi::int32> > ilabel_info;
88  {
89  bool binary;
90  Input ki(ilabel_info_filename, &binary);
92  binary, &ilabel_info);
93  }
94 
95  fst::SymbolTable *phones_symtab = NULL;
96  { // read phone symbol table.
97  std::ifstream is(phones_symtab_filename.c_str());
98  phones_symtab = fst::SymbolTable::ReadText(is, phones_symtab_filename);
99  if (!phones_symtab) KALDI_ERR << "Could not read phones symbol-table file "<<phones_symtab_filename;
100  }
101 
102  fst::SymbolTable *clg_symtab =
103  CreateILabelInfoSymbolTable(ilabel_info,
104  *phones_symtab,
105  phone_separator,
106  initial_disambig);
107 
108  if (clg_symtab_filename == "") {
109  if (!clg_symtab->WriteText(std::cout))
110  KALDI_ERR << "Cannot write symbol table to standard output.";
111  } else {
112  if (!clg_symtab->WriteText(clg_symtab_filename))
113  KALDI_ERR << "Cannot open symbol table file "<<clg_symtab_filename<<" for writing.";
114  }
115  delete clg_symtab;
116  delete phones_symtab;
117  return 0;
118  } catch(const std::exception &e) {
119  std::cerr << e.what();
120  return -1;
121  }
122 }
fst::StdArc::Label Label
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadILabelInfo(std::istream &is, bool binary, vector< vector< int32 > > *info)
Utility function for reading ilabel-info vectors from disk.
Definition: context-fst.cc:335
std::istream & Stream()
Definition: kaldi-io.cc:826
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
fst::StdArc::Label Label
int NumArgs() const
Number of positional parameters (c.f. argc-1).
int main(int argc, char *argv[])
SymbolTable * CreateILabelInfoSymbolTable(const vector< vector< int32 > > &info, const SymbolTable &phones_symtab, std::string separator, std::string initial_disambig)
The following function is mainly of use for printing and debugging.
Definition: context-fst.cc:345
std::string GetOptArg(int param) const