All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
phones-to-prons.cc File Reference
#include "base/kaldi-common.h"
#include "hmm/transition-model.h"
#include "hmm/hmm-utils.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/fstext-lib.h"
Include dependency graph for phones-to-prons.cc:

Go to the source code of this file.

Functions

void CreatePhonesAltFst (const std::vector< int32 > &phones, int32 word_start_sym, int32 word_end_sym, fst::VectorFst< fst::StdArc > *ofst)
 
int main (int argc, char *argv[])
 

Function Documentation

void CreatePhonesAltFst ( const std::vector< int32 > &  phones,
int32  word_start_sym,
int32  word_end_sym,
fst::VectorFst< fst::StdArc > *  ofst 
)

Definition at line 31 of file phones-to-prons.cc.

References rnnlm::i.

Referenced by main().

34  {
35  using fst::StdArc;
38 
39  ofst->DeleteStates();
40  StateId cur_s = ofst->AddState();
41  ofst->SetStart(cur_s); // will be 0.
42  for (size_t i = 0; i < phones.size(); i++) {
43  StateId next_s = ofst->AddState();
44  // add arc to next state.
45  ofst->AddArc(cur_s, StdArc(phones[i], phones[i], Weight::One(),
46  next_s));
47  cur_s = next_s;
48  }
49  for (StateId s = 0; s <= cur_s; s++) {
50  ofst->AddArc(s, StdArc(word_end_sym, word_end_sym,
51  Weight::One(), s));
52  ofst->AddArc(s, StdArc(word_start_sym, word_start_sym,
53  Weight::One(), s));
54  }
55  ofst->SetFinal(cur_s, Weight::One());
56  {
57  fst::OLabelCompare<StdArc> olabel_comp;
58  ArcSort(ofst, olabel_comp);
59  }
60 }
fst::StdArc::StateId StateId
fst::StdArc StdArc
fst::StdArc::Weight Weight
int main ( int  argc,
char *  argv[] 
)

Definition at line 62 of file phones-to-prons.cc.

References kaldi::ConvertPhnxToProns(), kaldi::ConvertStringToInteger(), CreatePhonesAltFst(), SequentialTableReader< Holder >::Done(), kaldi::g_kaldi_verbose_level, ParseOptions::GetArg(), fst::GetLinearSymbolSequence(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), fst::MakeLinearAcceptor(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadFstKaldi(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), words, and TableWriter< Holder >::Write().

62  {
63  using namespace kaldi;
64  using fst::VectorFst;
65  using fst::StdArc;
66  typedef kaldi::int32 int32;
67  try {
68  const char *usage =
69  "Convert pairs of (phone-level, word-level) transcriptions to\n"
70  "output that indicates the phones assigned to each word.\n"
71  "Format is standard format for archives of vector<vector<int32> >\n"
72  "i.e. :\n"
73  "utt-id 600 4 7 19 ; 512 4 18 ; 0 1\n"
74  "where 600, 512 and 0 are the word-ids (0 for non-word phones, e.g.\n"
75  "optional-silence introduced by the lexicon), and the phone-ids\n"
76  "follow the word-ids.\n"
77  "Note: L_align.fst must have word-start and word-end symbols in it\n"
78  "\n"
79  "Usage: phones-to-prons [options] <L_align.fst> <word-start-sym> "
80  "<word-end-sym> <phones-rspecifier> <words-rspecifier> <prons-wspecifier>\n"
81  "e.g.: \n"
82  " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n"
83  " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:1.prons\n";
84 
85  ParseOptions po(usage);
86  po.Read(argc, argv);
87 
88  if (po.NumArgs() != 6) {
89  po.PrintUsage();
90  exit(1);
91  }
92 
93 
94  std::string lex_fst_filename = po.GetArg(1),
95  word_start_sym_str = po.GetArg(2),
96  word_end_sym_str = po.GetArg(3),
97  phones_rspecifier = po.GetArg(4),
98  words_rspecifier = po.GetArg(5),
99  prons_wspecifier = po.GetArg(6);
100 
101  int32 word_start_sym, word_end_sym;
102 
103  if (!ConvertStringToInteger(word_start_sym_str, &word_start_sym)
104  || word_start_sym <= 0)
105  KALDI_ERR << "Invalid word start symbol (expecting integer >= 0): "
106  << word_start_sym_str;
107  if (!ConvertStringToInteger(word_end_sym_str, &word_end_sym)
108  || word_end_sym <= 0 || word_end_sym == word_start_sym)
109  KALDI_ERR << "Invalid word end symbol (expecting integer >= 0"
110  << ", different from word start symbol): "
111  << word_end_sym_str;
112 
113  // L should be lexicon with word start and end symbols marked.
114  VectorFst<StdArc> *L = fst::ReadFstKaldi(lex_fst_filename);
115  {
116  // Make sure that L is sorted on output symbol (words).
117  fst::OLabelCompare<StdArc> olabel_comp;
118  ArcSort(L, olabel_comp);
119  }
120 
121  SequentialInt32VectorReader phones_reader(phones_rspecifier);
122  RandomAccessInt32VectorReader words_reader(words_rspecifier);
123 
124  int32 n_done = 0, n_err = 0;
125 
126  std::string empty;
127  Int32VectorVectorWriter prons_writer(prons_wspecifier);
128 
129  for (; !phones_reader.Done(); phones_reader.Next()) {
130  std::string key = phones_reader.Key();
131  const std::vector<int32> &phones = phones_reader.Value();
132  if (!words_reader.HasKey(key)) {
133  KALDI_WARN << "Not processing utterance " << key << " because no word "
134  << "transcription found.";
135  n_err++;
136  continue;
137  }
138  const std::vector<int32> &words = words_reader.Value(key);
139 
140  // convert word alignment to acceptor and compose it with lexicon.
141  // phn2word will have phones (and word start/end symbols) on its
142  // input, and words on its output. It will enode the alternative
143  // pronunciations of this word-sequence, with word start and end
144  // symbols at the appropriate places.
145  VectorFst<StdArc> phn2word;
146  {
147  VectorFst<StdArc> words_acceptor;
148  MakeLinearAcceptor(words, &words_acceptor);
149  Compose(*L, words_acceptor, &phn2word);
150  }
151  if (phn2word.Start() == fst::kNoStateId) {
152  KALDI_WARN << "Phone to word FST for utterance " << key
153  << "is empty (either decoding for this utterance did "
154  << "not reach end-state, or mismatched lexicon.)";
155  n_err++;
156  continue;
157  }
158 
159  VectorFst<StdArc> phones_alt_fst;
160  CreatePhonesAltFst(phones, word_start_sym, word_end_sym, &phones_alt_fst);
161 
162  // phnx2word will have phones and word-start and word-end symbols
163  // on the input side, and words on the output side.
164  VectorFst<StdArc> phnx2word;
165  Compose(phones_alt_fst, phn2word, &phnx2word);
166 
167  if (phnx2word.Start() == fst::kNoStateId) {
168  KALDI_WARN << "phnx2word FST for utterance " << key
169  << "is empty (either decoding for this utterance did "
170  << "not reach end-state, or mismatched lexicon.)";
171  if (g_kaldi_verbose_level >= 2) {
172  KALDI_LOG << "phn2word FST is below:";
173 #ifdef HAVE_OPENFST_GE_10400
174  fst::FstPrinter<StdArc> fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t");
175 #else
176  fst::FstPrinter<StdArc> fstprinter(phn2word, NULL, NULL, NULL, false, true);
177 #endif
178  fstprinter.Print(&std::cerr, "standard error");
179  KALDI_LOG << "phone sequence is: ";
180  for (size_t i = 0; i < phones.size(); i++)
181  std::cerr << phones[i] << ' ';
182  std::cerr << '\n';
183  }
184  continue;
185  }
186 
187  // Now get the best path in phnx2word.
188  VectorFst<StdArc> phnx2word_best;
189  ShortestPath(phnx2word, &phnx2word_best);
190 
191  // Now get seqs of phones and words.
192  std::vector<int32> phnx, words2;
193  StdArc::Weight garbage;
194  if (!fst::GetLinearSymbolSequence(phnx2word_best,
195  &phnx, &words2, &garbage))
196  KALDI_ERR << "phnx2word is not a linear transducer (code error?)";
197  if (words2 != words)
198  KALDI_ERR << "words have changed! (code error?)";
199 
200  // Now, "phnx" should be the phone sequence with start and end
201  // symbols included. At this point we break it up into segments,
202  // and try to match it up with words.
203  std::vector<std::vector<int32> > prons;
204  if (!ConvertPhnxToProns(phnx, words,
205  word_start_sym, word_end_sym,
206  &prons)) {
207  KALDI_WARN << "Error converting phones and words to prons "
208  << " (mismatched or non-marked lexicon or partial "
209  << " alignment?)";
210  n_err++;
211  continue;
212  }
213  prons_writer.Write(key, prons);
214  n_done++;
215  }
216  KALDI_LOG << "Done " << n_done << " utterances; " << n_err << " had errors.";
217  } catch(const std::exception &e) {
218  std::cerr << e.what();
219  return -1;
220  }
221 }
int32 words[kMaxOrder]
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:114
bool ConvertPhnxToProns(const std::vector< int32 > &phnx, const std::vector< int32 > &words, int32 word_start_sym, int32 word_end_sym, std::vector< std::vector< int32 > > *prons)
Definition: hmm-utils.cc:1035
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:366
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void MakeLinearAcceptor(const vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void CreatePhonesAltFst(const std::vector< int32 > &phones, int32 word_start_sym, int32 word_end_sym, fst::VectorFst< fst::StdArc > *ofst)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
bool GetLinearSymbolSequence(const Fst< Arc > &fst, vector< I > *isymbols_out, vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
fst::StdArc::Weight Weight
int32 g_kaldi_verbose_level
This is set by util/parse-options.{h, cc} if you set the --verbose=? option.
Definition: kaldi-error.cc:40
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
#define KALDI_LOG
Definition: kaldi-error.h:133