All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
phones-to-prons.cc File Reference
#include "base/kaldi-common.h"
#include "hmm/transition-model.h"
#include "hmm/hmm-utils.h"
#include "util/common-utils.h"
#include "fst/fstlib.h"
#include "fstext/fstext-lib.h"
Include dependency graph for phones-to-prons.cc:

Go to the source code of this file.

Functions

void CreatePhonesAltFst (const std::vector< int32 > &phones, int32 word_start_sym, int32 word_end_sym, fst::VectorFst< fst::StdArc > *ofst)
 
int main (int argc, char *argv[])
 

Function Documentation

void CreatePhonesAltFst ( const std::vector< int32 > &  phones,
int32  word_start_sym,
int32  word_end_sym,
fst::VectorFst< fst::StdArc > *  ofst 
)

Definition at line 31 of file phones-to-prons.cc.

References rnnlm::i.

Referenced by main().

34  {
35  using fst::StdArc;
38 
39  ofst->DeleteStates();
40  StateId cur_s = ofst->AddState();
41  ofst->SetStart(cur_s); // will be 0.
42  for (size_t i = 0; i < phones.size(); i++) {
43  StateId next_s = ofst->AddState();
44  // add arc to next state.
45  ofst->AddArc(cur_s, StdArc(phones[i], phones[i], Weight::One(),
46  next_s));
47  cur_s = next_s;
48  }
49  for (StateId s = 0; s <= cur_s; s++) {
50  ofst->AddArc(s, StdArc(word_end_sym, word_end_sym,
51  Weight::One(), s));
52  ofst->AddArc(s, StdArc(word_start_sym, word_start_sym,
53  Weight::One(), s));
54  }
55  ofst->SetFinal(cur_s, Weight::One());
56  {
57  fst::OLabelCompare<StdArc> olabel_comp;
58  ArcSort(ofst, olabel_comp);
59  }
60 }
fst::StdArc::StateId StateId
fst::StdArc StdArc
fst::StdArc::Weight Weight
int main ( int  argc,
char *  argv[] 
)

Definition at line 62 of file phones-to-prons.cc.

References kaldi::ConvertPhnxToProns(), kaldi::ConvertStringToInteger(), CreatePhonesAltFst(), SequentialTableReader< Holder >::Done(), kaldi::g_kaldi_verbose_level, ParseOptions::GetArg(), fst::GetLinearSymbolSequence(), RandomAccessTableReader< Holder >::HasKey(), rnnlm::i, KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), fst::MakeLinearAcceptor(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadFstKaldi(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), words, and TableWriter< Holder >::Write().

62  {
63  using namespace kaldi;
64  using fst::VectorFst;
65  using fst::StdArc;
66  typedef kaldi::int32 int32;
67  try {
68  const char *usage =
69  "Convert pairs of (phone-level, word-level) transcriptions to\n"
70  "output that indicates the phones assigned to each word.\n"
71  "Format is standard format for archives of vector<vector<int32> >\n"
72  "i.e. :\n"
73  "utt-id 600 4 7 19 ; 512 4 18 ; 0 1\n"
74  "where 600, 512 and 0 are the word-ids (0 for non-word phones, e.g.\n"
75  "optional-silence introduced by the lexicon), and the phone-ids\n"
76  "follow the word-ids.\n"
77  "Note: L_align.fst must have word-start and word-end symbols in it\n"
78  "\n"
79  "Usage: phones-to-prons [options] <L_align.fst> <word-start-sym> "
80  "<word-end-sym> <phones-rspecifier> <words-rspecifier> <prons-wspecifier>\n"
81  "e.g.: \n"
82  " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n"
83  " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:1.prons\n";
84 
85  ParseOptions po(usage);
86  po.Read(argc, argv);
87 
88  if (po.NumArgs() != 6) {
89  po.PrintUsage();
90  exit(1);
91  }
92 
93 
94  std::string lex_fst_filename = po.GetArg(1),
95  word_start_sym_str = po.GetArg(2),
96  word_end_sym_str = po.GetArg(3),
97  phones_rspecifier = po.GetArg(4),
98  words_rspecifier = po.GetArg(5),
99  prons_wspecifier = po.GetArg(6);
100 
101  int32 word_start_sym, word_end_sym;
102 
103  if (!ConvertStringToInteger(word_start_sym_str, &word_start_sym)
104  || word_start_sym <= 0)
105  KALDI_ERR << "Invalid word start symbol (expecting integer >= 0): "
106  << word_start_sym_str;
107  if (!ConvertStringToInteger(word_end_sym_str, &word_end_sym)
108  || word_end_sym <= 0 || word_end_sym == word_start_sym)
109  KALDI_ERR << "Invalid word end symbol (expecting integer >= 0"
110  << ", different from word start symbol): "
111  << word_end_sym_str;
112 
113  // L should be lexicon with word start and end symbols marked.
114  VectorFst<StdArc> *L = fst::ReadFstKaldi(lex_fst_filename);
115  {
116  // Make sure that L is sorted on output symbol (words).
117  fst::OLabelCompare<StdArc> olabel_comp;
118  ArcSort(L, olabel_comp);
119  }
120 
121  SequentialInt32VectorReader phones_reader(phones_rspecifier);
122  RandomAccessInt32VectorReader words_reader(words_rspecifier);
123 
124  int32 n_done = 0, n_err = 0;
125 
126  std::string empty;
127  Int32VectorVectorWriter prons_writer(prons_wspecifier);
128 
129  for (; !phones_reader.Done(); phones_reader.Next()) {
130  std::string key = phones_reader.Key();
131  const std::vector<int32> &phones = phones_reader.Value();
132  if (!words_reader.HasKey(key)) {
133  KALDI_WARN << "Not processing utterance " << key << " because no word "
134  << "transcription found.";
135  n_err++;
136  continue;
137  }
138  const std::vector<int32> &words = words_reader.Value(key);
139 
140  // convert word alignment to acceptor and compose it with lexicon.
141  // phn2word will have phones (and word start/end symbols) on its
142  // input, and words on its output. It will enode the alternative
143  // pronunciations of this word-sequence, with word start and end
144  // symbols at the appropriate places.
145  VectorFst<StdArc> phn2word;
146  {
147  VectorFst<StdArc> words_acceptor;
148  MakeLinearAcceptor(words, &words_acceptor);
149  Compose(*L, words_acceptor, &phn2word);
150  }
151  if (phn2word.Start() == fst::kNoStateId) {
152  KALDI_WARN << "Phone to word FST for utterance " << key
153  << "is empty (either decoding for this utterance did "
154  << "not reach end-state, or mismatched lexicon.)";
155  n_err++;
156  continue;
157  }
158 
159  VectorFst<StdArc> phones_alt_fst;
160  CreatePhonesAltFst(phones, word_start_sym, word_end_sym, &phones_alt_fst);
161 
162  // phnx2word will have phones and word-start and word-end symbols
163  // on the input side, and words on the output side.
164  VectorFst<StdArc> phnx2word;
165  Compose(phones_alt_fst, phn2word, &phnx2word);
166 
167  if (phnx2word.Start() == fst::kNoStateId) {
168  KALDI_WARN << "phnx2word FST for utterance " << key
169  << "is empty (either decoding for this utterance did "
170  << "not reach end-state, or mismatched lexicon.)";
171  if (g_kaldi_verbose_level >= 2) {
172  KALDI_LOG << "phn2word FST is below:";
173  fst::FstPrinter<StdArc> fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t");
174  fstprinter.Print(&std::cerr, "standard error");
175  KALDI_LOG << "phone sequence is: ";
176  for (size_t i = 0; i < phones.size(); i++)
177  std::cerr << phones[i] << ' ';
178  std::cerr << '\n';
179  }
180  continue;
181  }
182 
183  // Now get the best path in phnx2word.
184  VectorFst<StdArc> phnx2word_best;
185  ShortestPath(phnx2word, &phnx2word_best);
186 
187  // Now get seqs of phones and words.
188  std::vector<int32> phnx, words2;
189  StdArc::Weight garbage;
190  if (!fst::GetLinearSymbolSequence(phnx2word_best,
191  &phnx, &words2, &garbage))
192  KALDI_ERR << "phnx2word is not a linear transducer (code error?)";
193  if (words2 != words)
194  KALDI_ERR << "words have changed! (code error?)";
195 
196  // Now, "phnx" should be the phone sequence with start and end
197  // symbols included. At this point we break it up into segments,
198  // and try to match it up with words.
199  std::vector<std::vector<int32> > prons;
200  if (!ConvertPhnxToProns(phnx, words,
201  word_start_sym, word_end_sym,
202  &prons)) {
203  KALDI_WARN << "Error converting phones and words to prons "
204  << " (mismatched or non-marked lexicon or partial "
205  << " alignment?)";
206  n_err++;
207  continue;
208  }
209  prons_writer.Write(key, prons);
210  n_done++;
211  }
212  KALDI_LOG << "Done " << n_done << " utterances; " << n_err << " had errors.";
213  } catch(const std::exception &e) {
214  std::cerr << e.what();
215  return -1;
216  }
217 }
int32 words[kMaxOrder]
Relabels neural network egs with the read pdf-id alignments.
Definition: chain.dox:20
bool ConvertStringToInteger(const std::string &str, Int *out)
Converts a string into an integer via strtoll and returns false if there was any kind of problem (i...
Definition: text-utils.h:114
bool ConvertPhnxToProns(const std::vector< int32 > &phnx, const std::vector< int32 > &words, int32 word_start_sym, int32 word_end_sym, std::vector< std::vector< int32 > > *prons)
Definition: hmm-utils.cc:1115
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:366
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
void MakeLinearAcceptor(const vector< I > &labels, MutableFst< Arc > *ofst)
Creates unweighted linear acceptor from symbol sequence.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
void CreatePhonesAltFst(const std::vector< int32 > &phones, int32 word_start_sym, int32 word_end_sym, fst::VectorFst< fst::StdArc > *ofst)
#define KALDI_ERR
Definition: kaldi-error.h:127
#define KALDI_WARN
Definition: kaldi-error.h:130
bool GetLinearSymbolSequence(const Fst< Arc > &fst, vector< I > *isymbols_out, vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
fst::StdArc::Weight Weight
int32 g_kaldi_verbose_level
This is set by util/parse-options.{h, cc} if you set the --verbose=? option.
Definition: kaldi-error.cc:40
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
#define KALDI_LOG
Definition: kaldi-error.h:133