align-equal.cc File Reference
Include dependency graph for align-equal.cc:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 Write equally spaced alignments of utterances (to get training started). More...
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

Write equally spaced alignments of utterances (to get training started).

Definition at line 31 of file align-equal.cc.

References TrainingGraphCompiler::CompileGraphFromText(), SequentialTableReader< Holder >::Done(), fst::EqualAlign(), ParseOptions::GetArg(), fst::GetLinearSymbolSequence(), RandomAccessTableReader< Holder >::HasKey(), KALDI_ASSERT, KALDI_ERR, KALDI_LOG, KALDI_WARN, SequentialTableReader< Holder >::Key(), SequentialTableReader< Holder >::Next(), ParseOptions::NumArgs(), ParseOptions::PrintUsage(), ParseOptions::Read(), fst::ReadFstKaldi(), kaldi::ReadIntegerVectorSimple(), kaldi::ReadKaldiObject(), ParseOptions::Register(), RandomAccessTableReader< Holder >::Value(), SequentialTableReader< Holder >::Value(), words, and TableWriter< Holder >::Write().

31  {
32  try {
33  using namespace kaldi;
34  typedef kaldi::int32 int32;
35  using fst::SymbolTable;
36  using fst::VectorFst;
37  using fst::StdArc;
38 
39  const char *usage = "Write equally spaced alignments of utterances "
40  "(to get training started)\n"
41  "Usage: align-equal <tree-in> <model-in> <lexicon-fst-in> "
42  "<features-rspecifier> <transcriptions-rspecifier> <alignments-wspecifier>\n"
43  "e.g.: \n"
44  " align-equal 1.tree 1.mdl lex.fst scp:train.scp "
45  "'ark:sym2int.pl -f 2- words.txt text|' ark:equal.ali\n";
46 
47  ParseOptions po(usage);
48  std::string disambig_rxfilename;
49  po.Register("read-disambig-syms", &disambig_rxfilename, "File containing "
50  "list of disambiguation symbols in phone symbol table");
51  po.Read(argc, argv);
52 
53  if (po.NumArgs() != 6) {
54  po.PrintUsage();
55  exit(1);
56  }
57 
58  std::string tree_in_filename = po.GetArg(1);
59  std::string model_in_filename = po.GetArg(2);
60  std::string lex_in_filename = po.GetArg(3);
61  std::string feature_rspecifier = po.GetArg(4);
62  std::string transcript_rspecifier = po.GetArg(5);
63  std::string alignment_wspecifier = po.GetArg(6);
64 
65  ContextDependency ctx_dep;
66  ReadKaldiObject(tree_in_filename, &ctx_dep);
67 
68  TransitionModel trans_model;
69  ReadKaldiObject(model_in_filename, &trans_model);
70 
71  // need VectorFst because we will change it by adding subseq symbol.
72  VectorFst<StdArc> *lex_fst = fst::ReadFstKaldi(lex_in_filename);
73 
74  TrainingGraphCompilerOptions gc_opts(1.0, true); // true -> Dan style graph.
75 
76  std::vector<int32> disambig_syms;
77  if (disambig_rxfilename != "")
78  if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_syms))
79  KALDI_ERR << "fstcomposecontext: Could not read disambiguation symbols from "
80  << disambig_rxfilename;
81 
82  TrainingGraphCompiler gc(trans_model,
83  ctx_dep,
84  lex_fst,
85  disambig_syms,
86  gc_opts);
87 
88  lex_fst = NULL; // we gave ownership to gc.
89 
90 
91  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
92 
93  RandomAccessInt32VectorReader transcript_reader(transcript_rspecifier);
94 
95  Int32VectorWriter alignment_writer(alignment_wspecifier);
96 
97  int32 done = 0, no_transcript = 0, other_error = 0;
98  for (; !feature_reader.Done(); feature_reader.Next()) {
99  std::string key = feature_reader.Key();
100  if (transcript_reader.HasKey(key)) {
101  const std::vector<int32> &transcript = transcript_reader.Value(key);
102  int32 num_frames = feature_reader.Value().NumRows();
103  if (num_frames == 0) {
104  KALDI_WARN << "Zero-length utterance for key " << key;
105  other_error++;
106  continue;
107  }
108  VectorFst<StdArc> decode_fst;
109  if (!gc.CompileGraphFromText(transcript, &decode_fst)) {
110  KALDI_WARN << "Problem creating decoding graph for utterance "
111  << key <<" [serious error]";
112  other_error++;
113  continue;
114  }
115  VectorFst<StdArc> path;
116  int32 rand_seed = StringHasher()(key); // StringHasher() produces new anonymous
117  // object of type StringHasher; we then call operator () on it, with "key".
118  if (EqualAlign(decode_fst, num_frames, rand_seed, &path) ) {
119  std::vector<int32> aligned_seq, words;
120  StdArc::Weight w;
121  GetLinearSymbolSequence(path, &aligned_seq, &words, &w);
122  KALDI_ASSERT(aligned_seq.size() == num_frames);
123  KALDI_ASSERT(words == transcript);
124  alignment_writer.Write(key, aligned_seq);
125  done++;
126  } else {
127  KALDI_WARN << "AlignEqual: did not align utterence " << key;
128  other_error++;
129  }
130  } else {
131  KALDI_WARN << "AlignEqual: no transcript for utterance " << key;
132  no_transcript++;
133  }
134  }
135  if (done != 0 && no_transcript == 0 && other_error == 0) {
136  KALDI_LOG << "Success: done " << done << " utterances.";
137  } else {
138  KALDI_WARN << "Computed " << done << " alignments; " << no_transcript
139  << " lacked transcripts, " << other_error
140  << " had other errors.";
141  }
142  if (done != 0) return 0;
143  else return 1;
144  } catch(const std::exception &e) {
145  std::cerr << e.what();
146  return -1;
147  }
148 }
int32 words[kMaxOrder]
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
Definition: chain.dox:20
fst::StdArc StdArc
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
A hashing function object for strings.
Definition: stl-utils.h:248
bool GetLinearSymbolSequence(const Fst< Arc > &fst, std::vector< I > *isymbols_out, std::vector< I > *osymbols_out, typename Arc::Weight *tot_weight_out)
GetLinearSymbolSequence gets the symbol sequence from a linear FST.
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
bool EqualAlign(const Fst< Arc > &ifst, typename Arc::StateId length, int rand_seed, MutableFst< Arc > *ofst, int num_retries)
EqualAlign is similar to RandGen, but it generates a sequence with exactly "length" input symbols...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
Definition: kaldi-table.h:287
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
fst::StdArc::Weight Weight
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void ReadFstKaldi(std::istream &is, bool binary, VectorFst< Arc > *fst)
bool ReadIntegerVectorSimple(const std::string &rxfilename, std::vector< int32 > *list)
ReadIntegerVectorSimple attempts to read a list of integers, one per line, from the given file...
#define KALDI_LOG
Definition: kaldi-error.h:153