fsts-concat.cc
Go to the documentation of this file.
1 // fstbin/fsts-concat.cc
2 
3 // Copyright 2016 Johns Hopkins University (Authors: Jan "Yenda" Trmal)
4 // 2018 Soapbox Labs (Author: Karel Vesely)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 
22 #include "base/kaldi-common.h"
23 #include "util/common-utils.h"
24 #include "fstext/fstext-utils.h"
25 #include "fstext/kaldi-fst-io.h"
26 
27 
28 int main(int argc, char *argv[]) {
29  try {
30  using namespace kaldi;
31  using namespace fst;
32  typedef kaldi::int32 int32;
33  typedef kaldi::uint64 uint64;
34 
35  const char *usage =
36  "Reads kaldi archives with FSTs. Concatenates the fsts from all the rspecifiers.\n"
37  "The fsts to concatenate must have same key. The sequencing is given by the position of arguments.\n"
38  "\n"
39  "Usage: fsts-concat [options] <fsts-rspecifier1> <fsts-rspecifier2> ... <fsts-wspecifier>\n"
40  " e.g.: fsts-concat scp:fsts1.scp scp:fsts2.scp ... ark:fsts_out.ark\n"
41  "\n"
42  "see also: fstconcat (from the OpenFst toolkit)\n";
43 
44  ParseOptions po(usage);
45 
46  po.Read(argc, argv);
47 
48  if (po.NumArgs() < 3) {
49  po.PrintUsage();
50  exit(1);
51  }
52 
53  std::string fsts_rspecifier = po.GetArg(1),
54  fsts_wspecifier = po.GetArg(po.NumArgs());
55 
56  SequentialTableReader<VectorFstHolder> fst_reader(fsts_rspecifier);
57  std::vector<RandomAccessTableReader<VectorFstHolder>*> fst_readers;
58  TableWriter<VectorFstHolder> fst_writer(fsts_wspecifier);
59 
60  for (int32 i = 2; i < po.NumArgs(); i++)
61  fst_readers.push_back(new RandomAccessTableReader<VectorFstHolder>(po.GetArg(i)));
62  const int32 num_fst_readers = fst_readers.size();
63 
64  int32 n_done = 0,
65  n_skipped = 0;
66 
67  for (; !fst_reader.Done(); fst_reader.Next()) {
68  std::string key = fst_reader.Key();
69 
70  // Check that the key exists in all 'fst_readers'.
71  bool skip_key = false;
72  for (int32 i = 0; i < num_fst_readers; i++) {
73  if (!fst_readers[i]->HasKey(key)) {
74  KALDI_WARN << "Skipping '" << key << "'"
75  << " due to missing the fst in " << (i+2) << "th <rspecifier> : "
76  << "'" << po.GetArg(i+2) << "'";
77  skip_key = true;
78  }
79  }
80  if (skip_key) {
81  n_skipped++;
82  continue;
83  }
84 
85  // Concatenate!
86  VectorFst<StdArc> fst_out = fst_readers.back()->Value(key);
87  // Loop from (last-1) to first, as 'prepending' the fsts is faster,
88  // see: http://www.openfst.org/twiki/bin/view/FST/ConcatDoc
89  for (int32 i = num_fst_readers-2; i >= 0; i--) {
90  fst::Concat(fst_readers[i]->Value(key), &fst_out);
91  }
92  // Finally, prepend the fst from the 'Sequential' reader.
93  fst::Concat(fst_reader.Value(), &fst_out);
94 
95  // Write the output.
96  fst_writer.Write(key, fst_out);
97  n_done++;
98  }
99 
100  // Cleanup.
101  for (int32 i = 0; i < num_fst_readers; i++)
102  delete fst_readers[i];
103  fst_readers.clear();
104 
105  KALDI_LOG << "Produced " << n_done << " FSTs by concatenating " << po.NumArgs()-1
106  << " streams " << "(" << n_skipped << " keys skipped).";
107  return (n_done != 0 ? 0 : 1);
108  } catch(const std::exception &e) {
109  std::cerr << e.what();
110  return -1;
111  }
112 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
For an extended explanation of the framework of which grammar-fsts are a part, please see Support for...
Definition: graph.dox:21
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
int main(int argc, char *argv[])
Definition: fsts-concat.cc:28
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_LOG
Definition: kaldi-error.h:153