align-text.cc
Go to the documentation of this file.
1 // bin/align-text.cc
2 
3 // Copyright 2014 Guoguo Chen
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 #include "util/common-utils.h"
21 #include "util/parse-options.h"
22 #include "util/edit-distance.h"
23 #include <algorithm>
24 
25 bool IsNotToken(const std::string &token) {
26  return ! kaldi::IsToken(token);
27 }
28 
29 int main(int argc, char *argv[]) {
30  using namespace kaldi;
31  typedef kaldi::int32 int32;
32 
33  try {
34  const char *usage =
35  "Computes alignment between two sentences with the same key in the\n"
36  "two given input text-rspecifiers. The current implementation uses\n"
37  "Levenshtein distance as the distance metric.\n"
38  "\n"
39  "The input text file looks like follows:\n"
40  " key1 a b c\n"
41  " key2 d e\n"
42  "\n"
43  "The output alignment file looks like follows:\n"
44  " key1 a a ; b <eps> ; c c \n"
45  " key2 d f ; e e \n"
46  "where the aligned pairs are separated by \";\"\n"
47  "\n"
48  "Usage: align-text [options] <text1-rspecifier> <text2-rspecifier> \\\n"
49  " <alignment-wspecifier>\n"
50  " e.g.: align-text ark:text1.txt ark:text2.txt ark,t:alignment.txt\n"
51  "See also: compute-wer,\n"
52  "Example scoring script: egs/wsj/s5/steps/score_kaldi.sh\n";
53 
54  ParseOptions po(usage);
55 
56  std::string special_symbol = "<eps>";
57  std::string separator = ";";
58  po.Register("special-symbol", &special_symbol, "Special symbol to be "
59  "aligned with the inserted or deleted words. Your sentences "
60  "should not contain this symbol.");
61  po.Register("separator", &separator, "Separator for each aligned pair in "
62  "the output alignment file. Note: it should not be necessary "
63  "to change this even if your sentences contain ';', because "
64  "to parse the output of this program you can just split on "
65  "space and then assert that every third token is ';'.");
66 
67  po.Read(argc, argv);
68 
69  if (po.NumArgs() != 3) {
70  po.PrintUsage();
71  exit(1);
72  }
73 
74  std::string text1_rspecifier = po.GetArg(1),
75  text2_rspecifier = po.GetArg(2),
76  align_wspecifier = po.GetArg(3);
77 
78  SequentialTokenVectorReader text1_reader(text1_rspecifier);
79  RandomAccessTokenVectorReader text2_reader(text2_rspecifier);
80  TokenVectorWriter align_writer(align_wspecifier);
81 
82  int32 n_done = 0;
83  int32 n_fail = 0;
84  for (; !text1_reader.Done(); text1_reader.Next()) {
85  std::string key = text1_reader.Key();
86 
87  if (!text2_reader.HasKey(key)) {
88  KALDI_WARN << "Key " << key << " is in " << text1_rspecifier
89  << ", but not in " << text2_rspecifier;
90  n_fail++;
91  continue;
92  }
93  const std::vector<std::string> &text1 = text1_reader.Value();
94  const std::vector<std::string> &text2 = text2_reader.Value(key);
95 
96  if (std::find_if(text1.begin(), text1.end(), IsNotToken) != text1.end()) {
97  KALDI_ERR << "In text1, the utterance " << key
98  << " contains unprintable characters. That means there is"
99  << " a problem with the text (such as incorrect encoding).";
100  }
101  if (std::find_if(text2.begin(), text2.end(), IsNotToken) != text2.end()) {
102  KALDI_ERR << "In text2, the utterance " << key
103  << " contains unprintable characters. That means there is"
104  << " a problem with the text (such as incorrect encoding).";
105  }
106 
107  // Verify that the special symbol is not in the string.
108  if (std::find(text1.begin(), text1.end(), special_symbol) != text1.end()){
109  KALDI_ERR << "In text1, the utterance " << key
110  << " contains the special symbol '" << special_symbol
111  << "'. This is not allowed.";
112  }
113  if (std::find(text2.begin(), text2.end(), special_symbol) != text2.end()){
114  KALDI_ERR << "In text2, the utterance " << key
115  << " contains the special symbol '" << special_symbol
116  << "'. This is not allowed.";
117  }
118 
119  std::vector<std::pair<std::string, std::string> > aligned;
120  LevenshteinAlignment(text1, text2, special_symbol, &aligned);
121 
122  std::vector<std::string> token_vec;
123  std::vector<std::pair<std::string, std::string> >::const_iterator iter;
124  for (iter = aligned.begin(); iter != aligned.end(); ++iter) {
125  token_vec.push_back(iter->first);
126  token_vec.push_back(iter->second);
127  if (aligned.end() - iter != 1)
128  token_vec.push_back(separator);
129  }
130  align_writer.Write(key, token_vec);
131 
132  n_done++;
133  }
134 
135  KALDI_LOG << "Done " << n_done << " sentences, failed for " << n_fail;
136  return (n_done != 0 ? 0 : 1);
137  } catch(const std::exception &e) {
138  std::cerr << e.what();
139  return -1;
140  }
141 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int main(int argc, char *argv[])
Definition: align-text.cc:29
int32 LevenshteinAlignment(const std::vector< T > &a, const std::vector< T > &b, T eps_symbol, std::vector< std::pair< T, T > > *output)
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
Definition: text-utils.cc:105
Allows random access to a collection of objects in an archive or script file; see The Table concept...
Definition: kaldi-table.h:233
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (c.f. argc-1).
#define KALDI_LOG
Definition: kaldi-error.h:153
bool IsNotToken(const std::string &token)
Definition: align-text.cc:25