show-alignments.cc
Go to the documentation of this file.
1 // bin/show-alignments.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "hmm/transition-model.h"
23 #include "hmm/hmm-utils.h"
24 #include "util/common-utils.h"
25 #include "fst/fstlib.h"
26 
27 int main(int argc, char *argv[]) {
28  using namespace kaldi;
29  typedef kaldi::int32 int32;
30  try {
31  const char *usage =
32  "Display alignments in human-readable form\n"
33  "Usage: show-alignments [options] <phone-syms> <model> <alignments-rspecifier>\n"
34  "e.g.: \n"
35  " show-alignments phones.txt 1.mdl ark:1.ali\n"
36  "See also: ali-to-phones, copy-int-vector\n";
37  ParseOptions po(usage);
38 
39  po.Read(argc, argv);
40 
41  if (po.NumArgs() != 3) {
42  po.PrintUsage();
43  exit(1);
44  }
45 
46  std::string phones_symtab_filename = po.GetArg(1),
47  model_filename = po.GetArg(2),
48  alignments_rspecifier = po.GetArg(3);
49 
50  TransitionModel trans_model;
51  ReadKaldiObject(model_filename, &trans_model);
52 
53  fst::SymbolTable *phones_symtab = NULL;
54  {
55  std::ifstream is(phones_symtab_filename.c_str());
56  phones_symtab = fst::SymbolTable::ReadText(is, phones_symtab_filename);
57  if (!phones_symtab || phones_symtab->NumSymbols() == 0)
58  KALDI_ERR << "Error opening symbol table file "<<phones_symtab_filename;
59  }
60 
61 
62  SequentialInt32VectorReader reader(alignments_rspecifier);
63 
64  for (; !reader.Done(); reader.Next()) {
65  std::string key = reader.Key();
66  const std::vector<int32> &alignment = reader.Value();
67 
68  std::vector<std::vector<int32> > split;
69  SplitToPhones(trans_model, alignment, &split);
70 
71  // split_str is the numerical form of the alignments..
72  std::vector<std::string> split_str(split.size());
73  std::vector<std::string> split_str_phones(split.size());
74  for (size_t i = 0; i < split.size(); i++) {
75  std::ostringstream ss;
76  ss << "[ ";
77  for (size_t j = 0; j < split[i].size(); j++)
78  ss << split[i][j] << " ";
79  ss << "] ";
80  split_str[i] = ss.str();
81 
82  int32 tid = split[i][0],
83  tstate = trans_model.TransitionIdToTransitionState(tid),
84  phone = trans_model.TransitionStateToPhone(tstate);
85  split_str_phones[i] =
86  phones_symtab->Find(phone) + " ";
87  std::string space;
88  int len = abs(static_cast<int>(split_str[i].size())-
89  static_cast<int>(split_str_phones[i].size()));
90  for (int j = 0; j < len; j++)
91  space += " ";
92  if (split_str[i].size() < split_str_phones[i].size())
93  split_str[i] += space;
94  else
95  split_str_phones[i] += space;
96  }
97  std::cout << key << " ";
98  for (size_t i = 0; i < split_str.size(); i++)
99  std::cout << split_str[i];
100  std::cout << '\n';
101  std::cout << key << " ";
102  for (size_t i = 0; i < split_str_phones.size(); i++)
103  std::cout << split_str_phones[i];
104  std::cout << '\n';
105  std::cout << '\n';
106  }
107  delete phones_symtab;
108  phones_symtab = NULL;
109  } catch(const std::exception &e) {
110  std::cerr << e.what();
111  return -1;
112  }
113 }
114 
115 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int main(int argc, char *argv[])
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
kaldi::int32 int32
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
Definition: hmm-utils.cc:723
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
int32 TransitionIdToTransitionState(int32 trans_id) const
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int32 TransitionStateToPhone(int32 trans_state) const
int NumArgs() const
Number of positional parameters (c.f. argc-1).