26 #include "fst/fstlib.h" 28 int main(
int argc,
char *argv[]) {
29 using namespace kaldi;
33 "Convert model-level alignments to phone-sequences (in integer, " 35 "Usage: ali-to-phones [options] <model> <alignments-rspecifier> " 36 "<phone-transcript-wspecifier|ctm-wxfilename>\n" 38 " ali-to-phones 1.mdl ark:1.ali ark:-\n" 40 " ali-to-phones --ctm-output 1.mdl ark:1.ali 1.ctm\n" 41 "See also: show-alignments lattice-align-phones, compare-int-vector\n";
43 bool per_frame =
false;
44 bool write_lengths =
false;
45 bool ctm_output =
false;
47 po.
Register(
"ctm-output", &ctm_output,
48 "If true, output the alignments in ctm format " 49 "(the confidences will be set to 1)");
50 po.
Register(
"frame-shift", &frame_shift,
51 "frame shift used to control the times of the ctm output");
53 "If true, write out the frame-level phone alignment " 54 "(else phone sequence)");
55 po.
Register(
"write-lengths", &write_lengths,
56 "If true, write the #frames for each phone (different format)");
61 KALDI_ASSERT(!(per_frame && write_lengths) &&
"Incompatible options.");
68 std::string model_filename = po.
GetArg(1),
69 alignments_rspecifier = po.
GetArg(2);
77 (write_lengths ? empty : po.
GetArg(3)));
79 (write_lengths ? po.
GetArg(3) : empty));
81 std::string ctm_wxfilename(ctm_output ? po.
GetArg(3) : empty);
82 Output ctm_writer(ctm_wxfilename,
false);
84 ctm_writer.Stream() << std::fixed;
85 ctm_writer.Stream().precision(frame_shift >= 0.01 ? 2 : 3);
90 for (; !reader.
Done(); reader.
Next()) {
91 std::string key = reader.
Key();
92 const std::vector<int32> &alignment = reader.
Value();
94 std::vector<std::vector<int32> > split;
99 for (
size_t i = 0;
i < split.size();
i++) {
102 int32 num_repeats = split[
i].size();
103 ctm_writer.Stream() << key <<
" 1 " << phone_start <<
" " 104 << (frame_shift * num_repeats) <<
" " << phone << std::endl;
105 phone_start += frame_shift * num_repeats;
107 }
else if (!write_lengths) {
108 std::vector<int32> phones;
109 for (
size_t i = 0;
i < split.size();
i++) {
112 int32 num_repeats = split[
i].size();
115 for(int32
j = 0;
j < num_repeats;
j++)
116 phones.push_back(phone);
118 phones.push_back(phone);
120 phones_writer.
Write(key, phones);
122 std::vector<std::pair<int32, int32> > pairs;
123 for (
size_t i = 0;
i < split.size();
i++) {
126 int32 num_repeats = split[
i].size();
128 pairs.push_back(std::make_pair(phone, num_repeats));
130 pair_writer.
Write(key, pairs);
134 KALDI_LOG <<
"Done " << n_done <<
" utterances.";
135 }
catch(
const std::exception &e) {
136 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
bool SplitToPhones(const TransitionModel &trans_model, const std::vector< int32 > &alignment, std::vector< std::vector< int32 > > *split_alignment)
SplitToPhones splits up the TransitionIds in "alignment" into their individual phones (one vector per...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
A templated class for reading objects sequentially from an archive or script file; see The Table concept...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
int NumArgs() const
Number of positional parameters (cf. argc-1).
#define KALDI_ASSERT(cond)
int main(int argc, char *argv[])
int32 TransitionIdToPhone(int32 trans_id) const