25 int main(
int argc,
char *argv[]) {
26 using namespace kaldi;
30 "With 3 or 4 arguments, averages iVectors over all the\n" 31 "utterances of each speaker using the spk2utt file.\n" 32 "Input the spk2utt file and a set of iVectors indexed by\n" 33 "utterance; output is iVectors indexed by speaker. If 4\n" 34 "arguments are given, extra argument is a table for the number\n" 35 "of utterances per speaker (can be useful for PLDA). If 2\n" 36 "arguments are given, computes the mean of all input files and\n" 37 "writes out the mean vector.\n" 39 "Usage: ivector-mean <spk2utt-rspecifier> <ivector-rspecifier> " 40 "<ivector-wspecifier> [<num-utt-wspecifier>]\n" 41 "or: ivector-mean <ivector-rspecifier> <mean-wxfilename>\n" 42 "e.g.: ivector-mean data/spk2utt exp/ivectors.ark exp/spk_ivectors.ark exp/spk_num_utts.ark\n" 43 "or: ivector-mean exp/ivectors.ark exp/mean.vec\n" 44 "See also: ivector-subtract-global-mean\n";
47 bool binary_write =
false;
48 po.
Register(
"binary", &binary_write,
"If true, write output in binary " 49 "(only applicable when writing files, not archives/tables.");
60 std::string ivector_rspecifier = po.
GetArg(1),
61 mean_wxfilename = po.
GetArg(2);
65 for (; !ivector_reader.
Done(); ivector_reader.
Next()) {
73 sum.
Scale(1.0 / num_done);
78 std::string spk2utt_rspecifier = po.
GetArg(1),
79 ivector_rspecifier = po.
GetArg(2),
80 ivector_wspecifier = po.
GetArg(3),
83 double spk_sumsq = 0.0;
86 int64 num_spk_done = 0, num_spk_err = 0,
87 num_utt_done = 0, num_utt_err = 0;
94 for (; !spk2utt_reader.
Done(); spk2utt_reader.
Next()) {
95 std::string spk = spk2utt_reader.
Key();
96 const std::vector<std::string> &uttlist = spk2utt_reader.
Value();
97 if (uttlist.empty()) {
98 KALDI_ERR <<
"Speaker with no utterances.";
102 for (
size_t i = 0;
i < uttlist.size();
i++) {
103 std::string utt = uttlist[
i];
104 if (!ivector_reader.
HasKey(utt)) {
105 KALDI_WARN <<
"No iVector present in input for utterance " << utt;
108 if (utt_count == 0) {
109 spk_mean = ivector_reader.
Value(utt);
117 if (utt_count == 0) {
118 KALDI_WARN <<
"Not producing output for speaker " << spk
119 <<
" since no utterances had iVectors";
122 spk_mean.
Scale(1.0 / utt_count);
123 ivector_writer.
Write(spk, spk_mean);
124 if (num_utts_wspecifier !=
"")
125 num_utts_writer.
Write(spk, utt_count);
127 spk_sumsq +=
VecVec(spk_mean, spk_mean);
128 if (spk_sum.
Dim() == 0)
130 spk_sum.
AddVec(1.0, spk_mean);
134 KALDI_LOG <<
"Computed mean of " << num_spk_done <<
" speakers (" 135 << num_spk_err <<
" with no utterances), consisting of " 136 << num_utt_done <<
" utterances (" << num_utt_err
137 <<
" absent from input).";
139 if (num_spk_done != 0) {
140 spk_sumsq /= num_spk_done;
141 spk_sum.
Scale(1.0 / num_spk_done);
142 double mean_length = spk_sum.
Norm(2.0),
143 spk_length = sqrt(spk_sumsq),
144 norm_spk_length = spk_length / sqrt(spk_sum.
Dim());
145 KALDI_LOG <<
"Norm of mean of speakers is " << mean_length
146 <<
", root-mean-square speaker-iVector length divided by " 147 <<
"sqrt(dim) is " << norm_spk_length;
150 return (num_spk_done != 0 ? 0 : 1);
152 }
catch(
const std::exception &e) {
153 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int main(int argc, char *argv[])
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
A templated class for writing objects to an archive or script file; see The Table concept...
void Resize(MatrixIndexT length, MatrixResizeType resize_type=kSetZero)
Set vector to a specified size (can be zero).
Real Norm(Real p) const
Compute the p-th norm of the vector.
void Write(const std::string &key, const T &value) const
void Register(const std::string &name, bool *ptr, const std::string &doc)
Allows random access to a collection of objects in an archive or script file; see The Table concept...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
const T & Value(const std::string &key)
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
MatrixIndexT Dim() const
Returns the dimension of the vector.
void Scale(Real alpha)
Multiplies all elements by this constant.
bool HasKey(const std::string &key)
int NumArgs() const
Number of positional parameters (cf. argc-1).
void WriteKaldiObject(const C &c, const std::string &filename, bool binary)
Real VecVec(const VectorBase< Real > &a, const VectorBase< Real > &b)
Returns the dot product of a and b.
void AddVec(const Real alpha, const VectorBase< OtherReal > &v)
Add vector: *this = *this + alpha * v (with casting between floats and doubles) ...
std::string GetOptArg(int param) const