526     using namespace kaldi;
   529     typedef kaldi::int64 int64;
   532         "Propagate features through an xvector neural network model and write\n"   533         "the output vectors.  \"Xvector\" is our term for a vector or\n"   534         "embedding which is the output of a particular type of neural network\n"   535         "architecture found in speaker recognition.  This architecture\n"   536         "consists of several layers that operate on frames, a statistics\n"   537         "pooling layer that aggregates over the frame-level representations\n"   538         "and possibly additional layers that operate on segment-level\n"   539         "representations.  The xvectors are generally extracted from an\n"   540         "output layer after the statistics pooling layer.  By default, one\n"   541         "xvector is extracted directly from the set of features for each\n"   542         "utterance.  Optionally, xvectors are extracted from chunks of input\n"   543         "features and averaged, to produce a single vector.\n"   545         "Usage: nnet3-xvector-compute [options] <raw-nnet-in> "   546         "<features-rspecifier> <vector-wspecifier>\n"   547         "e.g.: nnet3-xvector-compute final.raw scp:feats.scp "   548         "ark:nnet_prediction.ark\n"   549         "See also: nnet3-compute\n";
   556     std::string use_gpu = 
"no";
   560     po.Register(
"use-gpu", &use_gpu,
   561       "yes|no|optional|wait, only has effect if compiled with CUDA");
   564     CuDevice::RegisterDeviceOptions(&po);
   568     if (po.NumArgs() != 3) {
   574     CuDevice::Instantiate().SelectGpuId(use_gpu);
   577     std::string nnet_rxfilename = po.GetArg(1),
   578                 feature_rspecifier = po.GetArg(2),
   579                 vector_wspecifier = po.GetArg(3);
   589       int32 left_context, right_context;
   597       KALDI_LOG << 
"Left/right context is " << left_context << 
", "   600       total_context = left_context + right_context;
   606     int32 num_utts_read = 0, num_xvectors_written = 0;
   607     int64 frame_count = 0;
   611     for (; !feature_reader.Done(); feature_reader.Next()) {
   612       std::string utt = feature_reader.Key();
   614       if (features.NumRows() == 0) {
   615         KALDI_WARN << 
"Zero-length utterance: " << utt;
   619       frame_count += features.NumRows();
   621       computer.AcceptUtterance(utt, features);
   624       while (computer.XvectorReady()) {
   627         computer.OutputXvector(&utt, &xvector);
   628         vector_writer.
Write(utt, xvector);
   629         num_xvectors_written++;
   634     while (computer.XvectorReady()) {
   637       computer.OutputXvector(&utt, &xvector);
   638       vector_writer.
Write(utt, xvector);
   639       num_xvectors_written++;
   644     CuDevice::Instantiate().PrintProfile();
   646     double elapsed = timer.
Elapsed();
   648               << 
"s: real-time factor assuming 100 frames/sec is "   649               << (elapsed*100.0/frame_count);
   650     KALDI_LOG << 
"Read " << num_utts_read << 
" utterances, wrote "   651               << num_xvectors_written << 
" xvectors.";
   655     if (num_xvectors_written > num_utts_read / 2)
   659   } 
catch(
const std::exception &e) {
   660     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that is suitable to be done at test time...
 
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary). 
 
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent. 
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
 
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent. 
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet. 
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
void Register(OptionsItf *po)
 
A class representing a vector. 
 
void SetRequireDirectInput(bool b, Nnet *nnet)
Calls the corresponding function in any component of type StatisticsPoolingComponent; used as a way t...
 
double Elapsed() const
Returns time in seconds. 
 
Config class for the CollapseModel function.