526 using namespace kaldi;
529 typedef kaldi::int64 int64;
532 "Propagate features through an xvector neural network model and write\n" 533 "the output vectors. \"Xvector\" is our term for a vector or\n" 534 "embedding which is the output of a particular type of neural network\n" 535 "architecture found in speaker recognition. This architecture\n" 536 "consists of several layers that operate on frames, a statistics\n" 537 "pooling layer that aggregates over the frame-level representations\n" 538 "and possibly additional layers that operate on segment-level\n" 539 "representations. The xvectors are generally extracted from an\n" 540 "output layer after the statistics pooling layer. By default, one\n" 541 "xvector is extracted directly from the set of features for each\n" 542 "utterance. Optionally, xvectors are extracted from chunks of input\n" 543 "features and averaged, to produce a single vector.\n" 545 "Usage: nnet3-xvector-compute [options] <raw-nnet-in> " 546 "<features-rspecifier> <vector-wspecifier>\n" 547 "e.g.: nnet3-xvector-compute final.raw scp:feats.scp " 548 "ark:nnet_prediction.ark\n" 549 "See also: nnet3-compute\n";
556 std::string use_gpu =
"no";
560 po.Register(
"use-gpu", &use_gpu,
561 "yes|no|optional|wait, only has effect if compiled with CUDA");
564 CuDevice::RegisterDeviceOptions(&po);
568 if (po.NumArgs() != 3) {
574 CuDevice::Instantiate().SelectGpuId(use_gpu);
577 std::string nnet_rxfilename = po.GetArg(1),
578 feature_rspecifier = po.GetArg(2),
579 vector_wspecifier = po.GetArg(3);
589 int32 left_context, right_context;
597 KALDI_LOG <<
"Left/right context is " << left_context <<
", " 600 total_context = left_context + right_context;
606 int32 num_utts_read = 0, num_xvectors_written = 0;
607 int64 frame_count = 0;
611 for (; !feature_reader.Done(); feature_reader.Next()) {
612 std::string utt = feature_reader.Key();
614 if (features.NumRows() == 0) {
615 KALDI_WARN <<
"Zero-length utterance: " << utt;
619 frame_count += features.NumRows();
621 computer.AcceptUtterance(utt, features);
624 while (computer.XvectorReady()) {
627 computer.OutputXvector(&utt, &xvector);
628 vector_writer.
Write(utt, xvector);
629 num_xvectors_written++;
634 while (computer.XvectorReady()) {
637 computer.OutputXvector(&utt, &xvector);
638 vector_writer.
Write(utt, xvector);
639 num_xvectors_written++;
644 CuDevice::Instantiate().PrintProfile();
646 double elapsed = timer.
Elapsed();
648 <<
"s: real-time factor assuming 100 frames/sec is " 649 << (elapsed*100.0/frame_count);
650 KALDI_LOG <<
"Read " << num_utts_read <<
" utterances, wrote " 651 << num_xvectors_written <<
" xvectors.";
655 if (num_xvectors_written > num_utts_read / 2)
659 }
catch(
const std::exception &e) {
660 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that is suitable to be done at test time...
void Write(std::ostream &Out, bool binary) const
Writes to C++ stream (option to write in binary).
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent.
A templated class for writing objects to an archive or script file; see The Table concept...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent.
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
void Register(OptionsItf *po)
A class representing a vector.
void SetRequireDirectInput(bool b, Nnet *nnet)
Calls the corresponding function in any component of type StatisticsPoolingComponent; used as a way t...
double Elapsed() const
Returns time in seconds.
Config class for the CollapseModel function.