160   using namespace kaldi;
   165       "Compute a bootstrapping of WER to extract the 95% confidence interval.\n"   166       "Take a reference and a transcription file, in integer or text format,\n"   167       "and outputs overall WER statistics to standard output along with its\n"   168       "confidence interval using the bootstrap method of Bisani and Ney.\n"   169       "If a second transcription file corresponding to the same reference is\n"   170       "provided, a bootstrap comparison of the two transcription is performed\n"   171       "to estimate the probability of improvement.\n"   173       "Usage: compute-wer-bootci [options] <ref-rspecifier> <hyp-rspecifier> [<hyp2-rspecifier>]\n"   174       "E.g.: compute-wer-bootci --mode=present ark:data/train/text ark:hyp_text\n"   175       "or compute-wer-bootci ark:data/train/text ark:hyp_text ark:hyp_text2\n"   176       "See also: compute-wer\n";
   180     std::string mode = 
"strict";
   181     po.Register(
"mode", &mode,
   182                 "Scoring mode: \"present\"|\"all\"|\"strict\":\n"   183                 "  \"present\" means score those we have transcriptions for\n"   184                 "  \"all\" means treat absent transcriptions as empty\n"   185                 "  \"strict\" means die if all in ref not also in hyp");
   187     int32 replications = 10000;
   188     po.Register(
"replications", &replications,
   189             "Number of replications to compute the intervals");
   193     if (po.NumArgs() < 2 || po.NumArgs() > 3) {
   198     std::string ref_rspecifier = po.GetArg(1);
   199     std::string hyp_rspecifier = po.GetArg(2);
   200     std::string hyp2_rspecifier = (po.NumArgs() == 3?po.GetArg(3):
"");
   202     if (mode != 
"strict" && mode != 
"present" && mode != 
"all") {
   204           "--mode option invalid: expected \"present\"|\"all\"|\"strict\", got "   209     std::vector<std::pair<int32, int32> > edit_word_per_hyp, edit_word_per_hyp2;
   210     if(hyp2_rspecifier.empty())
   214               edit_word_per_hyp, edit_word_per_hyp2);
   218     BaseFloat mean_wer = 0.0, interval = 0.0,
   219               mean_wer2 = 0.0, interval2 = 0.0,
   223             &mean_wer, &interval);
   225     if(!hyp2_rspecifier.empty()) {
   227               &mean_wer2, &interval2);
   230              replications, &p_improv);
   234     std::cout.precision(2);
   235     std::cerr.precision(2);
   236     std::cout << 
"Set1: %WER " << std::fixed << 100*mean_wer <<
   237               " 95% Conf Interval [ " << 100*mean_wer-100*interval <<
   238               ", " << 100*mean_wer+100*interval << 
" ]" << 
'\n';
   240     if(!hyp2_rspecifier.empty()) {
   241         std::cout << 
"Set2: %WER " << std::fixed << 100*mean_wer2 <<
   242             " 95% Conf Interval [ " << 100*mean_wer2-100*interval2 <<
   243             ", " << 100*mean_wer2+100*interval2 << 
" ]" << 
'\n';
   245         std::cout << 
"Probability of Set2 improving Set1: " << std::fixed <<
   246             100*p_improv << 
'\n';
   250   } 
catch(
const std::exception &e) {
   251     std::cerr << e.what();
 This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
 
void GetEditsSingleHyp(const std::string &hyp_rspecifier, const std::string &ref_rspecifier, const std::string &mode, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp)
 
void GetEditsDualHyp(const std::string &hyp_rspecifier, const std::string &hyp_rspecifier2, const std::string &ref_rspecifier, const std::string &mode, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp2)
 
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more...
 
void GetBootstrapWERInterval(const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, int32 replications, BaseFloat *mean, BaseFloat *interval)
 
void GetBootstrapWERTwoSystemComparison(const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp2, int32 replications, BaseFloat *p_improv)