160 using namespace kaldi;
165 "Compute a bootstrapping of WER to extract the 95% confidence interval.\n" 166 "Take a reference and a transcription file, in integer or text format,\n" 167 "and outputs overall WER statistics to standard output along with its\n" 168 "confidence interval using the bootstrap method of Bisani and Ney.\n" 169 "If a second transcription file corresponding to the same reference is\n" 170 "provided, a bootstrap comparison of the two transcription is performed\n" 171 "to estimate the probability of improvement.\n" 173 "Usage: compute-wer-bootci [options] <ref-rspecifier> <hyp-rspecifier> [<hyp2-rspecifier>]\n" 174 "E.g.: compute-wer-bootci --mode=present ark:data/train/text ark:hyp_text\n" 175 "or compute-wer-bootci ark:data/train/text ark:hyp_text ark:hyp_text2\n" 176 "See also: compute-wer\n";
180 std::string mode =
"strict";
181 po.Register(
"mode", &mode,
182 "Scoring mode: \"present\"|\"all\"|\"strict\":\n" 183 " \"present\" means score those we have transcriptions for\n" 184 " \"all\" means treat absent transcriptions as empty\n" 185 " \"strict\" means die if all in ref not also in hyp");
187 int32 replications = 10000;
188 po.Register(
"replications", &replications,
189 "Number of replications to compute the intervals");
193 if (po.NumArgs() < 2 || po.NumArgs() > 3) {
198 std::string ref_rspecifier = po.GetArg(1);
199 std::string hyp_rspecifier = po.GetArg(2);
200 std::string hyp2_rspecifier = (po.NumArgs() == 3?po.GetArg(3):
"");
202 if (mode !=
"strict" && mode !=
"present" && mode !=
"all") {
204 "--mode option invalid: expected \"present\"|\"all\"|\"strict\", got " 209 std::vector<std::pair<int32, int32> > edit_word_per_hyp, edit_word_per_hyp2;
210 if(hyp2_rspecifier.empty())
214 edit_word_per_hyp, edit_word_per_hyp2);
218 BaseFloat mean_wer = 0.0, interval = 0.0,
219 mean_wer2 = 0.0, interval2 = 0.0,
223 &mean_wer, &interval);
225 if(!hyp2_rspecifier.empty()) {
227 &mean_wer2, &interval2);
230 replications, &p_improv);
234 std::cout.precision(2);
235 std::cerr.precision(2);
236 std::cout <<
"Set1: %WER " << std::fixed << 100*mean_wer <<
237 " 95% Conf Interval [ " << 100*mean_wer-100*interval <<
238 ", " << 100*mean_wer+100*interval <<
" ]" <<
'\n';
240 if(!hyp2_rspecifier.empty()) {
241 std::cout <<
"Set2: %WER " << std::fixed << 100*mean_wer2 <<
242 " 95% Conf Interval [ " << 100*mean_wer2-100*interval2 <<
243 ", " << 100*mean_wer2+100*interval2 <<
" ]" <<
'\n';
245 std::cout <<
"Probability of Set2 improving Set1: " << std::fixed <<
246 100*p_improv <<
'\n';
250 }
catch(
const std::exception &e) {
251 std::cerr << e.what();
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation features for...
void GetEditsSingleHyp(const std::string &hyp_rspecifier, const std::string &ref_rspecifier, const std::string &mode, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp)
void GetEditsDualHyp(const std::string &hyp_rspecifier, const std::string &hyp_rspecifier2, const std::string &ref_rspecifier, const std::string &mode, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, std::vector< std::pair< int32, int32 > > &edit_word_per_hyp2)
The class ParseOptions is for parsing command-line options; see "Parsing command-line options" for more...
void GetBootstrapWERInterval(const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, int32 replications, BaseFloat *mean, BaseFloat *interval)
void GetBootstrapWERTwoSystemComparison(const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp, const std::vector< std::pair< int32, int32 > > &edit_word_per_hyp2, int32 replications, BaseFloat *p_improv)