28 const std::vector<int32> &alignment,
36 int32 num_frames = alignment.size();
37 if (num_frames == 0) {
41 if (num_frames != feats.NumRows()) {
42 KALDI_WARN << "Dimension mismatch: alignment " << num_frames
43 << " versus feats " << feats.NumRows();
46 std::vector<int32> times;
48 if (num_frames_clat != num_frames) {
49 KALDI_WARN << "Numerator/frames versus denlat frames mismatch: "
50 << num_frames << " versus " << num_frames_clat;
61 0, feat_dim).CopyFromMat(feats);
64 for (int32 t = 0; t < left_context; t++)
66 for (int32 t = 0; t < right_context; t++)
68 feats.Row(num_frames - 1));
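
For orientation, here is a hedged sketch of how LatticeToDiscriminativeExample (full signature in the declarations at the end of this listing) might be called; the variable names, the weight and the context sizes are illustrative, not taken from this file:

    std::vector<int32> ali;       // numerator alignment, one transition-id per frame
    Matrix<BaseFloat> feats;      // per-frame features; NumRows() must equal ali.size()
    CompactLattice clat;          // denominator lattice for the same utterance
    DiscriminativeNnetExample eg;
    // A weight of 1.0 and 4 frames of context on each side are placeholder values.
    if (LatticeToDiscriminativeExample(ali, feats, clat, 1.0, 4, 4, &eg)) {
      // On success, eg.input_frames holds feats padded with copies of the edge
      // frames for the left/right context, as in the loops above.
    }
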
106 std::vector<DiscriminativeNnetExample> *egs_out):
115 (*egs_out_)[0] = eg_;
124 (*egs_out_)[0] = eg_;
172 unordered_map<StateId, StateId> *state_map,
205 multiple_transition_ids(false),
206 num_den_overlap(false), nonzero_derivative(false),
207 can_excise_frame(false),
208 start_state(std::numeric_limits<int32>::max()), end_state(0) { }
237 std::vector<int32> times;
243 std::vector<std::map<int32, int32> > pdf_to_tid(num_frames);
244 for (StateId s = 0; s < num_states; s++) {
246 for (fst::MutableArcIterator<Lattice> aiter(&lat_, s);
247 !aiter.Done(); aiter.Next()) {
249 Arc arc = aiter.Value();
250 KALDI_ASSERT(arc.ilabel != 0 && arc.ilabel == arc.olabel);
252 if (pdf_to_tid[t].count(pdf) != 0) {
253 arc.ilabel = arc.olabel = pdf_to_tid[t][pdf];
256 pdf_to_tid[t][pdf] = arc.ilabel;
266 Project(&lat_, fst::PROJECT_INPUT);
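
The loop above keeps, for each frame, one representative transition-id per pdf and relabels every other arc on that frame to it. Below is a small self-contained illustration of the same per-frame map idea, using a toy tid-to-pdf table rather than a TransitionModel:

    // Toy data: (frame, transition-id) pairs plus an invented tid -> pdf mapping.
    std::vector<std::pair<int32, int32> > arcs;
    arcs.push_back(std::make_pair(0, 11));   // pdf 3
    arcs.push_back(std::make_pair(0, 12));   // pdf 3 again -> will collapse to tid 11
    arcs.push_back(std::make_pair(0, 20));   // pdf 7
    std::map<int32, int32> toy_tid_to_pdf;
    toy_tid_to_pdf[11] = 3; toy_tid_to_pdf[12] = 3; toy_tid_to_pdf[20] = 7;
    std::vector<std::map<int32, int32> > pdf_to_tid(1);  // one frame in this toy example
    for (size_t i = 0; i < arcs.size(); i++) {
      int32 t = arcs[i].first, pdf = toy_tid_to_pdf[arcs[i].second];
      if (pdf_to_tid[t].count(pdf) != 0) arcs[i].second = pdf_to_tid[t][pdf];
      else pdf_to_tid[t][pdf] = arcs[i].second;
    }
    // arcs is now (0,11), (0,11), (0,20): tids 11 and 12 have been merged.
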
279 Determinize(lat_, &det_lat);
283 Reverse(lat_, &tmp_lat);
284 Determinize(tmp_lat, &lat_);
285 Reverse(lat_, &tmp_lat);
286 Determinize(tmp_lat, &lat_);
308 std::vector<std::set<int32> > pdfs_per_frame(num_frames),
309 tids_per_frame(num_frames);
313 for (int32 state = 0; state < num_states; state++) {
317 for (fst::ArcIterator<Lattice> aiter(lat_, state); !aiter.Done();
320 KALDI_ASSERT(arc.ilabel != 0 && arc.ilabel == arc.olabel);
321 int32 transition_id = arc.ilabel,
323 tids_per_frame[t].insert(transition_id);
324 pdfs_per_frame[t].insert(pdf_id);
340 for (int32 t = 0; t < num_frames; t++) {
404 std::vector<bool> can_excise(num_frames, false);
406 bool need_some_frame = false;
407 for (int32 t = 0; t < num_frames; t++) {
410 need_some_frame = true;
412 if (!need_some_frame) {
414 KALDI_WARN << "Example completely removed when excising.";
424 int32 start_t, end_t;
425 for (start_t = 0; can_excise[start_t]; start_t++);
426 for (end_t = num_frames; can_excise[end_t-1]; end_t--);
433 std::vector<bool> will_excise(can_excise);
434 for (int32 t = start_t; t < end_t; t++) {
435 for (int32 t2 = t - right_context; t2 <= t + left_context; t2++)
436 if (t2 >= start_t && t2 < end_t && !can_excise[t2])
437 will_excise[t] = false;
445 for (int32 state = 0; state < num_states; state++) {
447 for (::fst::MutableArcIterator<Lattice> aiter(&lat_, state); !aiter.Done();
449 Arc arc = aiter.Value();
450 if (will_excise[t]) {
451 arc.ilabel = arc.olabel = 0;
461 int32 num_frames_kept = 0;
462 for (int32 t = 0; t < num_frames; t++) {
463 if (!will_excise[t]) {
473 int32 num_frames_kept_plus = num_frames_kept + left_context + right_context;
479 for (int32 i = 0; i < left_context; i++) {
486 for (int32 i = 0; i < right_context; i++) {
488 num_frames_kept + left_context + i);
490 end_t + left_context + i);
495 for (int32 t = start_t; t < end_t; t++) {
496 if (!will_excise[t]) {
498 left_context + dst_t);
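
To make the excision rule above concrete, here is a self-contained sketch of the same mask computation on toy data; the frame count, the context sizes and the frames that must be kept are invented for illustration:

    int32 num_frames = 10, left_context = 1, right_context = 1;
    std::vector<bool> can_excise(num_frames, true);
    can_excise[2] = can_excise[7] = false;   // frames 2 and 7 must be kept
    int32 start_t, end_t;
    for (start_t = 0; can_excise[start_t]; start_t++);          // start_t == 2
    for (end_t = num_frames; can_excise[end_t - 1]; end_t--);   // end_t == 8
    std::vector<bool> will_excise(can_excise);
    for (int32 t = start_t; t < end_t; t++)
      for (int32 t2 = t - right_context; t2 <= t + left_context; t2++)
        if (t2 >= start_t && t2 < end_t && !can_excise[t2])
          will_excise[t] = false;
    // will_excise[3] and will_excise[6] are now false as well: those frames fall
    // inside the context window of a frame that must be kept, so they stay.
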
517 std::vector<int32> split_points;
525 split_points.push_back(0);
526 for (int32 t = 1; t < num_frames; t++) {
529 split_points.push_back(t);
531 split_points.push_back(num_frames);
534 std::vector<bool> is_kept(split_points.size() - 1);
538 for (size_t s = 0; s < is_kept.size(); s++) {
539 int32 start = split_points[s], end = split_points[s+1];
540 bool keep_this_split = false;
541 for (int32 t = start; t < end; t++)
543 keep_this_split = true;
544 is_kept[s] = keep_this_split;
555 for (int32 t = 0; t < num_frames; t++)
559 for (size_t s = 0; s < is_kept.size(); s++) {
563 int32 segment_len = split_points[s+1] - split_points[s];
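
A hedged sketch of how the split_points and is_kept arrays computed above could drive the per-segment output, assuming OutputOneSplit(seg_begin, seg_end) (declared at the end of this listing) writes one example for that frame range; this loop is a guess at the elided control flow, not a verbatim copy of it:

    for (size_t s = 0; s < is_kept.size(); s++)
      if (is_kept[s])
        OutputOneSplit(split_points[s], split_points[s + 1]);
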
574 KALDI_LOG << "Split " << num_lattices << " lattices. Stats:";
575 double kept_segs_per_lat = num_kept_segments * 1.0 / num_lattices,
576 segs_per_lat = num_segments * 1.0 / num_lattices;
578 KALDI_LOG << "Made on average " << segs_per_lat << " segments per lattice, "
579 << "of which " << kept_segs_per_lat << " were kept.";
581 double percent_needed = num_frames_must_keep * 100.0 / num_frames_orig,
582 percent_after_split = num_frames_kept_after_split * 100.0 / num_frames_orig,
583 percent_after_excise = num_frames_kept_after_excise * 100.0 / num_frames_orig;
585 KALDI_LOG << "Needed to keep " << percent_needed << "% of frames, after split "
586 << "kept " << percent_after_split << "%, after excising frames kept "
587 << percent_after_excise << "%.";
589 KALDI_LOG << "Longest lattice had " << longest_lattice
590 << " frames, longest segment after splitting had "
591 << longest_segment_after_split
592 << " frames, longest segment after excising had "
593 << longest_segment_after_excise;
601 tot_context = left_context + right_context;
624 for (StateId s = 0; s < lat->NumStates(); s++) {
625 for (::fst::MutableArcIterator<Lattice> aiter(lat, s); !aiter.Done();
627 Arc arc = aiter.Value();
636 StateId s, unordered_map<StateId, StateId> *state_map, Lattice *lat_out) {
637 if (state_map->count(s) == 0) {
638 return ((*state_map)[s] = lat_out->AddState());
640 return (*state_map)[s];
651 unordered_map<StateId, StateId> state_map;
660 if (t < seg_begin || t > seg_end)
666 lat_out.SetStart(this_state);
670 lat_out.SetFinal(this_state, lat_.Final(s));
677 for (fst::ArcIterator<Lattice> aiter(lat_, s); !aiter.Done(); aiter.Next()) {
678 const Arc &arc = aiter.Value();
680 &state_map, &lat_out);
681 KALDI_ASSERT(arc.ilabel != 0 && arc.ilabel == arc.olabel);
682 lat_out.AddArc(this_state, Arc(arc.ilabel, arc.olabel, arc.weight,
768 std::vector<DiscriminativeNnetExample> *egs_out,
771 splitter.Split(stats_out);
779 std::vector<DiscriminativeNnetExample> *egs_out,
782 splitter.Excise(stats_out);
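
A hedged sketch of a typical calling pattern for the two wrappers above; config, tmodel and eg are assumed to be set up elsewhere, and the split-then-excise order is an assumption about usual usage, not something this listing states:

    SplitExampleStats stats;
    std::vector<DiscriminativeNnetExample> split_egs;
    SplitDiscriminativeExample(config, tmodel, eg, &split_egs, &stats);
    std::vector<DiscriminativeNnetExample> final_egs;
    for (size_t i = 0; i < split_egs.size(); i++) {
      std::vector<DiscriminativeNnetExample> excised;
      ExciseDiscriminativeExample(config, tmodel, split_egs[i], &excised, &stats);
      final_egs.insert(final_egs.end(), excised.begin(), excised.end());
    }
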
789 std::string criterion,
791 bool one_silence_class,
798 num_frames = eg.num_ali.size(),
800 context_width = left_context + 1 + right_context;
801 *tot_t += num_frames;
812 std::vector<int32> silence_phones;
816 one_silence_class, eg, &post);
820 for (int32 t = 0; t < num_frames; t++) {
825 avg_feat.AddRowSumMat(1.0 / context_width, context_window, 0.0);
827 for (size_t i = 0; i < post[t].size(); i++) {
828 int32 pdf_id = post[t][i].first;
830 hash->Row(pdf_id).AddVec(weight, avg_feat_dbl);
831 if (weight > 0.0) *num_weight += weight;
832 else *den_weight += -weight;
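
A hedged sketch of driving this testing utility, assuming tmodel and eg already exist; the hash matrix dimensions follow from the Row(pdf_id).AddVec(...) call above (one row per pdf, one column per feature dimension), while the criterion and flag values are placeholders:

    Matrix<double> hash(tmodel.NumPdfs(), eg.input_frames.NumCols());
    double num_weight = 0.0, den_weight = 0.0, tot_t = 0.0;
    UpdateHash(tmodel, eg, "smbr", false /* drop_frames */,
               false /* one_silence_class */, &hash,
               &num_weight, &den_weight, &tot_t);
    // Two example sets covering the same data should end up with (nearly)
    // identical hash matrices, which is, presumably, what the test code compares.
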
840 const std::vector<int32> &silence_phones,
841 std::string criterion,
843 bool one_silence_class,
846 KALDI_ASSERT(criterion == "mpfe" || criterion == "smbr" || criterion == "mmi");
851 if (criterion == "mpfe" || criterion == "smbr") {
854 criterion, one_silence_class, &tid_post);
858 bool convert_to_pdf_ids = true, cancel = true;
860 drop_frames, convert_to_pdf_ids, cancel,
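
A hedged sketch of calling ExampleToPdfPost directly and walking the resulting pdf-level posterior; tmodel and eg are assumed to exist, and the silence-phone list and criterion are illustrative values:

    std::vector<int32> silence_phones;   // e.g. parsed from a colon-separated option
    silence_phones.push_back(1);
    Posterior post;
    ExampleToPdfPost(tmodel, silence_phones, "smbr", false /* drop_frames */,
                     false /* one_silence_class */, eg, &post);
    for (size_t t = 0; t < post.size(); t++) {
      for (size_t i = 0; i < post[t].size(); i++) {
        int32 pdf_id = post[t][i].first;
        BaseFloat weight = post[t][i].second;  // may be positive or negative; see the
                                               // num/den accounting in UpdateHash above
        // ... accumulate whatever statistic is needed for (t, pdf_id, weight) ...
      }
    }
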
868 const std::vector<BaseFloat> &costs,
869 std::vector<std::vector<size_t> > *groups) {
871 std::vector<BaseFloat> group_costs;
872 for (size_t i = 0; i < costs.size(); i++) {
873 bool found_group = false;
875 for (size_t j = 0; j < groups->size(); j++) {
876 if (group_costs[j] + this_cost <= max_cost) {
877 (*groups)[j].push_back(i);
878 group_costs[j] += this_cost;
884 groups->resize(groups->size() + 1);
885 groups->back().push_back(i);
886 group_costs.push_back(this_cost);
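
A worked example of the first-fit behaviour implemented above; the costs and max_cost are invented (in CombineDiscriminativeExamples below the costs are frame counts):

    std::vector<BaseFloat> costs;
    costs.push_back(3); costs.push_back(5); costs.push_back(2);
    costs.push_back(7); costs.push_back(1);
    std::vector<std::vector<size_t> > groups;
    SolvePackingProblem(8.0, costs, &groups);
    // First fit places each item in the first group it fits into, so this gives
    // three groups: {0, 1} (total 8), {2, 4} (total 3) and {3} (total 7).
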
892 const std::vector<const DiscriminativeNnetExample*> &input,
898 left_context = eg0.left_context,
899 num_frames = eg0.num_ali.size(),
900 right_context = eg0.input_frames.NumRows() - num_frames - left_context;
902 int32 tot_frames = eg0.input_frames.NumRows();
904 for (size_t i = 1; i < input.size(); i++)
905 tot_frames += input[i]->input_frames.NumRows();
907 int32 arbitrary_tid = 1;
917 0, eg0.input_frames.NumCols()).CopyFromMat(eg0.input_frames);
918 if (eg0.spk_info.Dim() != 0) {
920 eg0.input_frames.NumCols(), eg0.spk_info.Dim()).
921 CopyRowsFromVec(eg0.spk_info);
924 output->num_ali.reserve(tot_frames - left_context - right_context);
925 output->weight = eg0.weight;
930 int32 initial = inter_segment_clat.AddState();
931 inter_segment_clat.SetStart(initial);
933 std::vector<int32> inter_segment_ali(left_context + right_context);
934 std::fill(inter_segment_ali.begin(), inter_segment_ali.end(), arbitrary_tid);
937 final_weight.SetString(inter_segment_ali);
938 inter_segment_clat.SetFinal(initial, final_weight);
940 int32 feat_offset = eg0.input_frames.NumRows();
942 for (size_t i = 1; i < input.size(); i++) {
951 eg_i.spk_info.Dim()).CopyRowsFromVec(
958 inter_segment_ali.begin(), inter_segment_ali.end());
961 Concat(&(output->den_lat), inter_segment_clat);
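
A hedged sketch of calling AppendDiscriminativeExamples directly (it is normally driven by CombineDiscriminativeExamples below); eg1 and eg2 stand for two already-prepared examples with matching feature dimensions:

    std::vector<const DiscriminativeNnetExample*> ptrs;
    ptrs.push_back(&eg1);
    ptrs.push_back(&eg2);
    DiscriminativeNnetExample appended;
    AppendDiscriminativeExamples(ptrs, &appended);
    // appended.num_ali is the two alignments joined with left_context + right_context
    // filler transition-ids in between, and appended.den_lat is the two lattices
    // concatenated across the same filler region, as in the code above.
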
972 const std::vector<DiscriminativeNnetExample> &input,
973 std::vector<DiscriminativeNnetExample> *output) {
975 std::vector<BaseFloat> costs(input.size());
976 for (size_t i = 0; i < input.size(); i++)
977 costs[i] = static_cast<BaseFloat>(input[i].input_frames.NumRows());
978 std::vector<std::vector<size_t> > groups;
983 output->resize(groups.size());
984 for (size_t i = 0; i < groups.size(); i++) {
985 std::vector<const DiscriminativeNnetExample*> group_egs;
986 for (size_t j = 0; j < groups[i].size(); j++) {
987 size_t index = groups[i][j];
988 group_egs.push_back(&(input[index]));
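
A hedged sketch of the top-level combination call; max_length is in the same units as the packing costs above (frames), and the value used here is only a placeholder:

    std::vector<DiscriminativeNnetExample> small_egs;     // e.g. output of split + excise
    std::vector<DiscriminativeNnetExample> combined_egs;
    CombineDiscriminativeExamples(1024 /* max_length */, small_egs, &combined_egs);
    // Each output example packs a group of inputs whose total frame count does not
    // exceed max_length, using SolvePackingProblem and then
    // AppendDiscriminativeExamples as shown above.
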
Referenced declarations:

bool collapse_transition_ids
fst::StdArc::StateId StateId
fst::ArcTpl< LatticeWeight > LatticeArc
const SplitDiscriminativeExampleConfig & config_
int64 num_frames_kept_after_split
void AddRowSumMat(Real alpha, const MatrixBase< Real > &M, Real beta=1.0)
Does *this = alpha * (sum of rows of M) + beta * *this.
static void RemoveAllOutputSymbols(Lattice *lat)
int32 LatticeStateTimes(const Lattice &lat, vector< int32 > *times)
This function iterates over the states of a topologically sorted lattice and counts the time instance...
const TransitionModel & tmodel_
static const LatticeWeightTpl One()
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
void SplitDiscriminativeExample(const SplitDiscriminativeExampleConfig &config, const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::vector< DiscriminativeNnetExample > *egs_out, SplitExampleStats *stats_out)
Split a "discriminative example" into multiple pieces, splitting where the lattice has "pinch points"...
int64 num_frames_must_keep
std::vector< int32 > state_times_
void ExciseDiscriminativeExample(const SplitDiscriminativeExampleConfig &config, const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::vector< DiscriminativeNnetExample > *egs_out, SplitExampleStats *stats_out)
Remove unnecessary frames from discriminative training example.
int32 longest_segment_after_excise
void UpdateHash(const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::string criterion, bool drop_frames, bool one_silence_class, Matrix< double > *hash, double *num_weight, double *den_weight, double *tot_t)
This function is used in code that tests the functionality that we provide here, about splitting and ...
void CollapseTransitionIds()
void CombineDiscriminativeExamples(int32 max_length, const std::vector< DiscriminativeNnetExample > &input, std::vector< DiscriminativeNnetExample > *output)
This function is used to combine multiple discriminative-training examples (each corresponding to a s...
int32 TransitionIdToPdf(int32 trans_id) const
This struct exists only for diagnostic purposes.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
int32 longest_segment_after_split
int64 num_frames_kept_after_excise
std::vector< std::vector< std::pair< int32, BaseFloat > > > Posterior
Posterior is a typedef for storing acoustic-state (actually, transition-id) posteriors over an uttera...
void ExampleToPdfPost(const TransitionModel &tmodel, const std::vector< int32 > &silence_phones, std::string criterion, bool drop_frames, bool one_silence_class, const DiscriminativeNnetExample &eg, Posterior *post)
Given a discriminative training example, this function works out posteriors at the pdf level (note: t...
BaseFloat LatticeForwardBackwardMmi(const TransitionModel &tmodel, const Lattice &lat, const std::vector< int32 > &num_ali, bool drop_frames, bool convert_to_pdf_ids, bool cancel, Posterior *post)
This function can be used to compute posteriors for MMI, with a positive contribution for the numerat...
DiscriminativeExampleSplitter(const SplitDiscriminativeExampleConfig &config, const TransitionModel &tmodel, const DiscriminativeNnetExample &eg, std::vector< DiscriminativeNnetExample > *egs_out)
const SubVector< Real > Row(MatrixIndexT i) const
Return specific row of matrix [const].
bool LatticeToDiscriminativeExample(const std::vector< int32 > &alignment, const Matrix< BaseFloat > &feats, const CompactLattice &clat, BaseFloat weight, int32 left_context, int32 right_context, DiscriminativeNnetExample *eg)
Converts lattice to discriminative training example.
std::vector< FrameInfo > frame_info_
static const CompactLatticeWeightTpl< WeightType, IntType > One()
void AppendDiscriminativeExamples(const std::vector< const DiscriminativeNnetExample *> &input, DiscriminativeNnetExample *output)
Appends the given vector of examples (which must be non-empty) into a single output example (called b...
void DoSplit(SplitExampleStats *stats)
void ConvertLattice(const ExpandedFst< ArcTpl< Weight > > &ifst, MutableFst< ArcTpl< CompactLatticeWeightTpl< Weight, Int > > > *ofst, bool invert)
Convert lattice from a normal FST to a CompactLattice FST.
Vector< BaseFloat > spk_info
spk_info contains any component of the features that varies slowly or not at all with time (and hence...
fst::VectorFst< LatticeArc > Lattice
void PrepareLattice(bool first_time)
int32 CompactLatticeStateTimes(const CompactLattice &lat, vector< int32 > *times)
As LatticeStateTimes, but in the CompactLattice format.
BaseFloat LatticeForwardBackwardMpeVariants(const TransitionModel &trans, const std::vector< int32 > &silence_phones, const Lattice &lat, const std::vector< int32 > &num_ali, std::string criterion, bool one_silence_class, Posterior *post)
This function implements either the MPFE (minimum phone frame error) or SMBR (state-level minimum bay...
void Excise(SplitExampleStats *stats)
CompactLattice den_lat
The denominator lattice.
bool multiple_transition_ids
Matrix< BaseFloat > input_frames
The input data, typically with a number of frames [NumRows()] larger than labels.size(), because it includes features to the left and right as needed for the temporal context of the network.
std::vector< int32 > num_ali
The numerator alignment.
void Split(SplitExampleStats *stats)
BaseFloat weight
The weight we assign to this example; this will typically be one, but we include it for the sake of g...
fst::VectorFst< CompactLatticeArc > CompactLattice
void ScalePosterior(BaseFloat scale, Posterior *post)
Scales the BaseFloat (weight) element in the posterior entries.
This struct is used to store the information we need for discriminative training (MMI or MPE)...
A class representing a vector.
StateId GetOutputStateId(StateId s, unordered_map< StateId, StateId > *state_map, Lattice *lat_out)
#define KALDI_ASSERT(cond)
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
void DoExcise(SplitExampleStats *stats)
void OutputOneSplit(int32 seg_begin, int32 seg_end)
SubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Return a sub-part of matrix.
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, const Posterior &post_in, Posterior *post_out)
Converts a posterior over transition-ids to be a posterior over pdf-ids.
int32 left_context
The number of frames of left context in the features (we can work out the #frames of right context fr...
const DiscriminativeNnetExample & eg_
Note on how to parse this filename: it contains functions related to neural-net training examples...
void CreateOutputLattice(int32 seg_begin, int32 seg_end, CompactLattice *clat_out)
Sub-matrix representation.
void SolvePackingProblem(BaseFloat max_cost, const std::vector< BaseFloat > &costs, std::vector< std::vector< size_t > > *groups)
This function solves the "packing problem" using the "first fit" algorithm.
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
std::vector< DiscriminativeNnetExample > * egs_out_
Config structure for SplitExample, for splitting discriminative training examples.