20 #ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_ 21 #define KALDI_UTIL_EDIT_DISTANCE_INL_H_ 31 const std::vector<T> &b) {
47 int M = a.size(), N = b.size();
48 std::vector<int32> e(N+1);
49 std::vector<int32> e_tmp(N+1);
51 for (
size_t i = 0;
i < e.size();
i++)
53 for (
int32 m = 1; m <= M; m++) {
59 int32 term1 = e[
n-1] + (a[m-1] == b[
n-1] ? 0 : 1);
61 int32 term3 = e_tmp[
n-1] + 1;
62 e_tmp[
n] = std::min(term1, std::min(term2, term3));
80 const std::vector<T> &hyp,
83 std::vector<error_stats> e(ref.size()+1);
84 std::vector<error_stats> cur_e(ref.size()+1);
87 for (
size_t i =0;
i < e.size();
i ++) {
95 for (
size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index ++) {
98 cur_e[0].total_cost++;
99 for (
size_t ref_index = 1; ref_index <= ref.size(); ref_index ++) {
100 int32 ins_err = e[ref_index].total_cost + 1;
101 int32 del_err = cur_e[ref_index-1].total_cost + 1;
102 int32 sub_err = e[ref_index-1].total_cost;
103 if (hyp[hyp_index-1] != ref[ref_index-1])
106 if (sub_err < ins_err && sub_err < del_err) {
107 cur_e[ref_index] =e[ref_index-1];
108 if (hyp[hyp_index-1] != ref[ref_index-1])
109 cur_e[ref_index].sub_num++;
110 cur_e[ref_index].total_cost = sub_err;
111 }
else if (del_err < ins_err) {
112 cur_e[ref_index] = cur_e[ref_index-1];
113 cur_e[ref_index].total_cost = del_err;
114 cur_e[ref_index].del_num++;
116 cur_e[ref_index] = e[ref_index];
117 cur_e[ref_index].total_cost = ins_err;
118 cur_e[ref_index].ins_num++;
123 size_t ref_index = e.size()-1;
124 *ins = e[ref_index].ins_num, *del =
125 e[ref_index].del_num, *sub = e[ref_index].sub_num;
126 return e[ref_index].total_cost;
131 const std::vector<T> &b,
133 std::vector<std::pair<T, T> > *output) {
142 size_t M = a.size(), N = b.size();
144 std::vector<std::vector<int32> > e(M+1);
145 for (m = 0; m <=M; m++) e[m].resize(N+1);
146 for (n = 0; n <= N; n++)
148 for (m = 1; m <= M; m++) {
149 e[m][0] = e[m-1][0] + 1;
150 for (n = 1; n <= N; n++) {
151 int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
152 int32 del = e[m-1][
n] + 1;
153 int32 ins = e[m][n-1] + 1;
154 e[m][
n] = std::min(sub_or_ok, std::min(del, ins));
160 while (m != 0 || n != 0) {
161 size_t last_m, last_n;
169 int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
170 int32 del = e[m-1][
n] + 1;
171 int32 ins = e[m][n-1] + 1;
173 if (sub_or_ok <= std::min(del, ins)) {
187 a_sym = (last_m == m ? eps_symbol : a[last_m]);
188 b_sym = (last_n == n ? eps_symbol : b[last_n]);
189 output->push_back(std::make_pair(a_sym, b_sym));
200 #endif // KALDI_UTIL_EDIT_DISTANCE_INL_H_ This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 LevenshteinAlignment(const std::vector< T > &a, const std::vector< T > &b, T eps_symbol, std::vector< std::pair< T, T > > *output)
int32 LevenshteinEditDistance(const std::vector< T > &a, const std::vector< T > &b)
void ReverseVector(std::vector< T > *vec)
Reverses the contents of a vector.
#define KALDI_ASSERT(cond)