edit-distance-inl.h
Go to the documentation of this file.
1 // util/edit-distance-inl.h
2
3 // Copyright 2009-2011 Microsoft Corporation; Haihua Xu; Yanmin Qian
4
5 // See ../../COPYING for clarification regarding multiple authors
6 //
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10
12
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19
20 #ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_
21 #define KALDI_UTIL_EDIT_DISTANCE_INL_H_
22 #include <algorithm>
23 #include <utility>
24 #include <vector>
25 #include "util/stl-utils.h"
26
27 namespace kaldi {
28
29 template<class T>
30 int32 LevenshteinEditDistance(const std::vector<T> &a,
31  const std::vector<T> &b) {
32  // Algorithm:
33  // write A and B for the sequences, with elements a_0 ..
34  // let |A| = M and |B| = N be the lengths, and have
35  // elements a_0 ... a_{M-1} and b_0 ... b_{N-1}.
36  // We are computing the recursion
37  // E(m, n) = min( E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})),
38  // E(m-1, n) + 1,
39  // E(m, n-1) + 1).
40  // where E(m, n) is defined for m = 0..M and n = 0..N and out-of-
41  // bounds quantities are considered to be infinity (i.e. the
42  // recursion does not visit them).
43
44  // We do this computation using a vector e of size N+1.
45  // The outer iterations range over m = 0..M.
46
47  int M = a.size(), N = b.size();
48  std::vector<int32> e(N+1);
49  std::vector<int32> e_tmp(N+1);
50  // initialize e.
51  for (size_t i = 0; i < e.size(); i++)
52  e[i] = i;
53  for (int32 m = 1; m <= M; m++) {
54  // computing E(m, .) from E(m-1, .)
55  // handle special case n = 0:
56  e_tmp[0] = e[0] + 1;
57
58  for (int32 n = 1; n <= N; n++) {
59  int32 term1 = e[n-1] + (a[m-1] == b[n-1] ? 0 : 1);
60  int32 term2 = e[n] + 1;
61  int32 term3 = e_tmp[n-1] + 1;
62  e_tmp[n] = std::min(term1, std::min(term2, term3));
63  }
64  e = e_tmp;
65  }
66  return e.back();
67 }
68 //
69 struct error_stats {
73  int32 total_cost; // minimum total cost to the current alignment.
74 };
75 // Note that both hyp and ref should not contain noise word in
76 // the following implementation.
77
78 template<class T>
79 int32 LevenshteinEditDistance(const std::vector<T> &ref,
80  const std::vector<T> &hyp,
81  int32 *ins, int32 *del, int32 *sub) {
82  // temp sequence to remember error type and stats.
83  std::vector<error_stats> e(ref.size()+1);
84  std::vector<error_stats> cur_e(ref.size()+1);
85  // initialize the first hypothesis aligned to the reference at each
86  // position:[hyp_index =0][ref_index]
87  for (size_t i =0; i < e.size(); i ++) {
88  e[i].ins_num = 0;
89  e[i].sub_num = 0;
90  e[i].del_num = i;
91  e[i].total_cost = i;
92  }
93
94  // for other alignments
95  for (size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index ++) {
96  cur_e[0] = e[0];
97  cur_e[0].ins_num++;
98  cur_e[0].total_cost++;
99  for (size_t ref_index = 1; ref_index <= ref.size(); ref_index ++) {
100  int32 ins_err = e[ref_index].total_cost + 1;
101  int32 del_err = cur_e[ref_index-1].total_cost + 1;
102  int32 sub_err = e[ref_index-1].total_cost;
103  if (hyp[hyp_index-1] != ref[ref_index-1])
104  sub_err++;
105
106  if (sub_err < ins_err && sub_err < del_err) {
107  cur_e[ref_index] =e[ref_index-1];
108  if (hyp[hyp_index-1] != ref[ref_index-1])
109  cur_e[ref_index].sub_num++; // substitution error should be increased
110  cur_e[ref_index].total_cost = sub_err;
111  } else if (del_err < ins_err) {
112  cur_e[ref_index] = cur_e[ref_index-1];
113  cur_e[ref_index].total_cost = del_err;
114  cur_e[ref_index].del_num++; // deletion number is increased.
115  } else {
116  cur_e[ref_index] = e[ref_index];
117  cur_e[ref_index].total_cost = ins_err;
118  cur_e[ref_index].ins_num++; // insertion number is increased.
119  }
120  }
121  e = cur_e; // alternate for the next recursion.
122  }
123  size_t ref_index = e.size()-1;
124  *ins = e[ref_index].ins_num, *del =
125  e[ref_index].del_num, *sub = e[ref_index].sub_num;
126  return e[ref_index].total_cost;
127 }
128
129 template<class T>
130 int32 LevenshteinAlignment(const std::vector<T> &a,
131  const std::vector<T> &b,
132  T eps_symbol,
133  std::vector<std::pair<T, T> > *output) {
134  // Check inputs:
135  {
136  KALDI_ASSERT(output != NULL);
137  for (size_t i = 0; i < a.size(); i++) KALDI_ASSERT(a[i] != eps_symbol);
138  for (size_t i = 0; i < b.size(); i++) KALDI_ASSERT(b[i] != eps_symbol);
139  }
140  output->clear();
141  // This is very memory-inefficiently implemented using a vector of vectors.
142  size_t M = a.size(), N = b.size();
143  size_t m, n;
144  std::vector<std::vector<int32> > e(M+1);
145  for (m = 0; m <=M; m++) e[m].resize(N+1);
146  for (n = 0; n <= N; n++)
147  e[0][n] = n;
148  for (m = 1; m <= M; m++) {
149  e[m][0] = e[m-1][0] + 1;
150  for (n = 1; n <= N; n++) {
151  int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
152  int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp.
153  int32 ins = e[m][n-1] + 1;
154  e[m][n] = std::min(sub_or_ok, std::min(del, ins));
155  }
156  }
157  // get time-reversed output first: trace back.
158  m = M;
159  n = N;
160  while (m != 0 || n != 0) {
161  size_t last_m, last_n;
162  if (m == 0) {
163  last_m = m;
164  last_n = n-1;
165  } else if (n == 0) {
166  last_m = m-1;
167  last_n = n;
168  } else {
169  int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
170  int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp.
171  int32 ins = e[m][n-1] + 1;
172  // choose sub_or_ok if all else equal.
173  if (sub_or_ok <= std::min(del, ins)) {
174  last_m = m-1;
175  last_n = n-1;
176  } else {
177  if (del <= ins) { // choose del over ins if equal.
178  last_m = m-1;
179  last_n = n;
180  } else {
181  last_m = m;
182  last_n = n-1;
183  }
184  }
185  }
186  T a_sym, b_sym;
187  a_sym = (last_m == m ? eps_symbol : a[last_m]);
188  b_sym = (last_n == n ? eps_symbol : b[last_n]);
189  output->push_back(std::make_pair(a_sym, b_sym));
190  m = last_m;
191  n = last_n;
192  }
193  ReverseVector(output);
194  return e[M][N];
195 }
196
197
198 } // end namespace kaldi
199
200 #endif // KALDI_UTIL_EDIT_DISTANCE_INL_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 LevenshteinAlignment(const std::vector< T > &a, const std::vector< T > &b, T eps_symbol, std::vector< std::pair< T, T > > *output)
kaldi::int32 int32
int32 LevenshteinEditDistance(const std::vector< T > &a, const std::vector< T > &b)
struct rnnlm::@11::@12 n
void ReverseVector(std::vector< T > *vec)
Reverses the contents of a vector.
Definition: stl-utils.h:264
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185