edit-distance-test.cc
Go to the documentation of this file.
1 // util/edit-distance-test.cc
2 
3 // Copyright 2009-2011 Microsoft Corporation; Haihua Xu
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABILITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
20 
21 #include "base/kaldi-common.h"
22 #include "util/edit-distance.h"
23 
24 namespace kaldi {
25 
// Grows two int32 sequences `a` and `b` one step at a time with push_back.
// NOTE(review): this extract is missing several listing lines (26, 29, 32, 35,
// 38, 41, 47, 51, 55, 60, 64 -- the numbering skips them), so the function
// signature and whatever is checked between the steps below are not visible
// here. Presumably this is `void TestEditDistance()` -- confirm against the
// original file.
27  std::vector<int32> a;
28  std::vector<int32> b;
30 
31  a.push_back(1);  // a = {1}, b = {}
33 
34  b.push_back(1);  // a = {1}, b = {1}
36 
37  b.push_back(2);  // a = {1}, b = {1, 2}
39 
40  a.push_back(2);  // a = {1, 2}, b = {1, 2}
42 
43  a.push_back(3);
44  a.push_back(4);
45  b.push_back(4);  // a = {1, 2, 3, 4}, b = {1, 2, 4}
46 
48 
49  a.push_back(5);  // a = {1, 2, 3, 4, 5}, b = {1, 2, 4}
50 
52 
53  b.push_back(6);  // a = {1, 2, 3, 4, 5}, b = {1, 2, 4, 6}
54 
56 
57  a.push_back(1);
58  b.push_back(1);  // a = {1, 2, 3, 4, 5, 1}, b = {1, 2, 4, 6, 1}
59 
61 
62  b.push_back(10);  // a = {1, 2, 3, 4, 5, 1}, b = {1, 2, 4, 6, 1, 10}
63 
65 }
66 
67 
// Same step-by-step construction as the int32 variant above in this file, but
// the sequence elements are std::string values.
// NOTE(review): this extract is missing several listing lines (68, 71, 74, 77,
// 80, 83, 89, 93, 97, 102, 106 -- the numbering skips them), so the function
// signature and whatever is checked between the steps below are not visible
// here. Presumably this is `void TestEditDistanceString()` -- confirm against
// the original file.
69  std::vector<std::string> a;
70  std::vector<std::string> b;
72 
73  a.push_back("1");  // a = {"1"}, b = {}
75 
76  b.push_back("1");  // a = {"1"}, b = {"1"}
78 
79  b.push_back("2");  // a = {"1"}, b = {"1", "2"}
81 
82  a.push_back("2");  // a = {"1", "2"}, b = {"1", "2"}
84 
85  a.push_back("3");
86  a.push_back("4");
87  b.push_back("4");  // a = {"1", "2", "3", "4"}, b = {"1", "2", "4"}
88 
90 
91  a.push_back("5");  // a = {"1", "2", "3", "4", "5"}, b = {"1", "2", "4"}
92 
94 
95  b.push_back("6");  // a = {"1", "2", "3", "4", "5"}, b = {"1", "2", "4", "6"}
96 
98 
99  a.push_back("1");
100  b.push_back("1");  // a = {"1", "2", "3", "4", "5", "1"}, b = {"1", "2", "4", "6", "1"}
101 
103 
104  b.push_back("10");  // b = {"1", "2", "4", "6", "1", "10"}; a is unchanged
105 
107 }
108 
109 
110 
111 // Checks the LevenshteinEditDistance overload that also reports insertion,
// deletion and substitution counts, using int32 sequences: a few fixed cases
// first, then 1000 randomized cases cross-checked against the cost-only overload.
// NOTE(review): the signature line (listing line 112) is absent from this
// extract; presumably `void TestEditDistance2()` -- confirm against the
// original file.
113  std::vector<int32> hyp;
114  std::vector<int32> ref;
115  int32 ins, del, sub, total_cost;
116  // initialize hypothesis: hyp = {1, 3, 4, 5}
117  hyp.push_back(1);
118  hyp.push_back(3);
119  hyp.push_back(4);
120  hyp.push_back(5);
121  // initialize reference: ref = {2, 3, 4, 5, 6, 7}
122  ref.push_back(2);
123  ref.push_back(3);
124  ref.push_back(4);
125  ref.push_back(5);
126  ref.push_back(6);
127  ref.push_back(7);
  // ref is two elements longer than hyp and their first elements differ; the
  // assertion expects that to be reported as 2 deletions + 1 substitution.
128  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
129  KALDI_ASSERT(total_cost == 3 && ins == 0 && del == 2 && sub == 1);
130 
  // With the two sequences exchanged, the same total cost is expected but the
  // two deletions are asserted to show up as insertions instead.
131  std::swap(hyp, ref);
132  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
133  KALDI_ASSERT(total_cost == 3 && ins == 2 && del == 0 && sub == 1);
134 
  // Identical single-element sequences: every count must be zero.
135  hyp.clear();
136  ref.clear();
137  hyp.push_back(1);
138  ref.push_back(1);
139  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
140  KALDI_ASSERT(total_cost == 0 && ins+del+sub == 0);
  // hyp = {1, 2}, ref = {1, 3}: same length, last element differs, so exactly
  // one substitution is expected.
141  hyp.push_back(2);
142  ref.push_back(3);
143  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
144  KALDI_ASSERT(total_cost == 1 && ins == 0 && del == 0 && sub == 1);
145  // randomized test
146  size_t num = 0;
147  for (; num < 1000; num ++) {
  // Lengths are Rand()%11 and Rand()%3, i.e. at most 10 and at most 2
  // (assuming Rand() returns a non-negative value -- TODO confirm).
148  int32 hyp_len = Rand()%11;
149  int32 ref_len = Rand()%3;
150  hyp.resize(hyp_len);
151  ref.resize(ref_len);
152 
  // Every element is overwritten with Rand()%4, so contents left over from
  // the previous iteration do not matter.
153  int32 index = 0;
154  for (; index < hyp_len; index ++)
155  hyp[index] = Rand()%4;
156  for (index = 0; index < ref_len; index ++)
157  ref[index] = Rand()%4;
158  // current version: also reports the ins/del/sub counts
159  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
160  // previous version: returns only the total cost (note that the arguments
  // are passed in the opposite order here: hyp first, then ref)
161  int32 total_cost2 = LevenshteinEditDistance(hyp, ref);
162  // verify both are the same
163  KALDI_ASSERT(total_cost == total_cost2);
  // the three counts must add up to the total cost
164  KALDI_ASSERT(ins+del+sub == total_cost);
  // deletions minus insertions must equal the length difference ref - hyp.
  // NOTE(review): the subtraction is carried out on the unsigned size() values
  // and only then cast to int32; when hyp is longer this relies on the cast
  // mapping the wrapped value back to the negative difference -- confirm.
165  KALDI_ASSERT(del-ins == static_cast<int32>(ref.size() -hyp.size()));
166  }
167  return;
168 }
169 
170 
171 // Checks the LevenshteinEditDistance overload that also reports insertion,
// deletion and substitution counts, using std::string sequences: a few fixed
// cases first, then 1000 randomized cases cross-checked against the cost-only
// overload.
// NOTE(review): the signature line (listing line 172) is absent from this
// extract; presumably `void TestEditDistance2String()` -- confirm against the
// original file.
173  std::vector<std::string> hyp;
174  std::vector<std::string> ref;
175  int32 ins, del, sub, total_cost;
176  // initialize hypothesis: hyp = {"1", "3", "4", "5"}
177  hyp.push_back("1");
178  hyp.push_back("3");
179  hyp.push_back("4");
180  hyp.push_back("5");
181  // initialize reference: ref = {"2", "3", "4", "5", "6", "7"}
182  ref.push_back("2");
183  ref.push_back("3");
184  ref.push_back("4");
185  ref.push_back("5");
186  ref.push_back("6");
187  ref.push_back("7");
  // ref is two elements longer than hyp and their first elements differ; the
  // assertion expects that to be reported as 2 deletions + 1 substitution.
188  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
189  KALDI_ASSERT(total_cost == 3 && ins == 0 && del == 2 && sub == 1);
190 
  // With the two sequences exchanged, the same total cost is expected but the
  // two deletions are asserted to show up as insertions instead.
191  std::swap(hyp, ref);
192  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
193  KALDI_ASSERT(total_cost == 3 && ins == 2 && del == 0 && sub == 1);
194 
  // Identical single-element sequences: every count must be zero.
195  hyp.clear();
196  ref.clear();
197  hyp.push_back("1");
198  ref.push_back("1");
199  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
200  KALDI_ASSERT(total_cost == 0 && ins+del+sub == 0);
  // hyp = {"1", "2"}, ref = {"1", "3"}: same length, last element differs, so
  // exactly one substitution is expected.
201  hyp.push_back("2");
202  ref.push_back("3");
203  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
204  KALDI_ASSERT(total_cost == 1 && ins == 0 && del == 0 && sub == 1);
205  // randomized test
206  size_t num = 0;
207  for (; num < 1000; num ++) {
  // Lengths are Rand()%11 and Rand()%3, i.e. at most 10 and at most 2
  // (assuming Rand() returns a non-negative value -- TODO confirm).
208  int32 hyp_len = Rand()%11;
209  int32 ref_len = Rand()%3;
210  hyp.resize(hyp_len);
211  ref.resize(ref_len);
212 
  // NOTE(review): the elements are std::string but are assigned the integer
  // Rand()%4; presumably this resolves to the single-character assignment, so
  // each element becomes a one-character string whose character code is 0..3
  // rather than a digit such as "1" -- confirm this is intended.
213  int32 index = 0;
214  for (; index < hyp_len; index ++)
215  hyp[index] = Rand()%4;
216  for (index = 0; index < ref_len; index ++)
217  ref[index] = Rand()%4;
218  // current version: also reports the ins/del/sub counts
219  total_cost = LevenshteinEditDistance(ref, hyp, &ins, &del, &sub);
220  // previous version: returns only the total cost (note that the arguments
  // are passed in the opposite order here: hyp first, then ref)
221  int32 total_cost2 = LevenshteinEditDistance(hyp, ref);
222  // verify both are the same
223  KALDI_ASSERT(total_cost == total_cost2);
  // the three counts must add up to the total cost
224  KALDI_ASSERT(ins+del+sub == total_cost);
  // deletions minus insertions must equal the length difference ref - hyp.
  // NOTE(review): the subtraction is carried out on the unsigned size() values
  // and only then cast to int32; when hyp is longer this relies on the cast
  // mapping the wrapped value back to the negative difference -- confirm.
225  KALDI_ASSERT(del-ins == static_cast<int32>(ref.size() -hyp.size()));
226  }
227  return;
228 }
229 
230 
// Randomized check of LevenshteinAlignment against LevenshteinEditDistance:
// 100 trials on short random int32 sequences.
// NOTE(review): the signature line (listing line 231) is absent from this
// extract; presumably `void TestLevenshteinAlignment()` -- confirm against the
// original file.
232  for (size_t i = 0; i < 100; i++) {
  // Sequence lengths are Rand() % 5 and symbols are Rand() % 10
  // (assuming Rand() returns a non-negative value -- TODO confirm).
233  size_t a_sz = Rand() % 5, b_sz = Rand() % 5;
234  std::vector<int32> a, b;
235  for (size_t j = 0; j < a_sz; j++) a.push_back(Rand() % 10);
236  for (size_t j = 0; j < b_sz; j++) b.push_back(Rand() % 10);
  // Placeholder symbol handed to LevenshteinAlignment; -1 cannot collide with
  // the generated symbols as long as Rand() % 10 is never negative.
237  int32 eps_sym = -1;
238  std::vector<std::pair<int32, int32> > ans;
239 
  // The value returned by the alignment call must match the plain edit distance.
240  int32 e1 = LevenshteinEditDistance(a, b),
241  e2 = LevenshteinAlignment(a, b, eps_sym, &ans);
242  KALDI_ASSERT(e1 == e2);
243 
  // Rebuild both inputs from the alignment pairs by dropping every eps_sym
  // entry: the .first members must reproduce `a`, the .second members `b`.
  // NOTE(review): the loop index `i` below shadows the outer trial counter `i`;
  // harmless here because the outer counter is not used inside the loop body,
  // but worth renaming.
244  std::vector<int32> a2, b2;
245  for (size_t i = 0; i < ans.size(); i++) {
246  if (ans[i].first != eps_sym) a2.push_back(ans[i].first);
247  if (ans[i].second != eps_sym) b2.push_back(ans[i].second);
248  }
249  KALDI_ASSERT(a == a2);
250  KALDI_ASSERT(b == b2);
251  }
252 }
253 
254 } // end namespace kaldi
255 
// Test program entry point; prints "Test OK" once execution reaches the end.
256 int main() {
257  using namespace kaldi;
  // NOTE(review): listing lines 258-262 are absent from this extract;
  // presumably they hold the calls to the test functions defined in this
  // file -- confirm against the original file.
263  std::cout << "Test OK\n";
264 }
265 
266 
267 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 LevenshteinAlignment(const std::vector< T > &a, const std::vector< T > &b, T eps_symbol, std::vector< std::pair< T, T > > *output)
void TestLevenshteinAlignment()
void swap(basic_filebuf< CharT, Traits > &x, basic_filebuf< CharT, Traits > &y)
kaldi::int32 int32
void TestEditDistance2String()
int32 LevenshteinEditDistance(const std::vector< T > &a, const std::vector< T > &b)
void TestEditDistance()
int main()
void TestEditDistance2()
int Rand(struct RandomState *state)
Definition: kaldi-math.cc:45
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
void TestEditDistanceString()