mikolov-rnnlm-lib.cc
1 // lm/mikolov-rnnlm-lib.cc
2 
3 // Copyright 2015 Guoguo Chen Hainan Xu
4 // 2010-2012 Tomas Mikolov
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // This file is based on version 0.3e of the RNNLM language modeling
9 // toolkit by Tomas Mikolov. Changes made by authors other than
10 // Tomas Mikolov are licensed under the Apache License, the short form
11 // os which is below. The original code by Tomas Mikolov is licensed
12 // under the BSD 3-clause license, whose text is further below.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License");
15 // you may not use this file except in compliance with the License.
16 // You may obtain a copy of the License at
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
22 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
23 // MERCHANTABILITY OR NON-INFRINGEMENT.
24 // See the Apache 2 License for the specific language governing permissions and
25 // limitations under the License.
26 //
27 //
28 // Original BSD 3-clause license text:
29 // Copyright (c) 2010-2012 Tomas Mikolov
30 //
31 // All rights reserved. Redistribution and use in source and binary forms, with
32 // or without modification, are permitted provided that the following conditions
33 // are met: 1. Redistributions of source code must retain the above copyright
34 // notice, this list of conditions and the following
35 // disclaimer. 2. Redistributions in binary form must reproduce the above
36 // copyright notice, this list of conditions and the following disclaimer in the
37 // documentation and/or other materials provided with the
38 // distribution. 3. Neither the name of the copyright holders nor the names of its
39 // contributors may be used to endorse or promote products derived from this
40 // software without specific prior written permission. THIS SOFTWARE IS PROVIDED
41 // BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
42 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
43 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
44 // EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
45 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
47 // OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
48 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
49 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
50 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 
52 #include <assert.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <math.h>
57 #include "lm/mikolov-rnnlm-lib.h"
58 #include "util/table-types.h"
59 
60 namespace rnnlm {
61 
62 ///// fast exp() implementation
63 static union {
64  double d;
65  struct {
66  int j, i;
67  } n;
68 } d2i;
69 #define EXP_A (1048576 / M_LN2)
70 #define EXP_C 60801
71 #define FAST_EXP(y) (d2i.n.i = EXP_A * (y) + (1072693248 - EXP_C), d2i.d)
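// Note on the block above: FAST_EXP is Schraudolph's fast exp() approximation.
// It writes the IEEE-754 bit pattern of exp(y) directly into the upper 32-bit
// word of the double d2i.d: EXP_A = 2^20 / ln(2) scales y into the exponent
// field, 1072693248 = 1023 * 2^20 is the exponent bias shifted into position,
// and EXP_C = 60801 is a correction constant that keeps the error to a few
// percent.  The struct { int j, i; } layout assumes a little-endian machine,
// where n.i overlays the high word of n.d.  A minimal self-contained sketch of
// the same trick (illustration only; APPROX_EXP and u are not part of this
// file):
//
//   #include <math.h>
//   static union { double d; struct { int j, i; } n; } u;
//   #define APPROX_EXP(y) \
//     (u.n.i = (int)((1048576 / M_LN2) * (y)) + (1072693248 - 60801), u.d)
//   // APPROX_EXP(1.0) is roughly 2.72; good enough for the sigmoid/softmax
//   // loops in computeNet() below.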
72 
73 CRnnLM::CRnnLM() {
74  version = 10;
75  filetype = TEXT;
76 
77  use_lmprob = 0;
78  gradient_cutoff = 15;
79  dynamic = 0;
80 
81  train_file[0] = 0;
82  valid_file[0] = 0;
83  test_file[0] = 0;
84  rnnlm_file[0] = 0;
85 
86  alpha_set = 0;
87  train_file_set = 0;
88 
89  alpha = 0.1;
90  beta = 0.0000001;
91  // beta = 0.00000;
92  alpha_divide = 0;
93  logp = 0;
94  llogp = -100000000;
95  iter = 0;
96 
97  min_improvement = 1.003;
98 
99  train_words = 0;
100  vocab_max_size = 100;
101  vocab_size = 0;
102  vocab = (struct vocab_word *)calloc(vocab_max_size,
103  sizeof(struct vocab_word));
104 
105  layer1_size = 30;
106 
107  direct_size = 0;
108  direct_order = 0;
109 
110  bptt = 0;
111  bptt_block = 10;
112  bptt_history = NULL;
113  bptt_hidden = NULL;
114  bptt_syn0 = NULL;
115 
116  gen = 0;
117 
118  independent = 0;
119 
120  neu0 = NULL;
121  neu1 = NULL;
122  neuc = NULL;
123  neu2 = NULL;
124 
125  syn0 = NULL;
126  syn1 = NULL;
127  sync = NULL;
128  syn_d = NULL;
129  syn_db = NULL;
130  // backup
131  neu0b = NULL;
132  neu1b = NULL;
133  neucb = NULL;
134  neu2b = NULL;
135 
136  neu1b2 = NULL;
137 
138  syn0b = NULL;
139  syn1b = NULL;
140  syncb = NULL;
141 
142  rand_seed = 1;
143 
144  class_size = 100;
145  old_classes = 0;
146 
147  srand(rand_seed);
148 
149  vocab_hash_size = 100000000;
150  vocab_hash = reinterpret_cast<int *>(calloc(vocab_hash_size, sizeof(int)));
151 }
152 
153 CRnnLM::~CRnnLM() {
154  int i;
155 
156  if (neu0 != NULL) {
157  free(neu0);
158  free(neu1);
159  if (neuc != NULL) free(neuc);
160  free(neu2);
161 
162  free(syn0);
163  free(syn1);
164  if (sync != NULL) free(sync);
165 
166  if (syn_d != NULL) free(syn_d);
167 
168  if (syn_db != NULL) free(syn_db);
169 
170  free(neu0b);
171  free(neu1b);
172  if (neucb != NULL) free(neucb);
173  free(neu2b);
174 
175  free(neu1b2);
176 
177  free(syn0b);
178  free(syn1b);
179  if (syncb != NULL) free(syncb);
180 
181  for (i = 0; i < class_size; i++) {
182  free(class_words[i]);
183  }
184  free(class_max_cn);
185  free(class_cn);
186  free(class_words);
187 
188  free(vocab);
189  free(vocab_hash);
190 
191  if (bptt_history != NULL) free(bptt_history);
192  if (bptt_hidden != NULL) free(bptt_hidden);
193  if (bptt_syn0 != NULL) free(bptt_syn0);
194 
195  // todo: free bptt variables too
196  }
197 }
198 
199 real CRnnLM::random(real min, real max) {
200  return rand() / (real)RAND_MAX * (max - min) + min;
201 }
202 
203 void CRnnLM::setRnnLMFile(const std::string &str) {
204  strcpy(rnnlm_file, str.c_str());
205 }
206 
207 void CRnnLM::setRandSeed(int newSeed) {
208  rand_seed = newSeed;
209  srand(rand_seed);
210 }
211 
212 void CRnnLM::readWord(char *word, FILE *fin) {
213  int a = 0, ch;
214 
215  while (!feof(fin)) {
216  ch = fgetc(fin);
217 
218  if (ch == 13) continue;
219 
220  if ((ch == ' ') || (ch == '\t') || (ch == '\n')) {
221  if (a > 0) {
222  if (ch == '\n') ungetc(ch, fin);
223  break;
224  }
225 
226  if (ch == '\n') {
227  strcpy(word, const_cast<char *>("</s>"));
228  return;
229  } else {
230  continue;
231  }
232  }
233 
234  word[a] = ch;
235  a++;
236 
237  if (a >= MAX_STRING) {
238  // printf("Too long word found!\n"); //truncate too long words
239  a--;
240  }
241  }
242  word[a] = 0;
243 }
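// Note on readWord(): carriage returns (ASCII 13) are skipped; a newline that
// ends a pending word is pushed back so the next call returns the sentence-end
// token "</s>", while a newline with no pending word returns "</s>" directly;
// words longer than MAX_STRING - 1 characters are silently truncated.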
244 
245 int CRnnLM::getWordHash(const char *word) {
246  unsigned int hash, a;
247 
248  hash = 0;
249  for (a = 0; a < strlen(word); a++) {
250  hash = hash * 237 + word[a];
251  }
252  hash = hash % vocab_hash_size;
253 
254  return hash;
255 }
256 
257 int CRnnLM::searchVocab(const char *word) {
258  int a;
259  unsigned int hash;
260 
261  hash = getWordHash(word);
262 
263  if (vocab_hash[hash] == -1) return -1;
264  if (!strcmp(word, vocab[vocab_hash[hash]].word)) return vocab_hash[hash];
265 
266  for (a = 0; a < vocab_size; a++) { // search in vocabulary
267  if (!strcmp(word, vocab[a].word)) {
268  vocab_hash[hash] = a;
269  return a;
270  }
271  }
272 
273  return -1; // return OOV if not found
274 }
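// Note on getWordHash()/searchVocab(): the hash is a simple multiplicative
// rolling hash modulo vocab_hash_size, and vocab_hash acts as a one-entry
// cache per bucket rather than a full hash table.  A lookup checks the cached
// index and verifies it with strcmp(); if the bucket holds -1 the word is
// reported out of vocabulary at once, and if the cached entry does not match,
// a linear scan of vocab[] is done and the bucket is updated with the index
// that was found.  The return value -1 means OOV.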
275 
276 void CRnnLM::sortVocab() {
277  int a, b, max;
278  vocab_word swap;
279 
280  for (a = 1; a < vocab_size; a++) {
281  max = a;
282  for (b = a + 1; b < vocab_size; b++) {
283  if (vocab[max].cn < vocab[b].cn) max = b;
284  }
285 
286  swap = vocab[max];
287  vocab[max] = vocab[a];
288  vocab[a] = swap;
289  }
290 }
291 
292 void CRnnLM::saveWeights() { // saves current weights and unit activations
293  int a, b;
294 
295  for (a = 0; a < layer0_size; a++) {
296  neu0b[a].ac = neu0[a].ac;
297  neu0b[a].er = neu0[a].er;
298  }
299 
300  for (a = 0; a < layer1_size; a++) {
301  neu1b[a].ac = neu1[a].ac;
302  neu1b[a].er = neu1[a].er;
303  }
304 
305  for (a = 0; a < layerc_size; a++) {
306  neucb[a].ac = neuc[a].ac;
307  neucb[a].er = neuc[a].er;
308  }
309 
310  for (a = 0; a < layer2_size; a++) {
311  neu2b[a].ac = neu2[a].ac;
312  neu2b[a].er = neu2[a].er;
313  }
314 
315  for (b = 0; b < layer1_size; b++) {
316  for (a = 0; a < layer0_size; a++) {
317  syn0b[a + b * layer0_size].weight = syn0[a + b * layer0_size].weight;
318  }
319  }
320 
321  if (layerc_size > 0) {
322  for (b = 0; b < layerc_size; b++) {
323  for (a = 0; a < layer1_size; a++) {
324  syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight;
325  }
326  }
327 
328  for (b = 0; b < layer2_size; b++) {
329  for (a = 0; a < layerc_size; a++) {
330  syncb[a + b * layerc_size].weight = sync[a + b * layerc_size].weight;
331  }
332  }
333  } else {
334  for (b = 0; b < layer2_size; b++) {
335  for (a = 0; a < layer1_size; a++) {
336  syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight;
337  }
338  }
339  }
340 
341  // for (a = 0; a < direct_size; a++) syn_db[a].weight = syn_d[a].weight;
342 }
343 
344 void CRnnLM::initNet() {
345  int a, b, cl;
346 
347  layer0_size = vocab_size + layer1_size;
348  layer2_size = vocab_size + class_size;
349 
350  neu0 = (struct neuron *)calloc(layer0_size, sizeof(struct neuron));
351  neu1 = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
352  neuc = (struct neuron *)calloc(layerc_size, sizeof(struct neuron));
353  neu2 = (struct neuron *)calloc(layer2_size, sizeof(struct neuron));
354 
355  syn0 = (struct synapse *)calloc(layer0_size * layer1_size,
356  sizeof(struct synapse));
357  if (layerc_size == 0) {
358  syn1 = (struct synapse *)calloc(layer1_size * layer2_size,
359  sizeof(struct synapse));
360  } else {
361  syn1 = (struct synapse *)calloc(layer1_size * layerc_size,
362  sizeof(struct synapse));
363  sync = (struct synapse *)calloc(layerc_size * layer2_size,
364  sizeof(struct synapse));
365  }
366 
367  if (syn1 == NULL) {
368  printf("Memory allocation failed\n");
369  exit(1);
370  }
371 
372  if (layerc_size > 0)
373  if (sync == NULL) {
374  printf("Memory allocation failed\n");
375  exit(1);
376  }
377 
378  syn_d =
379  reinterpret_cast<direct_t *>(calloc(static_cast<long long>(direct_size),
380  sizeof(direct_t)));
381 
382  if (syn_d == NULL) {
383  printf("Memory allocation for direct"
384  " connections failed (requested %lld bytes)\n",
385  static_cast<long long>(direct_size) * static_cast<long long>(sizeof(direct_t)));
386  exit(1);
387  }
388 
389  neu0b = (struct neuron *)calloc(layer0_size, sizeof(struct neuron));
390  neu1b = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
391  neucb = (struct neuron *)calloc(layerc_size, sizeof(struct neuron));
392  neu1b2 = (struct neuron *)calloc(layer1_size, sizeof(struct neuron));
393  neu2b = (struct neuron *)calloc(layer2_size, sizeof(struct neuron));
394 
395  syn0b = (struct synapse *)calloc(layer0_size * layer1_size,
396  sizeof(struct synapse));
397  // syn1b = (struct synapse *)calloc(layer1_size*layer2_size,
398  // sizeof(struct synapse));
399  if (layerc_size == 0) {
400  syn1b = (struct synapse *)calloc(layer1_size * layer2_size,
401  sizeof(struct synapse));
402  } else {
403  syn1b = (struct synapse *)calloc(layer1_size * layerc_size,
404  sizeof(struct synapse));
405  syncb = (struct synapse *)calloc(layerc_size * layer2_size,
406  sizeof(struct synapse));
407  }
408 
409  if (syn1b == NULL) {
410  printf("Memory allocation failed\n");
411  exit(1);
412  }
413 
414  for (a = 0; a < layer0_size; a++) {
415  neu0[a].ac = 0;
416  neu0[a].er = 0;
417  }
418 
419  for (a = 0; a < layer1_size; a++) {
420  neu1[a].ac = 0;
421  neu1[a].er = 0;
422  }
423 
424  for (a = 0; a < layerc_size; a++) {
425  neuc[a].ac = 0;
426  neuc[a].er = 0;
427  }
428 
429  for (a = 0; a < layer2_size; a++) {
430  neu2[a].ac = 0;
431  neu2[a].er = 0;
432  }
433 
434  for (b = 0; b < layer1_size; b++) {
435  for (a = 0; a < layer0_size; a++) {
436  syn0[a + b * layer0_size].weight =
437  random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
438  }
439  }
440 
441  if (layerc_size > 0) {
442  for (b = 0; b < layerc_size; b++) {
443  for (a = 0; a < layer1_size; a++) {
444  syn1[a + b * layer1_size].weight =
445  random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
446  }
447  }
448 
449  for (b = 0; b < layer2_size; b++) {
450  for (a = 0; a < layerc_size; a++) {
451  sync[a + b * layerc_size].weight =
452  random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
453  }
454  }
455  } else {
456  for (b = 0; b < layer2_size; b++) {
457  for (a = 0; a < layer1_size; a++) {
458  syn1[a + b * layer1_size].weight =
459  random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1);
460  }
461  }
462  }
463 
464  long long aa;
465  for (aa = 0; aa < direct_size; aa++) {
466  syn_d[aa] = 0;
467  }
468 
469  if (bptt > 0) {
470  bptt_history = reinterpret_cast<int *>(calloc((bptt + bptt_block + 10),
471  sizeof(int)));
472  for (a = 0; a < bptt + bptt_block; a++) {
473  bptt_history[a] = -1;
474  }
475  bptt_hidden = reinterpret_cast<neuron *>(calloc(
476  (bptt + bptt_block + 1) * layer1_size, sizeof(neuron)));
477  for (a = 0; a < (bptt + bptt_block) * layer1_size; a++) {
478  bptt_hidden[a].ac = 0;
479  bptt_hidden[a].er = 0;
480  }
481  bptt_syn0 = (struct synapse *)calloc(layer0_size * layer1_size,
482  sizeof(struct synapse));
483  if (bptt_syn0 == NULL) {
484  printf("Memory allocation failed\n");
485  exit(1);
486  }
487  }
488 
489  saveWeights();
490 
491  double df, dd;
492  int i;
493 
494  df = 0;
495  dd = 0;
496  a = 0;
497  b = 0;
498 
499  if (old_classes) { // old classes
500  for (i = 0; i < vocab_size; i++) {
501  b += vocab[i].cn;
502  }
503  for (i = 0; i < vocab_size; i++) {
504  df += vocab[i].cn / static_cast<double>(b);
505  if (df > 1) df = 1;
506  if (df > (a + 1) / static_cast<double>(class_size)) {
507  vocab[i].class_index = a;
508  if (a < class_size - 1) a++;
509  } else {
510  vocab[i].class_index = a;
511  }
512  }
513  } else { // new classes
514  for (i = 0; i < vocab_size; i++) {
515  b += vocab[i].cn;
516  }
517  for (i = 0; i < vocab_size; i++) {
518  dd += sqrt(vocab[i].cn / static_cast<double>(b));
519  }
520  for (i = 0; i < vocab_size; i++) {
521  df += sqrt(vocab[i].cn / static_cast<double>(b)) / dd;
522  if (df > 1) df = 1;
523  if (df > (a + 1) / static_cast<double>(class_size)) {
524  vocab[i].class_index = a;
525  if (a < class_size - 1) a++;
526  } else {
527  vocab[i].class_index = a;
528  }
529  }
530  }
531 
532  // allocate auxiliary class variables (for faster search when
533  // normalizing probability at output layer)
534 
535  class_words = reinterpret_cast<int **>(calloc(class_size, sizeof(int *)));
536  class_cn = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
537  class_max_cn = reinterpret_cast<int *>(calloc(class_size, sizeof(int)));
538 
539  for (i = 0; i < class_size; i++) {
540  class_cn[i] = 0;
541  class_max_cn[i] = 10;
542  class_words[i] = reinterpret_cast<int *>(calloc(class_max_cn[i], sizeof(int)));
543  }
544 
545  for (i = 0; i < vocab_size; i++) {
546  cl = vocab[i].class_index;
547  class_words[cl][class_cn[cl]] = i;
548  class_cn[cl]++;
549  if (class_cn[cl] + 2 >= class_max_cn[cl]) {
550  class_max_cn[cl] += 10;
551  class_words[cl] = reinterpret_cast<int *>(realloc(class_words[cl],
552  class_max_cn[cl] * sizeof(int)));
553  }
554  }
555 }
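// Note on the class assignment above: the vocabulary is expected to be in
// decreasing count order (see sortVocab()), and each word is mapped to one of
// class_size bins.  With old_classes the running unigram mass df decides the
// bin; by default the running square root of the unigram mass is used, which
// tends to balance the work of the class softmax and the within-class softmax.
// A standalone sketch of the default rule (hypothetical helper, not part of
// this file; needs <cmath> and <vector>):
//
//   // counts must be sorted in decreasing order, like vocab[] here.
//   void AssignSqrtClasses(const std::vector<int> &counts, int num_classes,
//                          std::vector<int> *cls) {
//     double total = 0, norm = 0, df = 0;
//     for (size_t i = 0; i < counts.size(); ++i) total += counts[i];
//     for (size_t i = 0; i < counts.size(); ++i) norm += sqrt(counts[i] / total);
//     int c = 0;
//     cls->resize(counts.size());
//     for (size_t i = 0; i < counts.size(); ++i) {
//       df += sqrt(counts[i] / total) / norm;
//       (*cls)[i] = c;                                  // assign current bin
//       if (df > (c + 1) / (double)num_classes && c < num_classes - 1)
//         ++c;                                          // then advance
//     }
//   }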
556 
557 void CRnnLM::goToDelimiter(int delim, FILE *fi) {
558  int ch = 0;
559 
560  while (ch != delim) {
561  ch = fgetc(fi);
562  if (feof(fi)) {
563  printf("Unexpected end of file\n");
564  exit(1);
565  }
566  }
567 }
568 
569 void CRnnLM::restoreNet() { // will read whole network structure
570  FILE *fi;
571  int a, b, ver, unused_size;
572  float fl;
573  char str[MAX_STRING];
574  double d;
575 
576  fi = fopen(rnnlm_file, "rb");
577  if (fi == NULL) {
578  printf("ERROR: model file '%s' not found!\n", rnnlm_file);
579  exit(1);
580  }
581 
582  goToDelimiter(':', fi);
583  unused_size = fscanf(fi, "%d", &ver);
584  if ((ver == 4) && (version == 5)) {
585  /* we will solve this later.. */
586  } else {
587  if (ver != version) {
588  printf("Unknown version of file %s\n", rnnlm_file);
589  exit(1);
590  }
591  }
592  goToDelimiter(':', fi);
593  unused_size = fscanf(fi, "%d", &filetype);
594  goToDelimiter(':', fi);
595  if (train_file_set == 0) {
596  unused_size = fscanf(fi, "%s", train_file);
597  } else {
598  unused_size = fscanf(fi, "%s", str);
599  }
600  goToDelimiter(':', fi);
601  unused_size = fscanf(fi, "%s", valid_file);
602  goToDelimiter(':', fi);
603  unused_size = fscanf(fi, "%lf", &llogp);
604  goToDelimiter(':', fi);
605  unused_size = fscanf(fi, "%d", &iter);
606  goToDelimiter(':', fi);
607  unused_size = fscanf(fi, "%d", &train_cur_pos);
608  goToDelimiter(':', fi);
609  unused_size = fscanf(fi, "%lf", &logp);
610  goToDelimiter(':', fi);
611  unused_size = fscanf(fi, "%d", &anti_k);
612  goToDelimiter(':', fi);
613  unused_size = fscanf(fi, "%d", &train_words);
614  goToDelimiter(':', fi);
615  unused_size = fscanf(fi, "%d", &layer0_size);
616  goToDelimiter(':', fi);
617  unused_size = fscanf(fi, "%d", &layer1_size);
618  goToDelimiter(':', fi);
619  unused_size = fscanf(fi, "%d", &layerc_size);
620  goToDelimiter(':', fi);
621  unused_size = fscanf(fi, "%d", &layer2_size);
622  if (ver > 5) {
623  goToDelimiter(':', fi);
624  unused_size = fscanf(fi, "%lld", &direct_size);
625  }
626  if (ver > 6) {
627  goToDelimiter(':', fi);
628  unused_size = fscanf(fi, "%d", &direct_order);
629  }
630  goToDelimiter(':', fi);
631  unused_size = fscanf(fi, "%d", &bptt);
632  if (ver > 4) {
633  goToDelimiter(':', fi);
634  unused_size = fscanf(fi, "%d", &bptt_block);
635  } else {
636  bptt_block = 10;
637  }
638  goToDelimiter(':', fi);
639  unused_size = fscanf(fi, "%d", &vocab_size);
640  goToDelimiter(':', fi);
641  unused_size = fscanf(fi, "%d", &class_size);
642  goToDelimiter(':', fi);
643  unused_size = fscanf(fi, "%d", &old_classes);
644  goToDelimiter(':', fi);
645  unused_size = fscanf(fi, "%d", &independent);
646  goToDelimiter(':', fi);
647  unused_size = fscanf(fi, "%lf", &d);
648  starting_alpha = d;
649  goToDelimiter(':', fi);
650  if (alpha_set == 0) {
651  unused_size = fscanf(fi, "%lf", &d);
652  alpha = d;
653  } else {
654  unused_size = fscanf(fi, "%lf", &d);
655  }
656  goToDelimiter(':', fi);
657  unused_size = fscanf(fi, "%d", &alpha_divide);
658 
659  // read normal vocabulary
660  if (vocab_max_size < vocab_size) {
661  if (vocab != NULL) free(vocab);
662  vocab_max_size = vocab_size + 1000;
663  // initialize memory for vocabulary
664  vocab = (struct vocab_word *)calloc(vocab_max_size,
665  sizeof(struct vocab_word));
666  }
667  goToDelimiter(':', fi);
668  for (a = 0; a < vocab_size; a++) {
669  // unused_size = fscanf(fi, "%d%d%s%d", &b, &vocab[a].cn,
670  // vocab[a].word, &vocab[a].class_index);
671  unused_size = fscanf(fi, "%d%d", &b, &vocab[a].cn);
672  readWord(vocab[a].word, fi);
673  unused_size = fscanf(fi, "%d", &vocab[a].class_index);
674  // printf("%d %d %s %d\n", b, vocab[a].cn,
675  // vocab[a].word, vocab[a].class_index);
676  }
677  if (neu0 == NULL) initNet(); // memory allocation here
678 
679  if (filetype == TEXT) {
680  goToDelimiter(':', fi);
681  for (a = 0; a < layer1_size; a++) {
682  unused_size = fscanf(fi, "%lf", &d);
683  neu1[a].ac = d;
684  }
685  }
686  if (filetype == BINARY) {
687  fgetc(fi);
688  for (a = 0; a < layer1_size; a++) {
689  unused_size = fread(&fl, 4, 1, fi);
690  neu1[a].ac = fl;
691  }
692  }
693  if (filetype == TEXT) {
694  goToDelimiter(':', fi);
695  for (b = 0; b < layer1_size; b++) {
696  for (a = 0; a < layer0_size; a++) {
697  unused_size = fscanf(fi, "%lf", &d);
698  syn0[a + b * layer0_size].weight = d;
699  }
700  }
701  }
702  if (filetype == BINARY) {
703  for (b = 0; b < layer1_size; b++) {
704  for (a = 0; a < layer0_size; a++) {
705  unused_size = fread(&fl, 4, 1, fi);
706  syn0[a + b * layer0_size].weight = fl;
707  }
708  }
709  }
710  if (filetype == TEXT) {
711  goToDelimiter(':', fi);
712  if (layerc_size == 0) { // no compress layer
713  for (b = 0; b < layer2_size; b++) {
714  for (a = 0; a < layer1_size; a++) {
715  unused_size = fscanf(fi, "%lf", &d);
716  syn1[a + b * layer1_size].weight = d;
717  }
718  }
719  } else { // with compress layer
720  for (b = 0; b < layerc_size; b++) {
721  for (a = 0; a < layer1_size; a++) {
722  unused_size = fscanf(fi, "%lf", &d);
723  syn1[a + b * layer1_size].weight = d;
724  }
725  }
726 
727  goToDelimiter(':', fi);
728 
729  for (b = 0; b < layer2_size; b++) {
730  for (a = 0; a < layerc_size; a++) {
731  unused_size = fscanf(fi, "%lf", &d);
732  sync[a + b * layerc_size].weight = d;
733  }
734  }
735  }
736  }
737  if (filetype == BINARY) {
738  if (layerc_size == 0) { // no compress layer
739  for (b = 0; b < layer2_size; b++) {
740  for (a = 0; a < layer1_size; a++) {
741  unused_size = fread(&fl, 4, 1, fi);
742  syn1[a + b * layer1_size].weight = fl;
743  }
744  }
745  } else { // with compress layer
746  for (b = 0; b < layerc_size; b++) {
747  for (a = 0; a < layer1_size; a++) {
748  unused_size = fread(&fl, 4, 1, fi);
749  syn1[a + b * layer1_size].weight = fl;
750  }
751  }
752 
753  for (b = 0; b < layer2_size; b++) {
754  for (a = 0; a < layerc_size; a++) {
755  unused_size = fread(&fl, 4, 1, fi);
756  sync[a + b * layerc_size].weight = fl;
757  }
758  }
759  }
760  }
761  if (filetype == TEXT) {
762  goToDelimiter(':', fi); // direct connections
763  long long aa;
764  for (aa = 0; aa < direct_size; aa++) {
765  unused_size = fscanf(fi, "%lf", &d);
766  syn_d[aa] = d;
767  }
768  }
769  if (filetype == BINARY) {
770  long long aa;
771  for (aa = 0; aa < direct_size; aa++) {
772  unused_size = fread(&fl, 4, 1, fi);
773  syn_d[aa] = fl;
774 
775  /*unused_size = fread(&si, 2, 1, fi);
776  fl = si/(float)(4*256);
777  syn_d[aa] = fl;*/
778  }
779  }
780 
781  saveWeights();
782 
783  // idiom to "use" an unused variable
784  (void) unused_size;
785 
786  fclose(fi);
787 }
788 
789 void CRnnLM::netReset() { // cleans hidden layer activation + bptt history
790  int a, b;
791 
792  for (a = 0; a < layer1_size; a++) {
793  neu1[a].ac = 1.0;
794  }
795 
796  copyHiddenLayerToInput();
797 
798  if (bptt > 0) {
799  for (a = 1; a < bptt + bptt_block; a++) {
800  bptt_history[a] = 0;
801  }
802  for (a = bptt + bptt_block - 1; a > 1; a--) {
803  for (b = 0; b < layer1_size; b++) {
804  bptt_hidden[a * layer1_size + b].ac = 0;
805  bptt_hidden[a * layer1_size + b].er = 0;
806  }
807  }
808  }
809 
810  for (a = 0; a < MAX_NGRAM_ORDER; a++) {
811  history[a] = 0;
812  }
813 }
814 
815 void CRnnLM::matrixXvector(struct neuron *dest, struct neuron *srcvec,
816  struct synapse *srcmatrix, int matrix_width,
817  int from, int to, int from2, int to2, int type) {
818  int a, b;
819  real val1, val2, val3, val4;
820  real val5, val6, val7, val8;
821 
822  if (type == 0) { // ac mod
823  for (b = 0; b < (to - from) / 8; b++) {
824  val1 = 0;
825  val2 = 0;
826  val3 = 0;
827  val4 = 0;
828 
829  val5 = 0;
830  val6 = 0;
831  val7 = 0;
832  val8 = 0;
833 
834  for (a = from2; a < to2; a++) {
835  val1 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 0) * matrix_width].weight;
836  val2 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 1) * matrix_width].weight;
837  val3 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 2) * matrix_width].weight;
838  val4 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 3) * matrix_width].weight;
839 
840  val5 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 4) * matrix_width].weight;
841  val6 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 5) * matrix_width].weight;
842  val7 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 6) * matrix_width].weight;
843  val8 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 7) * matrix_width].weight;
844  }
845  dest[b * 8 + from + 0].ac += val1;
846  dest[b * 8 + from + 1].ac += val2;
847  dest[b * 8 + from + 2].ac += val3;
848  dest[b * 8 + from + 3].ac += val4;
849 
850  dest[b * 8 + from + 4].ac += val5;
851  dest[b * 8 + from + 5].ac += val6;
852  dest[b * 8 + from + 6].ac += val7;
853  dest[b * 8 + from + 7].ac += val8;
854  }
855 
856  for (b = b * 8; b < to - from; b++) {
857  for (a = from2; a < to2; a++) {
858  dest[b+from].ac +=
859  srcvec[a].ac * srcmatrix[a + (b + from) * matrix_width].weight;
860  }
861  }
862  } else { // er mod
863  for (a = 0; a < (to2 - from2) / 8; a++) {
864  val1 = 0;
865  val2 = 0;
866  val3 = 0;
867  val4 = 0;
868 
869  val5 = 0;
870  val6 = 0;
871  val7 = 0;
872  val8 = 0;
873 
874  for (b = from; b < to; b++) {
875  val1 += srcvec[b].er * srcmatrix[a * 8 + from2 + 0 + b * matrix_width].weight;
876  val2 += srcvec[b].er * srcmatrix[a * 8 + from2 + 1 + b * matrix_width].weight;
877  val3 += srcvec[b].er * srcmatrix[a * 8 + from2 + 2 + b * matrix_width].weight;
878  val4 += srcvec[b].er * srcmatrix[a * 8 + from2 + 3 + b * matrix_width].weight;
879 
880  val5 += srcvec[b].er * srcmatrix[a * 8 + from2 + 4 + b * matrix_width].weight;
881  val6 += srcvec[b].er * srcmatrix[a * 8 + from2 + 5 + b * matrix_width].weight;
882  val7 += srcvec[b].er * srcmatrix[a * 8 + from2 + 6 + b * matrix_width].weight;
883  val8 += srcvec[b].er * srcmatrix[a * 8 + from2 + 7 + b * matrix_width].weight;
884  }
885  dest[a * 8 + from2 + 0].er += val1;
886  dest[a * 8 + from2 + 1].er += val2;
887  dest[a * 8 + from2 + 2].er += val3;
888  dest[a * 8 + from2 + 3].er += val4;
889 
890  dest[a * 8 + from2 + 4].er += val5;
891  dest[a * 8 + from2 + 5].er += val6;
892  dest[a * 8 + from2 + 6].er += val7;
893  dest[a * 8 + from2 + 7].er += val8;
894  }
895 
896  for (a = a * 8; a < to2 - from2; a++) {
897  for (b = from; b < to; b++) {
898  dest[a + from2].er
899  += srcvec[b].er * srcmatrix[a + from2 + b * matrix_width].weight;
900  }
901  }
902 
903  if (gradient_cutoff > 0)
904  for (a = from2; a < to2; a++) {
905  if (dest[a].er > gradient_cutoff) dest[a].er = gradient_cutoff;
906  if (dest[a].er < -gradient_cutoff) dest[a].er = -gradient_cutoff;
907  }
908  }
909 
910  // this is normal implementation (about 3x slower):
911 
912  /*if (type == 0) { //ac mod
913  for (b = from; b < to; b++) {
914  for (a = from2; a < to2; a++) {
915  dest[b].ac += srcvec[a].ac * srcmatrix[a+b*matrix_width].weight;
916  }
917  }
918  }
919  else //er mod
920  if (type == 1) {
921  for (a = from2; a < to2; a++) {
922  for (b = from; b < to; b++) {
923  dest[a].er += srcvec[b].er * srcmatrix[a+b*matrix_width].weight;
924  }
925  }
926  }*/
927 }
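// Indexing note for matrixXvector(): srcmatrix[a + b * matrix_width] holds the
// weight between source unit a and destination unit b, i.e. matrix_width is
// the size of the source layer.  In "ac mode" (type == 0) the routine
// accumulates
//     dest[b].ac += sum over a in [from2, to2) of srcvec[a].ac * W[b][a]
// for b in [from, to); in "er mode" it accumulates the transposed product
//     dest[a].er += sum over b in [from, to) of srcvec[b].er * W[b][a]
// and clips the result to +/- gradient_cutoff.  The 8-way unrolling is only an
// optimisation; the commented-out "normal implementation" at the end of the
// function computes the same thing.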
928 
929 void CRnnLM::computeNet(int last_word, int word) {
930  int a, b, c;
931  real val;
932  double sum; // sum is used for normalization: it's better to have larger
933  // precision as many numbers are summed together here
934 
935  if (last_word != -1) neu0[last_word].ac = 1;
936 
937  // propagate 0->1
938  for (a = 0; a < layer1_size; a++) {
939  neu1[a].ac = 0;
940  }
941  for (a = 0; a < layerc_size; a++) {
942  neuc[a].ac = 0;
943  }
944 
945  matrixXvector(neu1, neu0, syn0, layer0_size, 0, layer1_size,
946  layer0_size - layer1_size, layer0_size, 0);
947 
948  for (b = 0; b < layer1_size; b++) {
949  a = last_word;
950  if (a != -1) neu1[b].ac += neu0[a].ac * syn0[a + b * layer0_size].weight;
951  }
952 
953  // activate 1 --sigmoid
954  for (a = 0; a < layer1_size; a++) {
955  if (neu1[a].ac > 50) neu1[a].ac = 50; // for numerical stability
956  if (neu1[a].ac < -50) neu1[a].ac = -50; // for numerical stability
957  val = -neu1[a].ac;
958  neu1[a].ac = 1 / (1 + FAST_EXP(val));
959  }
960 
961  if (layerc_size > 0) {
962  matrixXvector(neuc, neu1, syn1, layer1_size,
963  0, layerc_size, 0, layer1_size, 0);
964  // activate compression --sigmoid
965  for (a = 0; a < layerc_size; a++) {
966  if (neuc[a].ac > 50) neuc[a].ac = 50; // for numerical stability
967  if (neuc[a].ac < -50) neuc[a].ac = -50; // for numerical stability
968  val = -neuc[a].ac;
969  neuc[a].ac = 1 / (1 + FAST_EXP(val));
970  }
971  }
972 
973  // 1->2 class
974  for (b = vocab_size; b < layer2_size; b++) {
975  neu2[b].ac = 0;
976  }
977 
978  if (layerc_size > 0) {
979  matrixXvector(neu2, neuc, sync, layerc_size,
980  vocab_size, layer2_size, 0, layerc_size, 0);
981  } else {
982  matrixXvector(neu2, neu1, syn1, layer1_size,
983  vocab_size, layer2_size, 0, layer1_size, 0);
984  }
985 
986  // apply direct connections to classes
987  if (direct_size > 0) {
988  unsigned long long hash[MAX_NGRAM_ORDER];
989  // this will hold pointers to syn_d that contains hash parameters
990 
991  for (a = 0; a < direct_order; a++) {
992  hash[a] = 0;
993  }
994 
995  for (a = 0; a < direct_order; a++) {
996  b = 0;
997  if (a > 0) if (history[a - 1] == -1) break;
998  // if OOV was in history, do not use this N-gram feature and higher orders
999  hash[a] = PRIMES[0] * PRIMES[1];
1000 
1001  for (b = 1; b <= a; b++) {
1002  hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE]
1003  * static_cast<unsigned long long>(history[b - 1] + 1);
1004  }
1005  // update hash value based on words from the history
1006 
1007  hash[a] = hash[a] % (direct_size / 2);
1008  // make sure that starting hash index is in the first
1009  // half of syn_d (second part is reserved for history->words features)
1010  }
1011 
1012  for (a = vocab_size; a < layer2_size; a++) {
1013  for (b = 0; b < direct_order; b++) {
1014  if (hash[b]) {
1015  neu2[a].ac += syn_d[hash[b]];
1016  // apply current parameter and move to the next one
1017 
1018  hash[b]++;
1019  } else {
1020  break;
1021  }
1022  }
1023  }
1024  }
1025 
1026  // activation 2 --softmax on classes
1027  sum = 0;
1028  for (a = vocab_size; a < layer2_size; a++) {
1029  if (neu2[a].ac > 50) neu2[a].ac = 50; // for numerical stability
1030  if (neu2[a].ac < -50) neu2[a].ac = -50; // for numerical stability
1031  val = FAST_EXP(neu2[a].ac);
1032  sum+= val;
1033  neu2[a].ac = val;
1034  }
1035  for (a = vocab_size; a < layer2_size; a++) {
1036  neu2[a].ac /= sum;
1037  }
1038  // output layer activations now sum exactly to 1
1039 
1040  if (gen > 0) return; // if we generate words, we don't know what current word
1041  // is -> only classes are estimated and word is selected
1042  // in testGen()
1043 
1044 
1045  // 1->2 word
1046  if (word != -1) {
1047  for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
1048  neu2[class_words[vocab[word].class_index][c]].ac = 0;
1049  }
1050  if (layerc_size > 0) {
1051  matrixXvector(neu2, neuc, sync, layerc_size,
1052  class_words[vocab[word].class_index][0],
1053  class_words[vocab[word].class_index][0]
1054  + class_cn[vocab[word].class_index],
1055  0, layerc_size, 0);
1056  } else {
1057  matrixXvector(neu2, neu1, syn1, layer1_size,
1058  class_words[vocab[word].class_index][0],
1059  class_words[vocab[word].class_index][0]
1060  + class_cn[vocab[word].class_index],
1061  0, layer1_size, 0);
1062  }
1063  }
1064 
1065  // apply direct connections to words
1066  if (word != -1) if (direct_size > 0) {
1067  unsigned long long hash[MAX_NGRAM_ORDER];
1068 
1069  for (a = 0; a < direct_order; a++) {
1070  hash[a] = 0;
1071  }
1072 
1073  for (a = 0; a < direct_order; a++) {
1074  b = 0;
1075  if (a > 0) if (history[a - 1] == -1) break;
1076  hash[a] =
1077  PRIMES[0] * PRIMES[1] *
1078  static_cast<unsigned long long>(vocab[word].class_index + 1);
1079 
1080  for (b = 1; b <= a; b++) {
1081  hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE]
1082  * static_cast<unsigned long long>(history[b - 1] + 1);
1083  }
1084  hash[a] = (hash[a] % (direct_size / 2)) + (direct_size) / 2;
1085  }
1086 
1087  for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
1088  a = class_words[vocab[word].class_index][c];
1089 
1090  for (b = 0; b < direct_order; b++) if (hash[b]) {
1091  neu2[a].ac += syn_d[hash[b]];
1092  hash[b]++;
1093  hash[b] = hash[b] % direct_size;
1094  } else {
1095  break;
1096  }
1097  }
1098  }
1099 
1100  // activation 2 --softmax on words
1101  sum = 0;
1102  if (word != -1) {
1103  for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
1104  a = class_words[vocab[word].class_index][c];
1105  if (neu2[a].ac > 50) neu2[a].ac = 50; // for numerical stability
1106  if (neu2[a].ac < -50) neu2[a].ac = -50; // for numerical stability
1107  val = FAST_EXP(neu2[a].ac);
1108  sum+= val;
1109  neu2[a].ac = val;
1110  }
1111  for (c = 0; c < class_cn[vocab[word].class_index]; c++) {
1112  neu2[class_words[vocab[word].class_index][c]].ac /= sum;
1113  }
1114  }
1115 }
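// Note on computeNet(): the output layer is a class-factored softmax.  Slots
// [vocab_size, layer2_size) hold the class distribution P(class | history),
// and slots [0, vocab_size), restricted to the words of one class, hold the
// within-class distribution P(word | class, history); the probability of a
// word is the product of the two, which is exactly what
// computeConditionalLogprob() reads out below.  The optional syn_d "direct
// connections" add hashed maximum-entropy n-gram features: the first half of
// syn_d is indexed by history-only hashes (class part), the second half by
// hashes that also include the word's class (word part).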
1116 
1117 void CRnnLM::copyHiddenLayerToInput() {
1118  int a;
1119 
1120  for (a = 0; a < layer1_size; a++) {
1121  neu0[a + layer0_size - layer1_size].ac = neu1[a].ac;
1122  }
1123 }
1124 
1125 void CRnnLM::restoreContextFromVector(const std::vector <float> &context_in) {
1126  assert(context_in.size() == layer1_size);
1127  for (int i = 0; i < layer1_size; ++i) {
1128  neu1[i].ac = context_in[i];
1129  }
1130 }
1131 
1132 void CRnnLM::saveContextToVector(std::vector <float> *context_out) {
1133  assert(context_out != NULL);
1134  context_out->resize(layer1_size);
1135  for (int i = 0; i < layer1_size; ++i) {
1136  (*context_out)[i] = neu1[i].ac;
1137  }
1138 }
1139 
1140 float CRnnLM::computeConditionalLogprob(
1141  std::string current_word,
1142  const std::vector < std::string > &history_words,
1143  const std::vector < float > &context_in,
1144  std::vector < float > *context_out) {
1145  // We assume the network has been restored.
1146  netReset();
1147  restoreContextFromVector(context_in);
1148  copyHiddenLayerToInput();
1149 
1150  // Maps unk to the unk symbol.
1151  std::vector <std::string> history_words_nounk(history_words);
1152  std::string current_word_nounk = current_word;
1153  if (isUnk(current_word_nounk)) {
1154  current_word_nounk = unk_sym;
1155  }
1156  for (int i = 0; i < history_words_nounk.size(); ++i) {
1157  if (isUnk(history_words_nounk[i])) {
1158  history_words_nounk[i] = unk_sym;
1159  }
1160  }
1161 
1162  // Handles history for n-gram features.
1163  for (int i = 0; i < MAX_NGRAM_ORDER; i++) {
1164  history[i] = 0;
1165  }
1166  for (int i = 0; i < history_words_nounk.size() && i < MAX_NGRAM_ORDER; i++) {
1167  history[i] = searchVocab(
1168  history_words_nounk[history_words_nounk.size() - 1 - i].c_str());
1169  }
1170 
1171  int word = 0, last_word = 0;
1172  float logprob = 0;
1173  if (current_word_nounk == unk_sym) {
1174  logprob += getUnkPenalty(current_word);
1175  }
1176  word = searchVocab(current_word_nounk.c_str());
1177  if (history_words_nounk.size() > 0) {
1178  last_word = searchVocab(
1179  history_words_nounk[history_words_nounk.size() - 1].c_str());
1180  }
1181  computeNet(last_word, word);
1182 
1183  if (word != -1) {
1184  logprob +=
1185  log(neu2[vocab[word].class_index + vocab_size].ac * neu2[word].ac);
1186  } else {
1187  logprob += -16.118;
1188  }
1189 
1190  if (context_out != NULL) {
1191  saveContextToVector(context_out);
1192  }
1193 
1194  if (last_word != -1) {
1195  neu0[last_word].ac = 0;
1196  }
1197 
1198  return logprob;
1199 }
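// Note on typical use: a caller loads the model once and then scores a word
// sequence incrementally, carrying the hidden-layer state in a context vector.
// A sketch, assuming the member functions used here are public as declared in
// the accompanying header; "rnnlm.model", hidden_size and sentence are
// placeholders, not names from this file:
//
//   rnnlm::CRnnLM lm;
//   lm.setRnnLMFile("rnnlm.model");
//   lm.setRandSeed(1);
//   lm.setUnkSym("<unk>");
//   lm.restoreNet();
//
//   // The context must have exactly layer1_size entries; netReset()
//   // initialises the hidden layer to 1.0, so a vector of ones is a
//   // reasonable start state.
//   std::vector<float> ctx(hidden_size, 1.0f);
//   std::vector<std::string> history;
//   float total_logprob = 0.0f;
//   for (size_t i = 0; i < sentence.size(); ++i) {
//     std::vector<float> new_ctx;
//     total_logprob += lm.computeConditionalLogprob(sentence[i], history,
//                                                   ctx, &new_ctx);
//     history.push_back(sentence[i]);
//     ctx = new_ctx;
//   }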
1200 
1201 bool CRnnLM::isUnk(const std::string &word) {
1202  int word_int = searchVocab(word.c_str());
1203  if (word_int == -1)
1204  return true;
1205  return false;
1206 }
1207 
1208 void CRnnLM::setUnkSym(const std::string &unk) {
1209  unk_sym = unk;
1210 }
1211 
1212 float CRnnLM::getUnkPenalty(const std::string &word) {
1213  unordered_map <std::string, float>::const_iterator iter =
1214  unk_penalty.find(word);
1215  if (iter != unk_penalty.end())
1216  return iter->second;
1217  return -16.118; // Fixed penalty.
1218 }
1219 
1220 void CRnnLM::setUnkPenalty(const std::string &filename) {
1221  if (filename.empty())
1222  return;
1223  kaldi::SequentialBaseFloatReader unk_reader(filename);
1224  for (; !unk_reader.Done(); unk_reader.Next()) {
1225  std::string key = unk_reader.Key();
1226  float prob = unk_reader.Value();
1227  unk_reader.FreeCurrent();
1228  unk_penalty[key] = log(prob);
1229  }
1230 }
1231 
1232 } // namespace rnnlm