74 std::vector<std::vector<bool> > *is_computable) {
80 std::ostringstream graph_pretty;
82 KALDI_VLOG(4) <<
"Graph is " << graph_pretty.str();
97 int32 *right_context) {
99 int32 input_end = input_start + window_size;
101 input.
name =
"input";
103 output.
name =
"output";
105 ivector.
name =
"ivector";
110 for (
int32 t = input_start; t < input_end; t++) {
120 for (
int32 t = input_start - nnet.
Modulus(); t < input_end; t++) {
125 request.
inputs.push_back(input);
126 request.
outputs.push_back(output);
128 request.
inputs.push_back(ivector);
129 std::vector<std::vector<bool> > computable;
133 std::vector<bool> &output_ok = computable[0];
134 std::vector<bool>::iterator iter =
135 std::find(output_ok.begin(), output_ok.end(),
true);
136 int32 first_ok = iter - output_ok.begin();
137 int32 first_not_ok = std::find(iter, output_ok.end(),
false) -
139 if (first_ok == window_size || first_not_ok <= first_ok)
141 *left_context = first_ok;
142 *right_context = window_size - first_not_ok;
148 int32 *right_context) {
157 std::vector<int32> left_contexts(modulus + 1);
158 std::vector<int32> right_contexts(modulus + 1);
164 int32 window_size = 40, max_window_size = 800;
166 while (window_size < max_window_size) {
171 for (input_start = 0; input_start <= modulus; input_start++) {
173 &(left_contexts[input_start]),
174 &(right_contexts[input_start])))
177 if (input_start <= modulus) {
185 KALDI_ASSERT(left_contexts[0] == left_contexts[modulus] &&
186 "nnet does not have the properties we expect.");
187 KALDI_ASSERT(right_contexts[0] == right_contexts[modulus] &&
188 "nnet does not have the properties we expect.");
190 *std::max_element(left_contexts.begin(), left_contexts.end());
192 *std::max_element(right_contexts.begin(), right_contexts.end());
196 KALDI_ERR <<
"Failure in ComputeSimpleNnetContext (perhaps not a simple nnet?)";
215 int32 updatable_c = 0;
222 *u_comp2 = dynamic_cast<const UpdatableComponent*>(comp2);
233 std::ostringstream os;
236 int32 updatable_c = 0;
241 os << component_name <<
':' << vec(updatable_c) <<
' ';
260 *u_comp2 = dynamic_cast<const UpdatableComponent*>(comp2);
285 KALDI_ERR <<
"Updatable component does not inherit from class " 286 "UpdatableComponent; change this code.";
306 if (dynamic_cast<StatisticsPoolingComponent*>(comp) != NULL)
313 if (scale == 1.0)
return;
325 KALDI_ERR <<
"Trying to add incompatible nnets.";
337 if (src_uc == NULL || dest_uc == NULL)
338 KALDI_ERR <<
"Updatable component does not inherit from class " 339 "UpdatableComponent; change this code.";
341 dest_uc->
Add(alphas(i++), *src_uc);
343 dest_comp->
Add(scale, *src_comp);
351 KALDI_ERR <<
"Trying to add incompatible nnets.";
355 dest_comp->
Add(alpha, *src_comp);
369 KALDI_ERR <<
"Updatable component does not inherit from class " 370 "UpdatableComponent; change this code.";
381 int32 dim_offset = 0;
390 KALDI_ERR <<
"Updatable component does not inherit from class " 391 "UpdatableComponent; change this code.";
395 dim_offset += this_dim;
404 int32 dim_offset = 0;
412 KALDI_ERR <<
"Updatable component does not inherit from class " 413 "UpdatableComponent; change this code.";
417 dim_offset += this_dim;
440 KALDI_ERR <<
"Updatable component does not inherit from class " 441 "UpdatableComponent; change this code.";
451 "Nesting CompositeComponent within CompositeComponent is not allowed.\n" 452 "(We may change this as more complicated components are introduced.)");
454 if(c->
Type() ==
"RepeatedAffineComponent" ||
455 c->
Type() ==
"NaturalGradientRepeatedAffineComponent") {
471 if(const_c->
Type() ==
"RepeatedAffineComponent" ||
472 const_c->
Type() ==
"NaturalGradientRepeatedAffineComponent") {
481 }
else if (const_c->
Type() ==
"CompositeComponent") {
493 std::ostringstream ostr;
495 int32 left_context, right_context;
498 ostr <<
"left-context: " << left_context <<
"\n";
499 ostr <<
"right-context: " << right_context <<
"\n";
501 ostr <<
"input-dim: " << nnet.
InputDim(
"input") <<
"\n";
502 ostr <<
"ivector-dim: " << nnet.
InputDim(
"ivector") <<
"\n";
503 ostr <<
"output-dim: " << nnet.
OutputDim(
"output") <<
"\n";
504 ostr <<
"# Nnet info follows.\n";
530 if (dynamic_cast<const BatchNormComponent*>(comp) != NULL)
538 KALDI_ASSERT(batchnorm_stats_scale >= 0.0 && batchnorm_stats_scale <= 1.0);
539 if (batchnorm_stats_scale == 1.0)
545 bc->
Scale(batchnorm_stats_scale);
551 KALDI_LOG <<
"Recomputing stats on nnet (affects batch-norm)";
556 for (
size_t i = 0;
i < egs.size();
i++)
593 std::vector<bool> is_used(num_components,
false);
594 for (
int32 i = 0;
i < num_nodes;
i++) {
602 for (
int32 i = 0;
i < num_components;
i++)
604 components->push_back(i);
609 std::vector<std::vector<int32> > depend_on_graph, dependency_graph;
618 assert(num_nodes == static_cast<int32>(dependency_graph.size()));
619 std::vector<bool> node_is_required(num_nodes,
false);
620 std::vector<int32> queue;
621 for (
int32 i = 0;
i < num_nodes;
i++) {
625 while (!queue.empty()) {
628 if (!node_is_required[i]) {
629 node_is_required[
i] =
true;
630 for (
size_t j = 0;
j < dependency_graph[
i].size();
j++)
631 queue.push_back(dependency_graph[i][
j]);
635 for (
int32 i = 0;
i < num_nodes;
i++) {
636 if (!node_is_required[
i])
666 int32 bottleneck_dim,
689 for (
int32 c = 0; c < num_components; c++) {
695 if (affine == NULL) {
696 KALDI_WARN <<
"Not decomposing component " << component_name
697 <<
" as it is not an AffineComponent.";
703 KALDI_WARN <<
"Not decomposing component " << component_name
705 <<
" because its dimension is " << input_dim
706 <<
" -> " << output_dim;
709 Component *component_a = NULL, *component_b = NULL;
720 KALDI_ERR <<
"Neural network already has a component named " 723 KALDI_ERR <<
"Neural network already has a component named " 733 <<
" components to FixedAffineComponent.";
749 for (i = lower; i <= upper; i++) {
750 sum = sum + input_vector(i);
751 if (sum >= min_val)
break;
772 int32 middle_dim = std::min<int32>(input_dim, output_dim);
777 B(output_dim, middle_dim);
778 linear_params.Svd(&s, &B, &A);
791 BaseFloat s2_sum_reduced = this_part.Sum();
794 /
static_cast<BaseFloat>(input_dim * output_dim);
796 KALDI_LOG <<
"Shrinkage ratio " << shrinkage_ratio
798 <<
" Skipping SVD for this layer.";
805 KALDI_LOG <<
"For component " << component_name
806 <<
" singular value squared sum changed by " 807 << (s2_sum_orig - s2_sum_reduced)
808 <<
" (from " << s2_sum_orig <<
" to " << s2_sum_reduced <<
")";
809 KALDI_LOG <<
"For component " << component_name
810 <<
" dimension reduced from " 811 <<
" (" << input_dim <<
"," << output_dim <<
")" 814 KALDI_LOG <<
"shrinkage ratio : " << shrinkage_ratio;
831 *component_a_out = component_a;
832 *component_b_out = component_b;
848 std::set<int32> nodes_to_modify;
850 node_names_modified = node_names_orig;
858 if (modification_index >= 0) {
861 nodes_to_modify.insert(
n);
862 std::string node_name = node_names_orig[
n],
863 node_name_b = node_name +
"_b";
864 node_names_modified[
n] = node_name_b;
872 std::ostringstream config_os;
887 std::string node_name = node_names_orig[
n];
891 std::string node_name_a = node_name +
"_a",
892 node_name_b = node_name +
"_b";
895 config_os <<
"component-node name=" << node_name_a <<
" component=" 899 config_os <<
"component-node name=" << node_name_b <<
" component=" 910 config_os <<
"output-node name=" << node_name <<
" input=";
913 "linear" :
"quadratic");
916 config_os <<
"component-node name=" << node_name <<
" component=" 920 node_names_modified);
923 config_os <<
"dim-range-node name=" << node_name <<
" input-node=" 926 <<
" dim=" << node.
dim;
934 std::istringstream config_is(config_os.str());
1006 bool floating_scale = (scale < 0.0);
1009 if (floating_scale) {
1035 scale = std::sqrt(trace_P_P / trace_P);
1050 update_speed *= 0.5;
1051 if (ratio > 1.1) update_speed *= 0.5;
1080 KALDI_VLOG(2) <<
"Error in orthogonality is " << error;
1087 BaseFloat alpha = update_speed / (scale * scale);
1099 M->
AddMat(1.0, M_update);
1118 params = &(lc->
Params());
1130 if (orthonormal_constraint == 0.0 ||
RandInt(0, 3) != 0) {
1149 if (CuDevice::Instantiate().Enabled()) {
1151 if (print_memory_info) {
1152 KALDI_VLOG(1) <<
"Consolidating memory; will print memory usage before " 1153 "and after consolidating:";
1154 g_cuda_allocator.PrintMemoryUsage();
1160 if (print_memory_info) {
1161 g_cuda_allocator.PrintMemoryUsage();
1175 int32 num_components_changed = 0;
1180 component_name_pattern.c_str())) {
1182 if (affine == NULL) {
1183 KALDI_WARN <<
"Not reducing rank of component " << component_name
1184 <<
" as it is not an AffineComponent.";
1189 if (input_dim <= rank || output_dim <= rank) {
1190 KALDI_WARN <<
"Not reducing rank of component " << component_name
1191 <<
" with SVD to rank " << rank
1192 <<
" because its dimension is " << input_dim
1193 <<
" -> " << output_dim;
1200 int32 middle_dim = std::min<int32>(input_dim, output_dim);
1203 Vt(middle_dim, input_dim);
1204 linear_params.Svd(&s, &U, &Vt);
1212 KALDI_LOG <<
"For component " << component_name
1213 <<
" singular value sum changed by reduce-rank command " 1214 << (s_sum_orig - s_sum_reduced)
1215 <<
" (from " << s_sum_orig <<
" to " << s_sum_reduced <<
")";
1220 linear_params_reduced_rank_cuda.
Swap(&linear_params_reduced_rank);
1222 bias_params_cuda.
Swap(&bias_params);
1223 affine->
SetParams(bias_params_cuda, linear_params_reduced_rank_cuda);
1224 num_components_changed++;
1227 KALDI_LOG <<
"Reduced rank of parameters of " << num_components_changed
1235 std::vector<std::string> lines;
1238 std::vector<ConfigLine> config_lines;
1240 for (
size_t i = 0;
i < config_lines.size();
i++) {
1242 const std::string &directive = config_lines[
i].
FirstToken();
1243 if (directive ==
"convert-to-fixed-affine") {
1244 std::string name_pattern =
"*";
1247 config_line.
GetValue(
"name", &name_pattern);
1248 int32 num_components_changed = 0;
1253 name_pattern.c_str()) &&
1254 (affine = dynamic_cast<AffineComponent*>(component))) {
1256 num_components_changed++;
1259 KALDI_LOG <<
"Converted " << num_components_changed
1260 <<
" components to FixedAffineComponent.";
1261 }
else if (directive ==
"remove-orphan-nodes") {
1262 bool remove_orphan_inputs =
false;
1263 config_line.
GetValue(
"remove-orphan-inputs", &remove_orphan_inputs);
1265 }
else if (directive ==
"remove-orphan-components") {
1267 }
else if (directive ==
"remove-orphans") {
1268 bool remove_orphan_inputs =
false;
1269 config_line.
GetValue(
"remove-orphan-inputs", &remove_orphan_inputs);
1272 }
else if (directive ==
"set-learning-rate") {
1273 std::string name_pattern =
"*";
1276 config_line.
GetValue(
"name", &name_pattern);
1278 if (!config_line.
GetValue(
"learning-rate", &learning_rate)) {
1279 KALDI_ERR <<
"In edits-config, expected learning-rate to be set in line: " 1288 int32 num_learning_rates_set = 0;
1291 name_pattern.c_str()) &&
1293 dynamic_cast<UpdatableComponent*>(nnet->
GetComponent(c)))) {
1295 num_learning_rates_set++;
1298 KALDI_LOG <<
"Set learning rates for " << num_learning_rates_set <<
" components.";
1299 }
else if (directive ==
"set-learning-rate-factor") {
1300 std::string name_pattern =
"*";
1302 config_line.
GetValue(
"name", &name_pattern);
1304 if (!config_line.
GetValue(
"learning-rate-factor", &learning_rate_factor)) {
1305 KALDI_ERR <<
"In edits-config, expected learning-rate-factor to be set in line: " 1313 int32 num_learning_rate_factors_set = 0;
1316 name_pattern.c_str()) &&
1318 dynamic_cast<UpdatableComponent*>(nnet->
GetComponent(c)))) {
1320 num_learning_rate_factors_set++;
1323 KALDI_LOG <<
"Set learning rate factors for " << num_learning_rate_factors_set
1325 }
else if (directive ==
"rename-node") {
1328 std::string old_name, new_name;
1329 if (!config_line.
GetValue(
"old-name", &old_name) ||
1330 !config_line.
GetValue(
"new-name", &new_name) ||
1332 KALDI_ERR <<
"In edits-config, could not make sense of this rename-node " 1333 <<
"directive (expect old-name=xxx new-name=xxx) " 1337 KALDI_ERR <<
"Could not rename node from " << old_name <<
" to " 1338 << new_name <<
" because there is no node called " 1342 }
else if (directive ==
"remove-output-nodes") {
1344 std::string name_pattern;
1345 if (!config_line.
GetValue(
"name", &name_pattern) ||
1347 KALDI_ERR <<
"In edits-config, could not make sense of " 1348 <<
"remove-output-nodes directive: " 1350 std::vector<int32> nodes_to_remove;
1351 int32 outputs_remaining = 0;
1355 name_pattern.c_str()))
1356 nodes_to_remove.push_back(
n);
1358 outputs_remaining++;
1361 KALDI_LOG <<
"Removing " << nodes_to_remove.size() <<
" output nodes.";
1362 if (outputs_remaining == 0)
1363 KALDI_ERR <<
"All outputs were removed.";
1365 }
else if (directive ==
"set-dropout-proportion") {
1366 std::string name_pattern =
"*";
1369 config_line.
GetValue(
"name", &name_pattern);
1371 if (!config_line.
GetValue(
"proportion", &proportion)) {
1372 KALDI_ERR <<
"In edits-config, expected proportion to be set in line: " 1375 int32 num_dropout_proportions_set = 0;
1378 name_pattern.c_str())) {
1385 if (dropout_component != NULL) {
1387 num_dropout_proportions_set++;
1388 }
else if (mask_component != NULL){
1390 num_dropout_proportions_set++;
1391 }
else if (general_dropout_component != NULL){
1393 num_dropout_proportions_set++;
1397 KALDI_LOG <<
"Set dropout proportions for " 1398 << num_dropout_proportions_set <<
" components.";
1399 }
else if (directive ==
"apply-svd") {
1400 std::string name_pattern;
1401 int32 bottleneck_dim = -1;
1404 config_line.
GetValue(
"bottleneck-dim", &bottleneck_dim);
1405 config_line.
GetValue(
"energy-threshold", &energy_threshold);
1406 config_line.
GetValue(
"shrinkage-threshold", &shrinkage_threshold);
1407 if (!config_line.
GetValue(
"name", &name_pattern))
1408 KALDI_ERR <<
"Edit directive apply-svd requires 'name' to be specified.";
1409 if (bottleneck_dim <= 0 && energy_threshold <=0)
1410 KALDI_ERR <<
"Either Bottleneck-dim or energy-threshold " 1411 "must be set in apply-svd command. " 1412 "Range of possible values is (0 1]";
1413 SvdApplier applier(name_pattern, bottleneck_dim,
1415 shrinkage_threshold,
1418 }
else if (directive ==
"reduce-rank") {
1419 std::string name_pattern;
1421 if (!config_line.
GetValue(
"name", &name_pattern) ||
1422 !config_line.
GetValue(
"rank", &rank))
1423 KALDI_ERR <<
"Edit directive reduce-rank requires 'name' and " 1424 "'rank' to be specified.";
1426 KALDI_ERR <<
"Rank must be positive in reduce-rank command.";
1429 KALDI_ERR <<
"Directive '" << directive <<
"' is not currently " 1430 "supported (reading edit-config).";
1434 <<
"' in edit config line " << config_line.
WholeLine();
1442 std::vector<std::vector<int32> > graph;
1451 config_(config),
nnet_(nnet) { }
1453 bool changed =
true;
1457 for (; changed; num_iters++) {
1459 for (
int32 n = 0;
n < num_nodes;
n++)
1460 if (OptimizeNode(
n))
1463 if (num_iters >= 10)
1464 KALDI_ERR <<
"Something went wrong collapsing model.";
1470 if (num_components2 != num_components1 ||
1471 num_components3 != num_components2)
1472 KALDI_LOG <<
"Added " << (num_components2 - num_components1)
1473 <<
" components, removed " 1474 << (num_components2 - num_components3);
1494 int32 component_index2) {
1496 if (config_.collapse_dropout &&
1497 (ans = CollapseComponentsDropout(component_index1,
1498 component_index2)) != -1)
1500 if (config_.collapse_batchnorm &&
1501 (ans = CollapseComponentsBatchnorm(component_index1,
1502 component_index2)) != -1)
1504 if (config_.collapse_affine &&
1505 (ans = CollapseComponentsAffine(component_index1,
1506 component_index2)) != -1)
1508 if (config_.collapse_scale &&
1509 (ans = CollapseComponentsScale(component_index1,
1510 component_index2)) != -1)
1529 if (ss == NULL)
return -1;
1537 if (sd == NULL)
return -1;
1542 std::vector<int32> v;
1544 int32 node_index = v[0];
1554 int32 ans = SumDescriptorIsCollapsible(desc.
Part(0));
1557 int32 node_index = SumDescriptorIsCollapsible(desc.
Part(
i));
1558 if (node_index != ans)
1573 int32 node_to_replace,
1581 std::ostringstream expr_os;
1583 node_names[node_to_replace] = expr_os.str();
1584 std::ostringstream src_replaced_os;
1586 std::vector<std::string> tokens;
1593 tokens.push_back(
"end of input");
1594 const std::string *next_token = &(tokens[0]);
1649 int32 input_node_index = DescriptorIsCollapsible(descriptor);
1650 if (input_node_index == -1)
1657 int32 combined_component_index = CollapseComponents(input_component_index,
1659 if (combined_component_index == -1)
1675 descriptor = ReplaceNodeInDescriptor(descriptor,
1694 int32 component_index2) {
1702 if (dropout_component == NULL && general_dropout_component == NULL)
1706 if (dropout_component != NULL) {
1708 scale = 1.0 / (1.0 - dropout_proportion);
1717 return GetScaledComponentIndex(component_index2,
1734 int32 component_index2) {
1738 if (batchnorm_component == NULL)
1741 if (batchnorm_component->
Offset().Dim() == 0) {
1742 KALDI_ERR <<
"Expected batch-norm components to have test-mode set.";
1746 return GetDiagonallyPreModifiedComponentIndex(batchnorm_component->
Offset(),
1747 batchnorm_component->
Scale(),
1748 batchnorm_component_name,
1762 int32 component_index2) {
1770 *affine_component2 =
1771 dynamic_cast<const AffineComponent*>(
1773 if (affine_component2 == NULL ||
1774 (fixed_affine_component1 == NULL && affine_component1 == NULL))
1777 std::ostringstream new_component_name_os;
1780 std::string new_component_name = new_component_name_os.str();
1782 if (new_component_index >= 0)
1783 return new_component_index;
1787 if (fixed_affine_component1 != NULL) {
1788 if (fixed_affine_component1->
InputDim() >
1794 linear_params1 = &(fixed_affine_component1->
LinearParams());
1795 bias_params1 = &(fixed_affine_component1->
BiasParams());
1797 if (affine_component1->
InputDim() >
1804 bias_params1 = &(affine_component1->
BiasParams());
1808 output_dim1 = linear_params1->
NumRows(),
1809 input_dim2 = affine_component2->InputDim(),
1810 output_dim2 = affine_component2->OutputDim();
1816 int32 multiple = input_dim2 / output_dim1;
1819 multiple * input_dim1);
1820 for (
int32 i = 0;
i < multiple;
i++) {
1821 bias_params1_full.
Range(
i * output_dim1,
1822 output_dim1).CopyFromVec(*bias_params1);
1823 linear_params1_full.
Range(
i * output_dim1, output_dim1,
1824 i * input_dim1, input_dim1).CopyFromMat(
1830 int32 new_input_dim = multiple * input_dim1,
1831 new_output_dim = output_dim2;
1836 bias_params1_full, 1.0);
1838 linear_params1_full,
kNoTrans, 0.0);
1841 new_component->
Init(new_input_dim, new_output_dim, 0.0, 0.0);
1842 new_component->
SetParams(new_bias_params, new_linear_params);
1861 int32 component_index2) {
1869 if (affine_component1 == NULL ||
1870 fixed_scale_component2 == NULL ||
1872 fixed_scale_component2->
InputDim())
1875 std::ostringstream new_component_name_os;
1878 std::string new_component_name = new_component_name_os.str();
1880 if (new_component_index >= 0)
1881 return new_component_index;
1887 bias_params.MulElements(scales);
1888 linear_params.MulRowsVec(scales);
1892 new_affine_component->
SetParams(bias_params, linear_params);
1894 new_affine_component);
1937 const std::string &src_identifier,
1940 if (offset.
Max() == 0.0 && offset.
Min() == 0.0 &&
1941 scale.
Max() == 1.0 && scale.
Min() == 1.0)
1943 std::ostringstream new_component_name_os;
1944 new_component_name_os << src_identifier
1947 std::string new_component_name = new_component_name_os.str();
1949 if (new_component_index >= 0)
1950 return new_component_index;
1961 if (affine_component != NULL) {
1962 new_component = component->
Copy();
1965 PreMultiplyAffineParameters(offset, scale,
1968 }
else if (linear_component != NULL) {
1974 PreMultiplyAffineParameters(offset, scale,
1975 &(new_affine_component->BiasParams()),
1976 &(new_affine_component->LinearParams()));
1977 new_component = new_affine_component;
1978 }
else if (tdnn_component != NULL) {
1979 new_component = tdnn_component->
Copy();
1982 if (new_tdnn_component->
BiasParams().Dim() == 0) {
1987 PreMultiplyAffineParameters(offset, scale,
2012 transform_dim = offset.
Dim();
2014 offset.
Dim() == scale.
Dim() &&
2015 input_dim % transform_dim == 0);
2020 full_scale(input_dim);
2021 for (
int32 d = 0;
d < input_dim;
d += transform_dim) {
2022 full_offset.Range(
d, transform_dim).CopyFromVec(offset);
2023 full_scale.Range(
d, transform_dim).CopyFromVec(scale);
2054 std::ostringstream os;
2056 <<
".scale" << std::setprecision(3) << scale;
2057 std::string new_component_name = os.str();
2071 if (affine_component == NULL && conv_component == NULL &&
2072 linear_component == NULL && tdnn_component == NULL) {
2079 if (affine_component != NULL) {
2082 LinearParams().
Scale(scale);
2083 }
else if (conv_component != NULL) {
2085 ScaleLinearParams(scale);
2086 }
else if (linear_component != NULL) {
2110 std::vector<int32> *
2111 num_max_change_per_component_applied,
2112 int32 *num_max_change_global_applied) {
2118 int32 num_max_change_per_component_applied_per_minibatch = 0;
2120 std::string component_name_with_min_scale;
2129 KALDI_ERR <<
"Updatable component does not inherit from class " 2130 <<
"UpdatableComponent; change this code.";
2134 if (max_param_change_per_comp != 0.0 &&
2135 std::sqrt(dot_prod) * std::abs(scale) >
2136 max_param_change_per_comp * max_change_scale) {
2137 scale_factors(i) = max_param_change_per_comp * max_change_scale /
2138 (std::sqrt(dot_prod) * std::abs(scale));
2139 (*num_max_change_per_component_applied)[
i]++;
2140 num_max_change_per_component_applied_per_minibatch++;
2142 <<
" change too big: " << std::sqrt(dot_prod) <<
" * " 2143 << scale <<
" > " <<
"max-change * max-change-scale=" 2144 << max_param_change_per_comp <<
" * " << max_change_scale
2145 <<
", scaling by " << scale_factors(i);
2147 scale_factors(i) = 1.0;
2149 if (i == 0 || scale_factors(i) < min_scale) {
2150 min_scale = scale_factors(i);
2152 max_change_with_min_scale = max_param_change_per_comp;
2154 param_delta_squared += std::pow(scale_factors(i),
2155 static_cast<BaseFloat>(2.0)) * dot_prod;
2160 BaseFloat param_delta = std::sqrt(param_delta_squared);
2162 param_delta *= std::abs(scale);
2163 if (max_param_change != 0.0) {
2164 if (param_delta > max_param_change * max_change_scale) {
2165 if (param_delta - param_delta != 0.0) {
2166 KALDI_WARN <<
"Infinite parameter change, will not apply.";
2169 scale *= max_param_change * max_change_scale / param_delta;
2170 (*num_max_change_global_applied)++;
2174 if ((max_param_change != 0.0 &&
2175 param_delta > max_param_change * max_change_scale &&
2176 param_delta - param_delta == 0.0) || min_scale < 1.0) {
2177 std::ostringstream ostr;
2178 if (min_scale < 1.0)
2179 ostr <<
"Per-component max-change active on " 2180 << num_max_change_per_component_applied_per_minibatch
2181 <<
" / " << num_updatable <<
" Updatable Components." 2182 <<
" (Smallest factor=" << min_scale <<
" on " 2183 << component_name_with_min_scale
2184 <<
" with max-change=" << max_change_with_min_scale <<
"). ";
2185 if (param_delta > max_param_change * max_change_scale)
2186 ostr <<
"Global max-change factor was " 2187 << max_param_change * max_change_scale / param_delta
2188 <<
" with max-change=" << max_param_change <<
".";
2193 scale_factors.
Scale(scale);
2200 int32 num_n_values = -1;
2201 for (
size_t i = 0;
i < io_vec.size();
i++) {
2203 int32 this_num_n_values;
2204 const std::vector<Index> &index_vec = io.
indexes;
2206 "Empty input or output in ComputationRequest?");
2208 int32 lowest_n_value = std::numeric_limits<int32>::max(),
2209 highest_n_value = std::numeric_limits<int32>::min();
2210 std::vector<Index>::const_iterator
2211 iter = index_vec.begin(), end = index_vec.end();
2212 for (; iter != end; ++iter) {
2214 if (n < lowest_n_value) { lowest_n_value =
n; }
2215 if (n > highest_n_value) { highest_n_value =
n; }
2217 this_num_n_values = highest_n_value + 1 - lowest_n_value;
2221 this_num_n_values = index_vec.back().n + 1;
2223 if (num_n_values == -1) {
2224 num_n_values = this_num_n_values;
2226 if (num_n_values != this_num_n_values) {
2227 KALDI_ERR <<
"Different inputs/outputs of ComputationRequest have " 2228 "different numbers of n values: " << num_n_values
2229 <<
" vs. " << this_num_n_values;
2233 if (!exhaustive &&
RandInt(0, 100) == 0) {
2235 if (num_n_values != num_n_values_check) {
2236 KALDI_ERR <<
"Exhaustive and quick checks returned different " 2237 "answers: " << num_n_values <<
" vs. " 2238 << num_n_values_check;
2241 return num_n_values;
2247 if (l2_regularize_scale == 0.0)
2261 BaseFloat scale = -2.0 * l2_regularize_scale * lrate * l2_regularize;
2263 dest_component->
Add(scale, *src_component);
2275 delta_nnet, max_param_change, max_change_scale,
2292 KALDI_ERR <<
"Updatable component does not inherit from class " 2293 <<
"UpdatableComponent; change this code.";
2294 if (num_max_change_per_component_applied[i] > 0)
2296 <<
", per-component max-change was enforced " 2297 << ((100.0 * num_max_change_per_component_applied[
i]) /
2298 num_minibatches_processed)
2299 <<
" \% of the time.";
2303 if (num_max_change_global_applied > 0)
2304 KALDI_LOG <<
"The global max-change was enforced " 2305 << ((100.0 * num_max_change_global_applied) /
2306 num_minibatches_processed)
2307 <<
" \% of the time.";
void NnetToDirectedGraph(const Nnet &nnet, std::vector< std::vector< int32 > > *graph)
This function takes an nnet and turns it to a directed graph on nodes.
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
int32 InputDim(const std::string &input_name) const
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that suitable to be done in test time...
virtual int32 OutputDim() const
Returns output-dimension of this component.
const std::string & FirstToken() const
void ScaleNnet(BaseFloat scale, Nnet *nnet)
Scales the nnet parameters and stats by this scale.
const std::string WholeLine()
void EvaluateComputationRequest(const Nnet &nnet, const ComputationRequest &request, std::vector< std::vector< bool > > *is_computable)
Given an nnet and a computation request, this function works out which requested outputs in the compu...
void SetDropoutProportion(BaseFloat dropout_proportion, Nnet *nnet)
This function sets the dropout proportion in all dropout components to dropout_proportion value...
BaseFloat shrinkage_threshold_
Real Trace(bool check_square=true) const
Return the trace. If check_square = true, will crash if matrix is not square.
void SetDropoutProportion(BaseFloat dropout_proportion)
int32 AddComponent(const std::string &name, Component *component)
Adds a new component with the given name, which should not be the same as any existing component name...
virtual int32 InputDim() const
Returns input-dimension of this component.
void ReadConfig(std::istream &config_file)
void FindOrphanComponents(const Nnet &nnet, std::vector< int32 > *components)
This function finds a list of components that are never used, and outputs the integer comopnent index...
std::string component_name_b
std::string PrintVectorPerUpdatableComponent(const Nnet &nnet, const VectorBase< BaseFloat > &vec)
This function is for printing, to a string, a vector with one element per updatable component of the ...
int32 CollapseComponents(int32 component_index1, int32 component_index2)
This function tries to collapse two successive components, where the component 'component_index1' app...
const std::string & GetNodeName(int32 node_index) const
returns individual node name.
void SetTestMode(bool test_mode)
BaseFloat OrthonormalConstraint() const
int32 num_max_change_global_applied
void ComponentDotProducts(const Nnet &nnet1, const Nnet &nnet2, VectorBase< BaseFloat > *dot_prod)
Returns dot products between two networks of the same structure (calls the DotProduct functions of th...
void SetTestMode(bool test_mode)
bool Parse(const std::vector< std::string > &node_names, const std::string **next_token)
const CuVector< BaseFloat > & BiasParams() const
int32 num_minibatches_processed
Abstract base-class for neural-net components.
int32 GetDiagonallyPreModifiedComponentIndex(const CuVectorBase< BaseFloat > &offset, const CuVectorBase< BaseFloat > &scale, const std::string &src_identifier, int32 component_index)
This function finds, or creates, a component which is like 'component_index' but is combined with a d...
virtual int32 InputDim() const
Returns input-dimension of this component.
int32 GetVerboseLevel()
Get verbosity level, usually set via command line '–verbose=' switch.
virtual Component * Copy() const =0
Copies component (deep copy).
int32 DescriptorIsCollapsible(const Descriptor &desc)
void FindOrphanNodes(const Nnet &nnet, std::vector< int32 > *nodes)
This function finds a list of nodes that are never used to compute any output, and outputs the intege...
int32 CollapseComponentsDropout(int32 component_index1, int32 component_index2)
Tries to produce a component that's equivalent to running the component 'component_index2' with input...
void GetComputableInfo(std::vector< std::vector< bool > > *computable) const
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
TdnnComponent is a more memory-efficient alternative to manually splicing several frames of input and...
SimpleForwardingDescriptor is the base-case of ForwardingDescriptor, consisting of a source node in t...
BaseFloat energy_threshold_
void SetDropoutProportion(BaseFloat p)
void SetUpdatableConfigs(const UpdatableComponent &other)
bool OptimizeNode(int32 node_index)
This function modifies the neural network in the case where 'node_index' is a component-input node wh...
ModelCollapser(const CollapseModelConfig &config, Nnet *nnet)
void Compute(const NnetExample &eg)
virtual int32 NumParameters() const
The following new virtual function returns the total dimension of the parameters in this class...
bool DescriptorTokenize(const std::string &input, std::vector< std::string > *tokens)
This function tokenizes input when parsing Descriptor configuration values.
void ReduceRankOfComponents(const std::string component_name_pattern, int32 rank, Nnet *nnet)
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
virtual Component * Copy() const
Copies component (deep copy).
virtual void Scale(BaseFloat scale)
This virtual function when called on – an UpdatableComponent scales the parameters by "scale" when c...
void ScaleBatchnormStats(BaseFloat batchnorm_stats_scale, Nnet *nnet)
This function scales the batchorm stats of any batchnorm components (components of type BatchNormComp...
bool IsInputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kInput.
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent.
std::vector< int32 > num_max_change_per_component_applied
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
std::vector< IoSpecification > inputs
This class is for computing cross-entropy and accuracy values in a neural network, for diagnostics.
void SetComponent(int32 c, Component *component)
Replace the component indexed by c with a new component.
std::vector< Index > indexes
"indexes" is a vector the same length as features.NumRows(), explaining the meaning of each row of th...
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
const CuMatrix< BaseFloat > & LinearParams() const
bool store_component_stats
bool IsComponentNode(int32 node) const
Returns true if this is a component node, meaning that it is of type kComponent.
std::string component_name_pattern_
void RemoveOrphanComponents()
ObjectiveType objective_type
virtual void GetNodeDependencies(std::vector< int32 > *node_indexes) const
This function appends to "node_indexes" all the node indexes.
bool NameMatchesPattern(const char *name, const char *pattern)
CompositeComponent is a component representing a sequence of [simple] components. ...
virtual void Vectorize(VectorBase< BaseFloat > *params) const
Turns the parameters into vector form.
void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet)
ReadEditConfig() reads a file with a similar-looking format to the config file read by Nnet::ReadConf...
std::string component_name
void SetNodeName(int32 node_index, const std::string &new_name)
This can be used to modify individual node names.
int32 OutputDim(const std::string &output_name) const
This file contains declarations of components that in one way or another normalize their input: Norma...
bool PrintTotalStats() const
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
void VectorizeNnet(const Nnet &src, VectorBase< BaseFloat > *parameters)
Copies the nnet parameters to *params, whose dimension must be equal to NumParameters(src).
FixedScaleComponent applies a fixed per-element scale; it's similar to the Rescale component in the n...
This file contains some miscellaneous functions dealing with class Nnet.
void ConvertRepeatedToBlockAffine(CompositeComponent *c_component)
void SetNnetAsGradient(Nnet *nnet)
Sets nnet as gradient by setting is_gradient_ to true and learning_rate_ to 1 for each UpdatableCompo...
This file contains declarations of components that are "simple", meaning they don't care about the in...
void ComputeGraphTranspose(const std::vector< std::vector< int32 > > &graph, std::vector< std::vector< int32 > > *graph_transpose)
Outputs a graph in which the order of arcs is reversed.
std::string Info() const
returns some human-readable information about the network, mostly for debugging purposes.
int32 Modulus() const
[Relevant for clockwork RNNs and similar].
void ConstrainOrthonormal(Nnet *nnet)
This function, to be called after processing every minibatch, is responsible for enforcing the orthog...
const CollapseModelConfig & config_
void Init(int32 input_dim, int32 output_dim, BaseFloat param_stddev, BaseFloat bias_stddev)
BaseFloat DropoutProportion() const
int32 CollapseComponentsBatchnorm(int32 component_index1, int32 component_index2)
Tries to produce a component that's equivalent to running the component 'component_index2' with input...
void AddVec2(const Real alpha, const VectorBase< Real > &v)
Add vector : *this = *this + alpha * rv^2 [element-wise squaring].
void UnVectorizeNnet(const VectorBase< BaseFloat > ¶meters, Nnet *dest)
Copies the parameters from params to *dest.
void FreezeNaturalGradient(bool freeze, Nnet *nnet)
Controls if natural gradient will be updated.
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent.
int32 GetNumNvalues(const std::vector< NnetIo > &io_vec, bool exhaustive)
This utility function can be used to obtain the number of distinct 'n' values in a training example...
This is an abstract base-class.
std::string UnusedValues() const
returns e.g.
void ResetGenerators(Nnet *nnet)
This function calls 'ResetGenerator()' on all components in 'nnet' that inherit from class RandomComp...
bool GraphHasCycles(const std::vector< std::vector< int32 > > &graph)
This function returns 'true' if the graph represented in 'graph' contains cycles (including cycles wh...
void ConstrainOrthonormalInternal(BaseFloat scale, CuMatrixBase< BaseFloat > *M)
const NetworkNode & GetNode(int32 node) const
returns const reference to a particular numbered network node.
virtual void Scale(BaseFloat scale)
This virtual function when called on -- an UpdatableComponent scales the parameters by "scale" when c...
int32 NumParameters(const Nnet &src)
Returns the total of the number of parameters in the updatable components of the nnet.
void AddMatVec(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType trans, const CuVectorBase< Real > &v, const Real beta)
void ParseConfigLines(const std::vector< std::string > &lines, std::vector< ConfigLine > *config_lines)
CuMatrixBase< BaseFloat > & LinearParams()
virtual void FreezeNaturalGradient(bool freeze)
freezes/unfreezes NaturalGradient updates, if applicable (to be overriden by components that use Natu...
void AddNnetComponents(const Nnet &src, const Vector< BaseFloat > &alphas, BaseFloat scale, Nnet *dest)
Does *dest += alpha * src for updatable components (affects nnet parameters), and *dest += scale * sr...
virtual int32 InputDim() const
Returns input-dimension of this component.
const ForwardingDescriptor & Src() const
BaseFloat MaxChange() const
Returns the per-component max-change value, which is interpreted as the maximum change (in l2 norm) i...
bool IsOutputNode(int32 node) const
Returns true if this is an output node, meaning that it is of type kDescriptor and is not directly fo...
void ApplyL2Regularization(const Nnet &nnet, BaseFloat l2_regularize_scale, Nnet *delta_nnet)
This function is used as part of the regular training workflow, prior to UpdateNnetWithMaxChange().
BaseFloat OrthonormalConstraint() const
virtual void ZeroStats()
Components that provide an implementation of StoreStats should also provide an implementation of Zero...
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
BaseFloat L2Regularization() const
Returns the l2 regularization constant, which may be set in any updatable component (usually from the...
virtual BaseFloat DotProduct(const UpdatableComponent &other) const =0
Computes dot-product between parameters of two instances of a Component.
virtual int32 Properties() const =0
Return bitmask of the component's properties.
void Swap(Matrix< Real > *mat)
CuVector< BaseFloat > & BiasParams()
int32 CollapseComponentsScale(int32 component_index1, int32 component_index2)
Tries to produce a component that's equivalent to running the component 'component_index2' with input...
static void PreMultiplyAffineParameters(const CuVectorBase< BaseFloat > &offset, const CuVectorBase< BaseFloat > &scale, CuVectorBase< BaseFloat > *bias_params, CuMatrixBase< BaseFloat > *linear_params)
This helper function, used in GetDiagonallyPreModifiedComponentIndex, modifies the linear and bias param...
void RecomputeStats(const std::vector< NnetChainExample > &egs, const chain::ChainTrainingOptions &chain_config_in, const fst::StdVectorFst &den_fst, Nnet *nnet)
This function zeros the stored component-level stats in the nnet using ZeroComponentStats(), then recomputes them with the supplied egs.
virtual Component * Copy() const
Copies component (deep copy).
virtual int32 OutputDim() const
Returns output-dimension of this component.
CuMatrixBase< BaseFloat > & Params()
const SumDescriptor & Part(int32 n) const
returns the n'th part.
void RemoveOrphanNodes(bool remove_orphan_inputs=false)
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
int32 GetComponentIndex(const std::string &node_name) const
returns index associated with this component name, or -1 if no such index.
SvdApplier(const std::string component_name_pattern, int32 bottleneck_dim, BaseFloat energy_threshold, BaseFloat shrinkage_threshold, Nnet *nnet)
void SetLearningRate(BaseFloat learning_rate, Nnet *nnet)
Sets the underlying learning rate for all the components in the nnet to this value.
bool HasBatchnorm(const Nnet &nnet)
Returns true if nnet has at least one component of type BatchNormComponent.
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
This is the normal base-case of SumDescriptor which just wraps a ForwardingDescriptor.
static bool ComputeSimpleNnetContextForShift(const Nnet &nnet, int32 input_start, int32 window_size, int32 *left_context, int32 *right_context)
void ZeroComponentStats(Nnet *nnet)
Zeroes the component stats in all nonlinear components in the nnet.
void Compute(const ComputationRequest &request)
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
const CuMatrix< BaseFloat > & LinearParams() const
const ForwardingDescriptor & Src() const
int32 GetScaledComponentIndex(int32 component_index, BaseFloat scale)
Given a component 'component_index', returns a component which will give the same output as the curre...
std::string NnetInfo(const Nnet &nnet)
This function returns various info about the neural net.
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
const std::string & GetComponentName(int32 component_index) const
returns individual component name.
Real * Data()
Returns a pointer to the start of the vector's data.
A ForwardingDescriptor describes how we copy data from another NetworkNode, or from multiple other Ne...
void ReadConfigLines(std::istream &is, std::vector< std::string > *lines)
This function reads in a config file and *appends* its contents to a vector of lines; it is responsib...
BaseFloat DotProduct(const Nnet &nnet1, const Nnet &nnet2)
Returns dot product between two networks of the same structure (calls the DotProduct functions of the...
MatrixIndexT Dim() const
Returns the dimension of the vector.
std::string component_name_a
void Scale(Real alpha)
Multiplies all elements by this constant.
NetworkNode is used to represent three types of thing: either an input of the network (which pretty ...
Component * GetComponent(int32 c)
Return component indexed c. Not a copy; not owned by caller.
virtual void SetAsGradient()
Sets is_gradient_ to true and sets learning_rate_ to 1, ignoring learning_rate_factor_.
This file contains a few functions that treat the neural net as a graph on nodes: e...
int32 NumComponents() const
Class UpdatableComponent is a Component which has trainable parameters; it extends the interface of C...
const Component * GetComponent(int32 i) const
Gets the ith component in this component.
void MulColsVec(const VectorBase< Real > &scale)
Equivalent to (*this) = (*this) * diag(scale).
Real Max() const
Returns the maximum value of any element, or -infinity for the empty vector.
void SetDropoutProportion(BaseFloat p)
virtual void SetLearningRateFactor(BaseFloat lrate_factor)
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
const CuVector< BaseFloat > & Offset() const
virtual std::string Type() const =0
Returns a string such as "SigmoidComponent", describing the type of the object.
virtual int32 OutputDim() const
Returns output-dimension of this component.
GeneralDropoutComponent implements dropout, including a continuous variant where the thing we multipl...
CuSubVector< Real > Range(const MatrixIndexT o, const MatrixIndexT l)
virtual void PerturbParams(BaseFloat stddev)=0
This function is to be used in testing.
Matrix for CUDA computing.
std::vector< Index > indexes
void RemoveSomeNodes(const std::vector< int32 > &nodes_to_remove)
MatrixIndexT NumCols() const
bool IsSimpleNnet(const Nnet &nnet)
This function returns true if the nnet has the following properties: It has an output called "output"...
Offsets in 't' and 'x' values of other ForwardingDescriptors.
void WriteConfig(std::ostream &os, const std::vector< std::string > &node_names) const
int32 NumComponents() const
void ConsolidateMemory(Nnet *nnet)
This just calls ConsolidateMemory() on all the components of the nnet.
virtual void UnVectorize(const VectorBase< BaseFloat > ¶ms)
Converts the parameters from vector form.
A class representing a vector.
This class is responsible for parsing input like hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e" and giving you access to the fields, in this case.
#define KALDI_ASSERT(cond)
std::vector< IoSpecification > outputs
void SetRequireDirectInput(bool b, Nnet *nnet)
Calls the corresponding function in any component of type StatisticsPoolingComponent; used as a way t...
virtual void ConsolidateMemory()
This virtual function relates to memory management, and avoiding fragmentation.
const CuVector< BaseFloat > & BiasParams() const
virtual void SetParams(const CuVectorBase< BaseFloat > &bias, const CuMatrixBase< BaseFloat > &linear)
Descriptor ReplaceNodeInDescriptor(const Descriptor &src, int32 node_to_replace, const Descriptor &expr)
int32 NumInputNodes(const Nnet &nnet)
returns the number of input nodes of this nnet.
BaseFloat OrthonormalConstraint() const
std::vector< ModifiedComponentInfo > modified_component_info_
void PerturbParams(BaseFloat stddev, Nnet *nnet)
Calls PerturbParams (with the given stddev) on all updatable components of the nnet.
bool HasUnusedValues() const
bool GetValue(const std::string &key, std::string *value)
void SetComponent(int32 i, Component *component)
Sets the ith component.
Real FrobeniusNorm() const
void Print(const Nnet &nnet) const
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
int32 NumOutputNodes(const Nnet &nnet)
returns the number of output nodes of this nnet.
virtual int32 OutputDim() const
Returns output-dimension of this component.
virtual void Scale(BaseFloat scale)
This virtual function when called on -- an UpdatableComponent scales the parameters by "scale" when c...
virtual void Scale(BaseFloat scale)
This virtual function when called on -- an UpdatableComponent scales the parameters by "scale" when c...
void Swap(CuVector< Real > *vec)
int32 CollapseComponentsAffine(int32 component_index1, int32 component_index2)
Tries to produce a component that's equivalent to running the component 'component_index2' with input...
union kaldi::nnet3::NetworkNode::@15 u
void Print(std::ostream &os, const std::vector< std::string > &node_names)
This function, useful for debugging/visualization purposes, prints out a summary of the computation g...
int32 GetNodeIndex(const std::string &node_name) const
returns index associated with this node name, or -1 if no such index.
int32 NumParts() const
Returns the number of parts that are concatenated over.
MatrixIndexT NumRows() const
Dimensions.
Provides a vector abstraction class.
bool NnetIsRecurrent(const Nnet &nnet)
Returns true if 'nnet' has some kind of recurrency.
virtual void SetUnderlyingLearningRate(BaseFloat lrate)
Sets the learning rate of gradient descent- gets multiplied by learning_rate_factor_.
bool DecomposeComponent(const std::string &component_name, const AffineComponent &affine, Component **component_a_out, Component **component_b_out)
virtual void Add(BaseFloat alpha, const Component &other)
This virtual function when called by – an UpdatableComponent adds the parameters of another updatabl...
int32 SumDescriptorIsCollapsible(const SumDescriptor &sum_desc)
TimeHeightConvolutionComponent implements 2-dimensional convolution where one of the dimensions of co...
BaseFloat LearningRate() const
Gets the learning rate to be used in gradient descent.
std::vector< int32 > modification_index_
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
This file contains declarations of components that are not "simple", meaning they care about the inde...
The first step in compilation is to turn the ComputationSpecification into a ComputationGraph, where for each Cindex we have a list of other Cindexes that it depends on.
bool UpdateNnetWithMaxChange(const Nnet &delta_nnet, BaseFloat max_param_change, BaseFloat max_change_scale, BaseFloat scale, Nnet *nnet, std::vector< int32 > *num_max_change_per_component_applied, int32 *num_max_change_global_applied)
This function does the operation '*nnet += scale * delta_nnet', while respecting any max-parameter-ch...
int32 NumUpdatableComponents(const Nnet &dest)
Returns the number of updatable components in the nnet.
const CuVector< BaseFloat > & Scales() const
const std::vector< std::string > & GetNodeNames() const
returns vector of node names (needed by some parsing code, for instance).
Real Min() const
Returns the minimum value of any element, or +infinity for the empty vector.
void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest)
Does *dest += alpha * src (affects nnet parameters and stored stats).
An abstract representation of a set of Cindexes.
void SortSvd(VectorBase< Real > *s, MatrixBase< Real > *U, MatrixBase< Real > *Vt, bool sort_on_absolute_value)
Function to ensure that SVD is sorted.
void DecomposeComponents()
MatrixIndexT Dim() const
Dimensions.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState *state)
Vector for CUDA computing.
bool IsComponentInputNode(int32 node) const
Returns true if this is a component-input node, i.e.
Config class for the CollapseModel function.
FixedAffineComponent is an affine transform that is supplied at network initialization time and is no...
This class implements an affine transform using a block diagonal matrix e.g., one whose weight matrix...
int32 GetReducedDimension(const Vector< BaseFloat > &input_vector, int32 lower, int32 upper, BaseFloat min_val)