  if ((*sum_group_component =
       dynamic_cast<SumGroupComponent*>(component)) == NULL) {
    KALDI_LOG << "Adding SumGroupComponent to neural net.";
    int32 dim = component->OutputDim();
    std::vector<int32> sizes(dim, 1);  // a vector of all ones, of dimension "dim".
    *sum_group_component = new SumGroupComponent();
    (*sum_group_component)->Init(sizes);
    nnet->Append(*sum_group_component);
    nc++;
  }
  component = &(nnet->GetComponent(nc - 2));
  if ((*softmax_component = dynamic_cast<SoftmaxComponent*>(component)) == NULL)
    KALDI_ERR << "Neural net has wrong topology: expected second-to-last "
              << "component to be SoftmaxComponent, type is "
              << component->Type();
  component = &(nnet->GetComponent(nc - 3));
  if ((*affine_component = dynamic_cast<AffineComponent*>(component)) == NULL)
    KALDI_ERR << "Neural net has wrong topology: expected third-to-last "
              << "component to be AffineComponent, type is "
              << component->Type();
}

void MixupNnet(const NnetMixupConfig &mixup_config,
               Nnet *nnet) {
  AffineComponent *affine_component = NULL;
  SoftmaxComponent *softmax_component = NULL;
  SumGroupComponent *sum_group_component = NULL;
  GiveNnetCorrectTopology(nnet,
                          &affine_component,
                          &softmax_component,
                          &sum_group_component);  // Adds a SumGroupComponent if needed.

  softmax_component->MixUp(mixup_config.num_mixtures,
                           mixup_config.power,
                           mixup_config.min_count,
                           mixup_config.perturb_stddev,
                           affine_component,
                           sum_group_component);
  nnet->Check();  // Checks that dims etc. all match up.
}
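
// Illustrative only (not from this file): a minimal sketch of how a caller
// might drive MixupNnet().  The "am_nnet" object (assumed here to be an nnet2
// acoustic-model wrapper exposing GetNnet()) and the concrete option values
// are assumptions made for the example.
//
//   NnetMixupConfig mixup_config;
//   mixup_config.num_mixtures = 12000;   // total target number of mixtures across all groups
//   mixup_config.power = 0.25;           // exponent of the power rule used by GetSplitTargets()
//   mixup_config.min_count = 1000.0;     // min-count constraint used by GetSplitTargets()
//   mixup_config.perturb_stddev = 0.01;  // std-dev of the perturbation applied to split rows
//   MixupNnet(mixup_config, &(am_nnet.GetNnet()));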
/** Allocate mixtures to states via a power rule, and add any new mixtures. */
void SoftmaxComponent::MixUp(int32 num_mixtures,
                             BaseFloat power,
                             BaseFloat min_count,
                             BaseFloat perturb_stddev,
                             AffineComponent *ac,
                             SumGroupComponent *sc) {
  // "counts" sums the stored occupation counts (value_sum_) over the softmax
  // outputs belonging to each group of the SumGroupComponent.
  std::vector<int32> old_sizes;
  sc->GetSizes(&old_sizes);
  Vector<BaseFloat> counts(old_sizes.size());
  int32 old_dim = 0;
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_input_dim = old_sizes[i];
    BaseFloat this_tot_count = 0.0;  // total count for this group.
    for (int32 d = 0; d < this_input_dim; d++, old_dim++)
      this_tot_count += this->value_sum_(old_dim);
    counts(i) = this_tot_count;
  }
  KALDI_ASSERT(counts.Sum() > 0 && "Cannot do mixing up without counts.");

  std::vector<int32> targets;  // target number of mixtures for each group.
  GetSplitTargets(counts, num_mixtures, power, min_count, &targets);
  KALDI_ASSERT(targets.size() == old_sizes.size());
  std::vector<int32> new_sizes(old_sizes.size());
  for (size_t i = 0; i < targets.size(); i++)
    new_sizes[i] = std::max(targets[i], old_sizes[i]);
  int32 new_dim = std::accumulate(new_sizes.begin(), new_sizes.end(),
                                  static_cast<int32>(0)),
      affine_input_dim = ac->InputDim();
  KALDI_ASSERT(new_dim >= old_dim);
  sc->Init(new_sizes);  // re-initialize the SumGroupComponent with the new sizes.

  // Old and new parameters of the preceding affine component, plus counts:
  Vector<BaseFloat> old_bias_term(ac->bias_params_);
  Matrix<BaseFloat> old_linear_term(ac->linear_params_);
  Vector<BaseFloat> new_bias_term(new_dim);
  Matrix<BaseFloat> new_linear_term(new_dim, affine_input_dim);
  Vector<BaseFloat> new_counts(new_dim);

  // old_offset and new_offset index the softmax dimensions before and after
  // mixing up; within the loop they point at the block for the current group.
  int32 old_offset = 0, new_offset = 0;
  Vector<BaseFloat> old_counts(this->value_sum_);
  for (size_t i = 0; i < old_sizes.size(); i++) {
    int32 this_old_dim = old_sizes[i],
        this_new_dim = new_sizes[i],
        this_cur_dim = this_old_dim;  // this_cur_dim is the loop variable below.

    SubMatrix<BaseFloat> this_old_linear_term(old_linear_term,
                                              old_offset, this_old_dim,
                                              0, affine_input_dim),
        this_new_linear_term(new_linear_term,
                             new_offset, this_new_dim,
                             0, affine_input_dim);
    SubVector<BaseFloat> this_old_bias_term(old_bias_term,
                                            old_offset, this_old_dim),
        this_new_bias_term(new_bias_term, new_offset, this_new_dim),
        this_old_counts(old_counts,
                        old_offset, this_old_dim),
        this_new_counts(new_counts,
                        new_offset, this_new_dim);

    this_new_linear_term.Range(0, this_old_dim, 0, affine_input_dim).
        CopyFromMat(this_old_linear_term);
    this_new_bias_term.Range(0, this_old_dim).
        CopyFromVec(this_old_bias_term);
    this_new_counts.Range(0, this_old_dim).
        CopyFromVec(this_old_counts);
    // Split rows until this group reaches its new size: take the row with the
    // largest count, halve that count, and perturb the two copies of its
    // parameters in opposite directions.
    for (; this_cur_dim < this_new_dim; this_cur_dim++) {
      BaseFloat *count_begin = this_new_counts.Data(),
          *count_end = count_begin + this_cur_dim,
          *count_max = std::max_element(count_begin, count_end);
      KALDI_ASSERT(*count_max > 0.0);
      *count_max *= 0.5;
      *count_end = *count_max;  // count for the row we are adding.
      int32 max_index = static_cast<int32>(count_max - count_begin),
          new_index = this_cur_dim;
      SubVector<BaseFloat> cur_vec(this_new_linear_term, max_index),
          new_vec(this_new_linear_term, new_index);
      new_vec.CopyFromVec(cur_vec);
      Vector<BaseFloat> rand(affine_input_dim);
      rand.SetRandn();
      cur_vec.AddVec(perturb_stddev, rand);
      new_vec.AddVec(-perturb_stddev, rand);
      this_new_bias_term(max_index) += Log(0.5);
      this_new_bias_term(new_index) = this_new_bias_term(max_index);
    }
    old_offset += this_old_dim;
    new_offset += this_new_dim;
  }
  KALDI_ASSERT(old_offset == old_dim && new_offset == new_dim);
  ac->SetParams(new_bias_term, new_linear_term);
  this->value_sum_.Resize(new_counts.Dim());
  this->value_sum_.CopyFromVec(new_counts);
  this->dim_ = new_dim;
  KALDI_LOG << "Mixed up from dimension of " << old_dim << " to " << new_dim
            << " in the softmax layer.";
}