44       horizontal_flip_prob(0.0),
    45       horizontal_shift(0.0),
    49       fill_mode_string(
"nearest") { }
    53     po->
Register(
"num-channels", &num_channels, 
"Number of colors in the image."    54                  "It is important to specify this (helps interpret the image "    56     po->
Register(
"horizontal-flip-prob", &horizontal_flip_prob,
    57                  "Probability of doing horizontal flip");
    58     po->
Register(
"horizontal-shift", &horizontal_shift,
    59                  "Maximum allowed horizontal shift as proportion of image "    60                  "width.  Padding is with closest pixel.");
    61     po->
Register(
"vertical-shift", &vertical_shift,
    62                  "Maximum allowed vertical shift as proportion of image "    63                  "height.  Padding is with closest pixel.");
    64     po->
Register(
"rotation-degree", &rotation_degree,
    65                  "Maximum allowed degree to rotate the image");
    66     po->
Register(
"rotation-prob", &rotation_prob,
    67                  "Probability of doing rotation");
    68     po->
Register(
"fill-mode", &fill_mode_string, 
"Mode for dealing with "    69                  "points outside the image boundary when applying transformation. "    70                  "Choices = {nearest, reflect}");
    76                  horizontal_flip_prob <= 1);
    77     KALDI_ASSERT(horizontal_shift >= 0 && horizontal_shift <= 1);
    78     KALDI_ASSERT(vertical_shift >= 0 && vertical_shift <= 1);
    79     KALDI_ASSERT(rotation_degree >=0 && rotation_degree <= 180);
    81     KALDI_ASSERT(fill_mode_string == 
"nearest" || fill_mode_string == 
"reflect");
    86     if (fill_mode_string == 
"reflect") {
    89       if (fill_mode_string != 
"nearest") {
    90         KALDI_ERR << 
"Choices for --fill-mode are 'nearest' or 'reflect', got: "   120   for (
int32 r = 0; r < width; r++) {
   121     for (
int32 c = 0; c < height; c++) {
   125                                           transform(0, 1) * c + transform(0, 2);
   127                                           transform(1, 1) * c + transform(1, 2);
   139       BaseFloat weight_11 = (r2 - r_old) * (c2 - c_old),
   140           weight_12 = (r2 - r_old) * (c_old - c1),
   141           weight_21 = (r_old - r1) * (c2 - c_old),
   142           weight_22 = (r_old - r1) * (c_old - c1);
   151           if (r1 >= width) r1 = width - 1;
   159           if (c1 >= height) c1 = height - 1;
   165           if (r2 < 0) r2 = - r2;
   168           r2 = 2 * width - 2 - r2;
   169           if (r1 >= width) r1 = 2 * width - 2 - r1;
   173           if (c2 < 0) c2 = -c2;
   176           c2 = 2 * height - 2 - c2;
   177           if (c1 >= height) c1 = 2 * height - 2 - c1;
   182         BaseFloat p11 = original_image(r1, num_channels * c1 + ch),
   183             p12 = original_image(r1, num_channels * c2 + ch),
   184             p21 = original_image(r2, num_channels * c1 + ch),
   185             p22 = original_image(r2, num_channels * c2 + ch);
   186         (*image)(r, num_channels * c + ch) = weight_11 * p11 + weight_12 * p12 +
   187             weight_21 * p21 + weight_22 * p22;
   213     KALDI_ERR << 
"Number of columns in image must divide the number "   231   shift_mat(0, 2) = round(horizontal_shift);
   232   shift_mat(1, 2) = round(vertical_shift);
   236     shift_mat(0, 0) = -1.0;
   247     rotation_mat(0, 0) = cos(theta);
   248     rotation_mat(0, 1) = -sin(theta);
   249     rotation_mat(1, 0) = sin(theta);
   250     rotation_mat(1, 1) = cos(theta);
   273   if (transform_mat.
IsUnit())  
   282   set_origin_mat(0, 2) = image_width / 2.0 - 0.5;
   283   set_origin_mat(1, 2) = image_height / 2.0 - 0.5;
   286   reset_origin_mat(0, 2) = -image_width / 2.0 + 0.5;
   287   reset_origin_mat(1, 2) = -image_height / 2.0 + 0.5;
   306   bool found_input = 
false;
   307   for (
int32 i = 0; 
i < io_size; 
i++) {
   309     if (io.
name == 
"input") {
   324     KALDI_ERR << 
"Nnet example to perturb had no NnetIo object named 'input'";
   331 int main(
int argc, 
char *argv[]) {
   333     using namespace kaldi;
   336     typedef kaldi::int64 int64;
   339         "Copy examples (single frames or fixed-size groups of frames) for neural\n"   340         "network training, doing image augmentation inline (copies after possibly\n"   341         "modifying of each image, randomly chosen according to configuration\n"   344         "  nnet3-egs-augment-image --horizontal-flip-prob=0.5 --horizontal-shift=0.1\\\n"   345         "       --vertical-shift=0.1 --srand=103 --num-channels=3 --fill-mode=nearest ark:- ark:-\n"   347         "Requires that each eg contain a NnetIo object 'input', with successive\n"   348         "'t' values representing different x offsets , and the feature dimension\n"   349         "representing the y offset and the channel (color), with the channel\n"   350         "varying the fastest.\n"   351         "See also: nnet3-copy-egs\n";
   354     int32 srand_seed = 0;
   359     po.
Register(
"srand", &srand_seed, 
"Seed for the random number generator");
   373     std::string examples_rspecifier = po.
GetArg(1),
   374         examples_wspecifier = po.
GetArg(2);
   381     for (; !example_reader.
Done(); example_reader.
Next(), num_done++) {
   382       std::string key = example_reader.
Key();
   385       example_writer.
Write(key, eg);
   387     KALDI_LOG << 
"Perturbed " << num_done << 
" neural-network training images.";
   388     return (num_done == 0 ? 1 : 0);
   389   } 
catch(
const std::exception &e) {
   390     std::cerr << e.what() << 
'\n';
 NnetExample is the input data and corresponding label (or labels) for one or more frames of input...
 
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
 
FillMode GetFillMode() const
 
float RandUniform(struct RandomState *state=NULL)
Returns a random number strictly between 0 and 1. 
 
void GetMatrix(Matrix< BaseFloat > *mat) const
Outputs the contents as a matrix. 
 
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix). 
 
int main(int argc, char *argv[])
 
Base class which provides matrix operations not involving resizing or allocation. ...
 
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor]. 
 
BaseFloat rotation_degree
 
bool WithProb(BaseFloat prob, struct RandomState *state)
 
A templated class for writing objects to an archive or script file; see The Table concept...
 
GeneralMatrix features
The features or labels. 
 
BaseFloat horizontal_flip_prob
 
void SetUnit()
Sets to zero, except ones along diagonal [for non-square matrices too]. 
 
void Write(const std::string &key, const T &value) const
 
void Register(const std::string &name, bool *ptr, const std::string &doc)
 
void PerturbImageInNnetExample(const ImageAugmentationConfig &config, NnetExample *eg)
This function does image perturbation as directed by 'config'. The example 'eg' is expected to contain...
 
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
 
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
 
void AddMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
 
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables. 
 
void PerturbImage(const ImageAugmentationConfig &config, MatrixBase< BaseFloat > *image)
This function randomly modifies (perturbs) the image by applying different geometric transformations ...
 
void AddMatMatMat(const Real alpha, const MatrixBase< Real > &A, MatrixTransposeType transA, const MatrixBase< Real > &B, MatrixTransposeType transB, const MatrixBase< Real > &C, MatrixTransposeType transC, const Real beta)
this <-- beta*this + alpha*A*B*C.
 
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility. 
 
void Register(ParseOptions *po)
 
int NumArgs() const
Number of positional parameters (c.f. argc-1). 
 
void ApplyAffineTransform(MatrixBase< BaseFloat > &transform, int32 num_channels, MatrixBase< BaseFloat > *image, FillMode fill_mode)
This function applies a geometric transformation 'transform' to the image. 
 
#define KALDI_ASSERT(cond)
 
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix). 
 
std::string name
the name of the input in the neural net; in simple setups it will just be "input". 
 
BaseFloat horizontal_shift
 
std::string fill_mode_string
 
ImageAugmentationConfig()
 
std::vector< NnetIo > io
"io" contains the input and output. 
 
bool IsUnit(Real cutoff=1.0e-05) const
Returns true if the matrix is all zeros, except for ones on diagonal.