Libraries

library(R6)
library(cgraph)

Xavier Initialization

  xavier_init <- function(n.rows, n.cols)
  {
    # Draw weights uniformly from [-sqrt(6 / (n + m)), sqrt(6 / (n + m))],
    # where n and m are the dimensions of the weight matrix (Glorot & Bengio, 2010)
    matrix(runif(
        n.rows * n.cols,
        min = -sqrt(6 / (n.rows + n.cols)),
        max = sqrt(6 / (n.rows + n.cols))
      ), n.rows, n.cols
    )
  }
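
This keeps the scale of the activations roughly constant across layers. A quick usage check (using the dimensions of the encoder weights fitted later in this post) confirms that the draws stay within the Xavier bounds:

  w <- xavier_init(40, 400)
  
  dim(w)                               # 40 400
  all(abs(w) <= sqrt(6 / (40 + 400)))  # TRUE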

R6 Class

  ae <- R6Class(
    classname = "ae",
    inherit = cgraph,
    public = list(
      error = c(), # mean training loss per epoch
      initialize = function(x, n.hidden = 1, ...)
      {
        super$initialize()
        
        x <- as.matrix(x)
        
        # Placeholder for the data; samples are fed to the graph as columns
        input <- input(name = "input")
        
        # Encoder and decoder weight matrices
        w1 <- parm(xavier_init(n.hidden, ncol(x)), name = "w1")
        w2 <- parm(xavier_init(ncol(x), n.hidden), name = "w2")
        
        # Encoder and decoder bias vectors
        b1 <- parm(matrix(0, n.hidden), name = "b1")
        b2 <- parm(matrix(0, ncol(x)), name = "b2")
        
        # Encoder: compress the input to n.hidden dimensions
        hidden <- cg_sigmoid(cg_linear(w1, input, b1), name = "hidden")
        
        # Decoder: reconstruct the input from the hidden representation
        output <- cg_sigmoid(cg_linear(w2, hidden, b2), name = "output")
        
        # Loss: mean over samples of the per-sample sum of squared errors
        cg_mean(cg_colSums((output - input) ^ const(2)), name = "loss")
        
        self$fit(x, ...)
      },
      fit = function(x, batch.size = 1, eta = 0.05, n.epochs = 1)
      {
        parms <- c("w1", "w2", "b1", "b2")

        x <- as.matrix(x)
        
        for(i in 1:n.epochs)
        {
          loss <- c()

          # Shuffle the row indices and partition them into mini-batches
          batches <- split(sample(nrow(x)),
            rep(1:ceiling(nrow(x) / batch.size), length.out = nrow(x))
          )
          
          for(batch in batches)
          {
            # Forward pass: evaluate the loss on the mini-batch
            values <- self$run("loss", list(
              input = t(x[batch,, drop = FALSE])
            ))

            # Backward pass: gradients of the loss w.r.t. the parameters
            grads <- self$gradients("loss", values)
            
            # Gradient-descent update of each parameter
            for(parm in parms)
            {
              self$values[[parm]] <- self$values[[parm]] - eta * grads[[parm]]
            }

            loss <- c(loss, values$loss)
          }

          # Record the mean mini-batch loss of this epoch
          self$error <- c(self$error, mean(loss))
        }
      },
      predict = function(x)
      {
        x <- as.matrix(x)
        
        # Run the graph up to the output node; samples as columns
        values <- self$run("output", list(
          input = t(x)
        ))
        
        # Transpose back so that rows are samples again
        t(values$output)
      }
    )
  )
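
The fit method performs mini-batch gradient descent: at the start of each epoch, the row indices of x are shuffled and partitioned into batches of at most batch.size rows, and the parameters are updated after every batch. The splitting logic is easy to verify in isolation (a small illustration with a hypothetical seven rows and a batch size of two):

  split(sample(7), rep(1:ceiling(7 / 2), length.out = 7))
  # A list of four batches: three with two row indices and one with a single index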

Example

Let us apply the autoencoder to encode a set of images. First, we load the icons dataset.

  icons <- read.csv("data/icons.csv")

The icons dataset consists of 1800 grayscale images (20 by 20 pixels each) displaying various web icons. You can download the dataset in the downloads section below. Since the model operates on matrices, we convert the data frame to a matrix.

  icons <- as.matrix(icons)
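
A quick sanity check (assuming the CSV loaded as described) confirms the dimensions and pixel range:

  dim(icons)    # 1800  400
  range(icons)  # values lie within [0, 1]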

Here, icons is an 1800 by 400 matrix of pixel values. Each row of the matrix corresponds to an image and each column to an individual pixel of the images. Each pixel takes a value between 0 (black) and 1 (white). The first six images look as follows:

  par(mfrow = c(2,3))
  
  col <- gray.colors(32, 0, 1)
  
  for(i in 1:6)
  {
    # Reshape the 400-pixel row to a 20 by 20 matrix and reorient it for image()
    icon <- t(matrix(icons[i,], 20, 20))[,20:1]
  
    image(icon, col = col)
  }

We apply the autoencoder to compress the images from a 400-dimensional space to a 40-dimensional space.

  x <- ae$new(
    x = icons,
    n.hidden = 40,
    batch.size = 20,
    eta = 0.05,
    n.epochs = 400
  )
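
Since the class appends the mean loss of every epoch to its error field, we can verify that training converged by comparing the first and last recorded values:

  head(x$error, 1)  # mean loss after the first epoch
  tail(x$error, 1)  # mean loss after the final epoch; should be considerably lower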

The computational graph of the autoencoder looks as follows:

  plot(x)

We can also visualize the mean squared error (MSE) of the reconstructions at each epoch during the model fitting.

  plot(x$error, type = "l", xlab = "Epoch", ylab = "MSE")

The autoencoder compresses the images reasonably well. If we compare the first image in the dataset with the corresponding reconstruction by the autoencoder, the reconstructed image is clearly recognizable.

  par(mfrow = c(1,2))
  
  # Avoid naming the variable image, which would shadow the image() function
  original <- t(matrix(icons[1,], 20, 20))[,20:1]
  reconstruction <- t(matrix(x$predict(icons[1,, drop = FALSE]), 20, 20))[,20:1]
  
  image(original, col = col, main = "Original")
  image(reconstruction, col = col, main = "Reconstructed")
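
Beyond visual inspection, the reconstruction quality can also be quantified. A small sketch computing the per-pixel mean squared error of the first reconstruction:

  # Average squared difference over the 400 pixels of the first image
  mean((icons[1,] - x$predict(icons[1,, drop = FALSE]))^2)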

Downloads

  1. Icons dataset.