This example demonstrates how to apply an autoencoder to compress a dataset containing images of web icons. Our goal is to fit an autoencoder that can compress the images to a lower-dimensional space while still being able to reconstruct them in the original space. The quality of the reconstruction is measured by the mean squared error between each original image and its reconstruction. The autoencoder is optimized by stochastic gradient descent for 100 epochs with a learning rate of 0.05.

  # Load cgraph package
  library(cgraph)

  # Fix the RNG seed so the random weight initialization and the random
  # batch shuffling below are reproducible across runs
  set.seed(6)

  # Draw an n_rows x n_cols weight matrix using Xavier/Glorot initialization:
  # entries are sampled from N(0, sd^2) with sd = sqrt(2 / (fan_in + fan_out)),
  # which keeps activation variance roughly constant across layers.
  xavier_init <- function(n_rows, n_cols)
  {
    n_total <- n_rows * n_cols
    scale <- sqrt(2 / (n_rows + n_cols))
    values <- rnorm(n_total, mean = 0, sd = scale)
    matrix(values, nrow = n_rows, ncol = n_cols)
  }

  # Basic autoencoder with a single sigmoid hidden layer and MSE loss.
  #
  # x           matrix-like data; rows are observations, columns are features.
  #             Values are assumed to lie in [0, 1] since the output layer is
  #             a sigmoid.
  # n_hidden    number of hidden units (dimension of the compressed code).
  # batch_size  number of rows per mini-batch.
  # eta         learning rate for the gradient descent updates.
  # n_epochs    number of full passes over the data.
  #
  # Returns an object of class "ae" containing the computational graph, the
  # input and output nodes, and the mean training loss per epoch.
  ae <- function(x, n_hidden = 1, batch_size = 1, eta = 0.05, n_epochs = 1)
  {
    # Transform to a matrix
    x <- as.matrix(x)
    
    # Initialize graph
    graph <- cg_graph()
    
    # Create input node; its value is assigned per batch below
    input <- cg_input("input")
    
    # Create parameters: Xavier-initialized weights, zero biases
    parms <- list(
      w1 = cg_parameter(xavier_init(n_hidden, ncol(x)), "w1"),
      w2 = cg_parameter(xavier_init(ncol(x), n_hidden), "w2"),
      b1 = cg_parameter(matrix(0, n_hidden), "b1"),
      b2 = cg_parameter(matrix(0, ncol(x)), "b2")
    )
    
    # Encoder: hidden = sigmoid(w1 %*% input + b1)
    hidden <- cg_sigmoid(cg_linear(parms$w1, input, parms$b1), "hidden")
    
    # Decoder: output = sigmoid(w2 %*% hidden + b2)
    output <- cg_sigmoid(cg_linear(parms$w2, hidden, parms$b2), "output")
    
    # Mean over the batch of the per-observation squared reconstruction error
    # (observations are stored column-wise in the graph, hence cg_colsums)
    loss <- cg_mean(cg_colsums((output - input)^2), "loss")
    
    # Preallocate the per-epoch loss
    error <- numeric(n_epochs)
    
    # Calculate number of batches
    n_batches <- ceiling(nrow(x) / batch_size)
    
    # Optimize by stochastic gradient descent
    for(i in seq_len(n_epochs))
    {
      # Keep track of loss across the batches
      batch_error <- numeric(n_batches)
      
      # Split the row indices into random batches
      batches <- split(sample(nrow(x)),
                       rep(seq_len(n_batches), length.out = nrow(x))
      )
      
      # Iterate over the batches
      for(j in seq_len(n_batches))
      {
        # Set the input value (transpose: columns become observations)
        input$value <- t(x[batches[[j]],, drop = FALSE])
        
        # Perform forward pass
        cg_graph_forward(graph, loss)
        
        # Perform backward pass
        cg_graph_backward(graph, loss)
        
        # Update the parameters
        for(parm in parms)
        {
          # Gradient descent step; cgraph nodes are environments, so this
          # assignment updates the parameter in place within the graph
          parm$value <- parm$value - eta * parm$grad
        }
        
        # Store the loss over the batch
        batch_error[j] <- loss$value
      }
      
      # Average the loss over the batches
      error[i] <- mean(batch_error)
    }
    
    # Create list object and set class attribute
    structure(list(graph = graph, input = input, output = output, error = error), class = "ae")
  }

  # Apply a fitted autoencoder to new data.
  #
  # model  an object of class "ae" as returned by ae().
  # x      matrix-like data with the same number of columns the model was
  #        trained on; rows are observations.
  # ...    ignored; accepted for compatibility with the predict() generic.
  #
  # Returns a matrix of reconstructions, one row per input row.
  predict.ae <- function(model, x, ...)
  {
    # Transform to a matrix once
    x <- as.matrix(x)
    
    # Set the input value (transpose: the graph stores observations in columns)
    model$input$value <- t(x)
    
    # Perform forward pass up to the output node
    cg_graph_forward(model$graph, model$output)
    
    # Return the output, transposed back to one row per observation
    t(model$output$value)
  }
  
  # Load icons data from disk and transform to a matrix;
  # each row is one icon stored as a flattened vector of pixel intensities
  icons <- as.matrix(read.csv("data/icons.csv"))
  
  # Train an autoencoder on the images of the icons
  model <- ae(icons, n_hidden = 40, batch_size = 20, n_epochs = 100)
  
  # Plot the mean squared error during model fitting
  plot(model$error, type = "l", xlab = "Epoch", ylab = "MSE")
  
  # Each icon is a side x side gray-scale image (400 pixels per row)
  side <- 20
  
  # Number of icons to display
  n_show <- 8
  
  # Set plot grid, saving the old settings so they can be restored afterwards
  opar <- par(mfrow = c(2, 4))
  
  # Define colors
  col <- gray.colors(32, 0, 1)
  
  # Plot the first eight original icons
  # (transpose and reverse columns so the image is drawn upright)
  for(i in seq_len(n_show))
  {
    image(t(matrix(icons[i,], side, side))[, side:1], col = col)
  }
  
  # Reconstruct the first eight images
  output <- predict(model, icons[seq_len(n_show),])
  
  # Plot the reconstructed icons for visual comparison with the originals
  for(i in seq_len(n_show))
  {
    image(t(matrix(output[i,], side, side))[, side:1], col = col)
  }
  
  # Restore the previous graphical parameters
  par(opar)