implementing the backpropagation algorithm

closed account (E093605o)
I have written a basic linear algebra library and an MLP (I started learning C++ a week ago). When I run the code I get an assertion error: in the backprop() method I multiply two matrices whose inner dimensions (columns of the first, rows of the second) don't match. I strictly adhered to this article http://neuralnetworksanddeeplearning.com/chap2.html
For the sake of completeness I have included the Main.cpp file from which backprop() is called, but the error is in the backprop() method itself; I have marked the offending line with a caps-lock comment. Can somebody tell me where the logic flaw is?
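
For reference, the four equations from the article that backprop() is meant to implement are (in the article's notation, with $\odot$ the elementwise product):

$$\delta^L = \nabla_a C \odot \sigma'(z^L) \quad \text{(BP1)} \qquad \delta^l = \left((w^{l+1})^T \delta^{l+1}\right) \odot \sigma'(z^l) \quad \text{(BP2)}$$
$$\frac{\partial C}{\partial b^l_j} = \delta^l_j \quad \text{(BP3)} \qquad \frac{\partial C}{\partial w^l_{jk}} = a^{l-1}_k \, \delta^l_j \quad \text{(BP4)}$$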
//MLP.h
#pragma once
#include "Matrix.h"
#include <tuple>

template<typename T>
class MLP {
 public:
  std::vector<size_t> units_per_layer;
  std::vector<Matrix<T>> bias_vectors;
  std::vector<Matrix<T>> weight_matrices;
  std::vector<Matrix<T>> activations;
  std::vector<Matrix<T>> zs;
  

  double lr = .002;

  MLP(std::vector<size_t> units_per_layer):
    units_per_layer(units_per_layer),
    weight_matrices(),
    bias_vectors(),
    zs(),
    activations()
    {

  // one weight matrix, bias vector, and z cache per layer transition
  for (size_t i = 0; i < units_per_layer.size() - 1; ++i) {
    size_t in_channels{units_per_layer[i]};
    size_t out_channels{units_per_layer[i+1]};

    // initialize to random Gaussian
    auto W = mtx<T>::randn(out_channels, in_channels);
    weight_matrices.push_back(W);

    auto b = mtx<T>::randn(out_channels, 1);
    bias_vectors.push_back(b);

    auto z = mtx<T>::randn(out_channels, 1);
    zs.push_back(z);
  }
  // one activation slot per layer, including the input
  activations.resize(units_per_layer.size());
}

// logistic sigmoid
static inline auto sigmoid(double x) {
  return 1.0 / (1 + exp(-x));
}

// sigmoid derivative, written in terms of the sigmoid *output* s: s * (1 - s)
static inline auto d_sigmoid(double x){
  return (x * (1 - x));
}


auto forward(Matrix<T> x) {
  assert(std::get<0>(x.shape) == units_per_layer[0] && std::get<1>(x.shape) == 1);

  activations[0] = x;
  Matrix<T> prev(x);
  for (size_t i = 0; i < units_per_layer.size() - 1; i++) {
    // a^{i+1} = sigmoid(W_i * a^i + b_i)
    Matrix<T> y = weight_matrices[i].matmul(prev);
    y = y + bias_vectors[i];
    y = y.apply_function(sigmoid);
    activations[i+1] = y;
    prev = y;
  }
  return prev;
}

std::tuple<std::vector<Matrix<T>>,std::vector<Matrix<T>>> backprop(Matrix<T> &target) {
  assert(std::get<0>(target.shape) == units_per_layer.back());

  // determine the simple error, error = target - output
  Matrix<T> error = (target - activations.back());
  Matrix<T> last_z = zs[zs.size()-1];
  Matrix<T> last_z_transformed = last_z.apply_function(d_sigmoid);
  Matrix<T> delta_L = error.multiply_elementwise(last_z_transformed);

  //the weights and bias gradients
  std::vector<Matrix<T>> nabla_w(weight_matrices.size());
  std::vector<Matrix<T>> nabla_b(bias_vectors.size());
 
  
  // backprop the error from output to input and step the weights
  for(int i = weight_matrices.size() - 1; i > 0; i--) {
    Matrix<T> z_transformed = zs[i].apply_function(d_sigmoid);
    Matrix<T> w_transposed = weight_matrices[i].T();
    
    // calculating error for previous layer
    // ERROR IN THIS LINE; MATRICES ARE NOT OF EQUAL COLUMN/ROW DIMENSION
    Matrix<T> delta_l = w_transposed.matmul(delta_L).multiply_elementwise(z_transformed);

    // calculating the change of weights and biases
    Matrix<T> a_transposed = activations[i].T();
    nabla_w[i] = delta_l.matmul(a_transposed);
    nabla_b[i] = delta_l;

    // updating the error term delta
    delta_L = delta_l;
  }
  return std::tuple<std::vector<Matrix<T>>,std::vector<Matrix<T>>>(nabla_w,nabla_b);
}

void online_GD(Matrix<T> x, Matrix<T> y){
  forward(x);
  auto grads = backprop(y);      // keep the gradients in a named lvalue;
  update_parameters(grads, 1.0); // update_parameters takes them by reference
}

void update_parameters(std::tuple<std::vector<Matrix<T>>,std::vector<Matrix<T>>> &params, double batch_size){
  for (int i = weight_matrices.size()-1; i > 0; i--){
    Matrix<T> nabla_w_scaled = std::get<0>(params)[i].multiply_scalar(lr/batch_size);
    Matrix<T> nabla_b_scaled = std::get<1>(params)[i].multiply_scalar(lr/batch_size);
    weight_matrices[i] = weight_matrices[i] - nabla_w_scaled;
    bias_vectors[i] = bias_vectors[i] - nabla_b_scaled;
  }
}
};

//Main.cpp
#include "Matrix.h"
#include "MLP.h"
#include <vector>
#include <iostream>
#include <fstream>
#include <math.h>

template<typename T>
void log (std::ostream& file, const Matrix<T>& x, const Matrix<T>& y, const Matrix<T>& y_hat) {
    file << x;
    file << y;
    file << y_hat;
    file << "------------" << std::endl;
}

int main() {

  // init model
  std::vector<size_t> layers = {1,8,8,8,1};


  // open file to save loss, x, y, and model(x)
  std::ofstream my_file; 
  my_file.open ("data.txt");

  int max_iter{1};

  const double PI {3.14159};
  

  MLP<double> model(layers);
  std::cout << model.bias_vectors[0];

  
  for (int i = 0; i < max_iter; i++){
    auto x = mtx<double>::randn(1, 1).multiply_scalar(PI);
    auto y = x.apply_function([](double v) -> double { return sin(v) * sin(v); });

    // forward and backward
    auto y_hat = model.forward(x);
    auto weights_biases = model.backprop(y); 
    model.update_parameters(weights_biases,1.0); // loss and grads computed in here
    

    log<double>(my_file,x,y,y_hat);
  
  }

  my_file.close();
  
}
"when I run the code I get an assertion error - in the backprop() method"


What debugging of the code have you done? In cases like this you should use the debugger to trace through the code, watch the contents of the variables, and see where execution deviates from what you expect, or where the assertion fires. Then you know which line is causing the issue and the contents of the variables involved, which gives you a clue as to the cause. If you still don't understand, post the line in the above code causing the issue and the values of the relevant variables.
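
That said, you have already marked the failing line, and checking the shapes against the article's equations suggests several problems:

1. forward() never stores the pre-activations, so zs keeps the random values it was given in the constructor. Save z before the sigmoid is applied (see the sketch below).
2. The line that fires: for weight_matrices[i], w_transposed.matmul(delta_L) has units_per_layer[i] rows, but zs[i] has units_per_layer[i+1] rows. Equation (BP2) needs the z of the layer you are propagating back *into*, i.e. zs[i-1], not zs[i].
3. nabla_w[i] is built from the freshly backpropagated delta_l, giving a units_per_layer[i] x units_per_layer[i] matrix, while weight_matrices[i] is units_per_layer[i+1] x units_per_layer[i]. Per (BP4) it should use the delta of the layer ahead, taken before delta_L is overwritten: delta_L.matmul(activations[i].T()).
4. The backprop loop and update_parameters both stop at i > 0, so nabla_w[0] and nabla_b[0] are never filled and the first layer is never trained.
5. d_sigmoid(x) = x * (1 - x) is the sigmoid derivative in terms of the sigmoid *output*; applied to a raw z it is not sigma'(z).
6. error = target - output is the negative of the article's (a - y); since update_parameters subtracts the gradient, the current sign pushes the loss up rather than down.

Below is a minimal sketch of a corrected backprop() along those lines. It is untested against your Matrix class and assumes the operations you use elsewhere (matmul, T(), multiply_elementwise, apply_function, operator-) do what their names suggest:

// sigma'(z), taken w.r.t. the raw pre-activation z
static inline double d_sigmoid(double z) {
  double s = 1.0 / (1.0 + exp(-z));
  return s * (1.0 - s);
}

std::tuple<std::vector<Matrix<T>>, std::vector<Matrix<T>>> backprop(Matrix<T>& target) {
  assert(std::get<0>(target.shape) == units_per_layer.back());

  std::vector<Matrix<T>> nabla_w(weight_matrices.size());
  std::vector<Matrix<T>> nabla_b(bias_vectors.size());

  // (BP1): delta^L = (a^L - y) ⊙ sigma'(z^L) -- note: output minus target
  Matrix<T> delta = (activations.back() - target)
      .multiply_elementwise(zs.back().apply_function(d_sigmoid));

  // walk back over every weight matrix, including index 0
  for (int i = static_cast<int>(weight_matrices.size()) - 1; i >= 0; i--) {
    // (BP3)/(BP4): gradients for weight_matrices[i] use the delta of the layer ahead
    nabla_w[i] = delta.matmul(activations[i].T()); // units[i+1] x units[i], same shape as W[i]
    nabla_b[i] = delta;

    if (i > 0) {
      // (BP2): delta^i = (W[i])^T delta^{i+1} ⊙ sigma'(z^i)
      // zs[i-1] has units_per_layer[i] rows; using zs[i] here is what fired your assert
      delta = weight_matrices[i].T().matmul(delta)
          .multiply_elementwise(zs[i - 1].apply_function(d_sigmoid));
    }
  }
  return std::make_tuple(nabla_w, nabla_b);
}

And in forward(), remember the pre-activation before squashing it:

Matrix<T> y = weight_matrices[i].matmul(prev);
y = y + bias_vectors[i];
zs[i] = y;                       // save z^(i+1) for backprop
y = y.apply_function(sigmoid);

With those changes, also let the loop in update_parameters run down to i >= 0 so the first layer is updated too.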