May 19, 2016 at 7:03pm UTC
Hello everyone!
Here's a really simple neural network code I created:
#include <vector>
#include <iostream>
#include <cmath>
#include <cstdlib>   // rand(), RAND_MAX
#include <cstdarg>   // va_list for the variadic Create()
namespace nn {
using std::vector;
// Builds roughly "size" starting weights spread evenly across (-1, 1),
// nudges each by a little noise, then shuffles them.
vector<float> GetRandoms(int size)
{
    vector<float> ret;
    float current = -1.0;
    for (int a = 0; a <= size; a++) {
        current += 2.0 / (size + 1);
        if (current == 0.0) continue; // never hand out an exactly-zero weight
        current -= ((float)rand() / (float)RAND_MAX) / (size * 10.0);
        if (current < 0) ret.push_back(sqrt(current * -1.0) * -1.0);
        else ret.push_back(current);
    }
    // Shuffle so the weights are not handed out in ascending order.
    for (int a = 0; a < size; a++) {
        int r = rand() % size;
        current = ret[a];
        ret[a] = ret[r];
        ret[r] = current;
    }
    return ret;
}
// One incoming connection: its weight and its last change (for momentum).
struct axon
{
    float weight;
    float delta;
};
class neuron
{
public:
    axon *axons; // one per neuron in the previous layer
    // Sets this neuron's error from a target value (tanh derivative) and
    // updates the running error average "ea" used as an adaptive rate.
    void correct(float target) {
        error = (target - output) * (1.0 - output * output);
        float e = fabs(error);
        if (e > 1.0) e = 1.0;
        ea += (e - ea) * (fabs(e - ea) * e * ea);
    }
    float output;
    float error;
    float bias;
    float deltabias;
    float ea; // running error average
};
struct layer
{
    neuron *n;
    int entities; // number of neurons in this layer
};
class network
{
public:
    float *input;   // raw inputs; layers[0].n aliases this array (see Create)
    neuron *output; // shortcut to the output layer's neurons
    layer *layers;
    int layer_count;
    network() {
        layer_count = 0;
    }
    // Sets the output-layer errors from the given targets.
    void Correct(float *targets)
    {
        for (int a = 0; a < layers[layer_count - 1].entities; a++) {
            layers[layer_count - 1].n[a].correct(targets[a]);
        }
    }
    // Copies the output layer's values into out.
    void Outputs(float *out)
    {
        for (int a = 0; a < layers[layer_count - 1].entities; a++) {
            out[a] = layers[layer_count - 1].n[a].output;
        }
    }
    // Copies in into the input array.
    void Inputs(float *in)
    {
        for (int a = 0; a < layers[0].entities; a++) {
            input[a] = in[a];
        }
    }
    void Learn()
    {
        // Backpropagate the errors from the output layer into the hidden layers.
        float sum, e;
        for (int y = layer_count - 2; y > 0; y--)
        {
            for (int n = layers[y].entities - 1; n >= 0; n--) {
                sum = 0.0;
                for (int n2 = layers[y + 1].entities - 1; n2 >= 0; n2--) {
                    sum += layers[y + 1].n[n2].error * layers[y + 1].n[n2].axons[n].weight;
                }
                sum /= layers[y + 1].entities;
                layers[y].n[n].error = (1.0 - layers[y].n[n].output * layers[y].n[n].output) * sum;
                e = fabs(layers[y].n[n].error);
                if (e > 1.0) e = 1.0;
                layers[y].n[n].ea += (e - layers[y].n[n].ea) * (fabs(e - layers[y].n[n].ea) * e * layers[y].n[n].ea);
            }
        }
        // Update weights and biases; eta (learning rate) and alpha (momentum)
        // are derived per neuron from its running error average.
        float eta, alpha;
        for (int y = layer_count - 1; y > 1; y--)
        {
            for (int n = layers[y].entities - 1; n >= 0; n--) {
                alpha = eta = layers[y].n[n].ea * layers[y].n[n].ea;
                alpha = 1.0 - alpha;
                for (int n2 = layers[y - 1].entities - 1; n2 >= 0; n2--) {
                    layers[y].n[n].axons[n2].delta = layers[y - 1].n[n2].output * layers[y].n[n].error * eta + layers[y].n[n].axons[n2].delta * alpha;
                    layers[y].n[n].axons[n2].weight += layers[y].n[n].axons[n2].delta;
                }
                layers[y].n[n].deltabias = layers[y].n[n].error * eta + layers[y].n[n].deltabias * alpha;
                layers[y].n[n].bias += layers[y].n[n].deltabias;
            }
        }
        // Layer 1 reads straight from the input array (layers[0].n is a cast float*).
        for (int n = layers[1].entities - 1; n >= 0; n--) {
            alpha = eta = layers[1].n[n].ea * layers[1].n[n].ea;
            alpha = 1.0 - alpha;
            for (int n2 = layers[0].entities - 1; n2 >= 0; n2--) {
                layers[1].n[n].axons[n2].delta = ((float *)layers[0].n)[n2] * layers[1].n[n].error * eta + layers[1].n[n].axons[n2].delta * alpha;
                layers[1].n[n].axons[n2].weight += layers[1].n[n].axons[n2].delta;
            }
            layers[1].n[n].deltabias = layers[1].n[n].error * eta + layers[1].n[n].deltabias * alpha;
            layers[1].n[n].bias += layers[1].n[n].deltabias;
        }
    }
    void FeedForward()
    {
        float sum;
        // The first hidden layer reads from the raw input array.
        for (int n = 0; n < layers[1].entities; n++) {
            sum = 0.0;
            for (int n2 = 0; n2 < layers[0].entities; n2++) {
                sum += ((float *)layers[0].n)[n2] * layers[1].n[n].axons[n2].weight;
            }
            sum /= layers[0].entities;
            layers[1].n[n].output = tanh(sum + layers[1].n[n].bias);
        }
        // The remaining layers read from the previous layer's outputs.
        for (int y = 1; y < layer_count - 1; y++) {
            for (int n = 0; n < layers[y + 1].entities; n++) {
                sum = 0.0;
                for (int n2 = 0; n2 < layers[y].entities; n2++) {
                    sum += layers[y].n[n2].output * layers[y + 1].n[n].axons[n2].weight;
                }
                sum /= layers[y].entities;
                layers[y + 1].n[n].output = tanh(sum + layers[y + 1].n[n].bias);
            }
        }
    }
    // inputs/outputs: sizes of the first and last layer; Layers: number of
    // hidden layers, whose sizes follow as variadic int arguments.
    void Create(int inputs, int outputs, int Layers, ...) {
        Layers += 2; // plus the input and output layers
        layers = new layer[Layers];
        layers[0].entities = inputs;
        input = new float[inputs];
        va_list vl;
        va_start(vl, Layers);
        for (int a = 1; a < Layers - 1; a++) {
            layers[a].entities = va_arg(vl, int);
        }
        va_end(vl);
        layers[Layers - 1].entities = outputs;
        layer_count = Layers;
        for (int y = 1; y < Layers; y++) {
            layers[y].n = new neuron[layers[y].entities];
            for (int n = 0; n < layers[y].entities; n++) {
                layers[y].n[n].deltabias = 0.0;
                layers[y].n[n].error = 0.0;
                layers[y].n[n].output = 0.0;
                layers[y].n[n].bias = 0.0;
                layers[y].n[n].ea = 1.0;
                layers[y].n[n].axons = new axon[layers[y - 1].entities];
            }
        }
        // Hack: the input layer is the raw float array reinterpreted as
        // neuron*, so FeedForward() and Learn() cast it back to float*.
        layers[0].n = (neuron *)input;
        output = layers[Layers - 1].n;
        for (int n = 0; n < layers[0].entities; n++) {
            ((float *)layers[0].n)[n] = 0.0;
        }
        // Seed every weight and bias with small shuffled random values.
        for (int y = 1; y < Layers; y++) {
            for (int n = 0; n < layers[y].entities; n++) {
                vector<float> randoms = GetRandoms(layers[y - 1].entities + 1);
                for (int a = 0; a < layers[y - 1].entities; a++) {
                    layers[y].n[n].axons[a].weight = randoms[a];
                    layers[y].n[n].axons[a].delta = 0.0;
                }
                layers[y].n[n].bias = randoms[layers[y - 1].entities];
                layers[y].n[n].deltabias = 0.0;
            }
        }
    }
    void free() {
        if (!layer_count) return;
        delete[] input; // also releases layers[0].n, which aliases it
        for (int y = 1; y < layer_count; y++) {
            for (int n = 0; n < layers[y].entities; n++) {
                delete[] layers[y].n[n].axons;
            }
            delete[] layers[y].n;
        }
        delete[] layers;
        layer_count = 0; // guard against a double free
    }
};
}
I feel like it can be done better. The code could be faster.
This is how I use the class:
// (Assumes the nn code above is in the same file.)
using namespace std;

struct learn_xor
{
    float input1;
    float input2;
    float target;
};

int main()
{
    vector<learn_xor> problem;
    nn::network n;
    n.Create(2, 1, 1, 2); // 2 inputs, 1 output, 1 hidden layer of 2 neurons
    problem.push_back({ 0.0, 0.0, 0.0 });
    problem.push_back({ 1.0, 0.0, 1.0 });
    problem.push_back({ 1.0, 1.0, 0.0 });
    problem.push_back({ 0.0, 1.0, 1.0 });
    int a = 0;
    while (1)
    {
        float error = 0;
        cout << "epoch:" << a << endl;
        for (int b = 0; b < 4; b++)
        {
            cout << "inputs:" << problem[b].input1 << "/" << problem[b].input2 << endl;
            n.input[0] = problem[b].input1;
            n.input[1] = problem[b].input2;
            n.FeedForward();
            cout << "results:" << n.output[0].output << endl;
            cout << "targets:" << problem[b].target << endl << endl;
            n.output[0].correct(problem[b].target);
            n.Learn();
            error += fabs(problem[b].target - n.output[0].output);
        }
        cout << "error:" << error / 4 << endl << endl << endl;
        a++;
        system("pause");
    }
}
Last edited on May 19, 2016 at 7:05pm UTC
May 20, 2016 at 8:23am UTC
You can speed up your code by using clusters.
We once had this database structure for articles (clothes). Every article could come in different cloth-sizes:
Table ARTICLES
Nr, Size
article 1, cloth-size S
article 1, cloth-size L
article 1, cloth-size X
article 1, cloth-size XXL
article 2, cloth-size S
article 2, cloth-size XXL
I had the idea to make clusters, that is, to store the cloth-sizes as attributes of the article itself:
Table ARTICLES
Nr, sizeS, sizeL, sizeX, sizeXXL
article 1, y, y, y, y
article 2, y, n, n, y
The speed-up in data access was about 10 times!
What if each of your nodes could manage 10 connections at the same time?!
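To translate that into the network above (this is my reading of the analogy, not code from the original post): keep all of a layer's weights in one contiguous array, a struct-of-arrays layout, instead of one axon object per connection. A minimal sketch; the names soa_layer and feed are made up:

#include <cmath>
#include <vector>

// "Clustered" (struct-of-arrays) layout: all weights of a layer sit in one
// contiguous array, one row per neuron, instead of one axon{weight, delta}
// object per connection. The hot loop then streams through a single array.
struct soa_layer {
    int entities;                // neurons in this layer
    int inputs;                  // neurons in the previous layer
    std::vector<float> weights;  // entities * inputs values, row per neuron
    std::vector<float> biases;   // entities values
    std::vector<float> outputs;  // entities values
};

void feed(const std::vector<float>& prev, soa_layer& l)
{
    for (int n = 0; n < l.entities; n++) {
        const float* w = &l.weights[n * l.inputs]; // this neuron's weight row
        float sum = 0.0f;
        for (int n2 = 0; n2 < l.inputs; n2++)
            sum += prev[n2] * w[n2];
        l.outputs[n] = std::tanh(sum / l.inputs + l.biases[n]);
    }
}

I can't promise a 10x win here, but it keeps each neuron's connections together in memory the same way the size columns keep each article's sizes together in one row.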
Last edited on May 20, 2016 at 8:23am UTC
May 20, 2016 at 2:03pm UTC
Lines 6, 8 (the GetRandoms() signature and return): Avoid returning a vector by value; that copies the vector on every call to GetRandoms(). Pass it as an argument by reference instead. If you know the maximum number of randoms in advance, presize the vector using reserve() to prevent repeated reallocations.
Line 204 (the vector<float> randoms declared inside the nested loops in Create()): Avoid declaring a vector inside nested for loops. The vector gets constructed at the top of each inner-loop iteration and destructed at the bottom. Move randoms up to the start of Create() (around line 170) and use vector.clear() at line 204 to ensure the vector is empty at the start of each iteration.
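For example (a sketch of both points, untested): GetRandoms() fills a caller-owned vector instead of returning one by value, and Create() declares that vector once, outside the nested loops:

void GetRandoms(int size, vector<float>& ret)
{
    ret.clear();            // reuse the caller's storage across calls
    ret.reserve(size + 1);  // at most size + 1 values get pushed
    float current = -1.0;
    for (int a = 0; a <= size; a++) {
        current += 2.0 / (size + 1);
        if (current == 0.0) continue;
        current -= ((float)rand() / (float)RAND_MAX) / (size * 10.0);
        if (current < 0) ret.push_back(sqrt(current * -1.0) * -1.0);
        else ret.push_back(current);
    }
    for (int a = 0; a < size; a++) { // shuffle, same as before
        int r = rand() % size;
        float tmp = ret[a];
        ret[a] = ret[r];
        ret[r] = tmp;
    }
}

// In Create(), hoist the vector out of the loops and reuse it:
vector<float> randoms;
for (int y = 1; y < Layers; y++) {
    for (int n = 0; n < layers[y].entities; n++) {
        GetRandoms(layers[y - 1].entities + 1, randoms);
        // ... copy the weights and bias out of randoms as before ...
    }
}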
edit: Correction per JLBorges
Last edited on May 20, 2016 at 3:28pm UTC
May 20, 2016 at 2:16pm UTC
@Necip
Can you give me a code example? Right now I'm not sure what you're talking about.
I'm really lost.
@AbstractionAnon
I knew that using a vector there was a really bad idea.
Luckily, Create() only gets called once in the program's lifetime.