Can anyone help me speed up this code without using any more memory?

Hello everyone!

Here's some really simple neural network code I wrote:
#include <vector>
#include <iostream>
#include <cmath>
#include <cstdlib>   // rand, RAND_MAX
#include <cstdarg>   // va_list, va_start, va_end
namespace nn {

	// Produces shuffled values spread across roughly (-1, 1); used to
	// initialise the weights and biases.
	std::vector<float> GetRandoms(int size)
	{
		std::vector<float> ret;
		float current = -1.0;
		for (int a = 0; a <= size; a++) {
			current += 2.0 / (size + 1);
			if (current == 0.0) continue;
			current -= ((float)rand() / (float)RAND_MAX) / (size * 10.0);
			if (current < 0) ret.push_back(sqrt(current * -1.0) * -1.0);
			else ret.push_back(current);
		}

		int r = 0;
		for (int a = 0; a < size; a++) {
			r = rand() % size;
			current = ret[a];
			ret[a] = ret[r];
			ret[r] = current;
		}
		return ret;
	}

	struct axon
	{
		float weight;
		float delta;
	};

	class neuron
	{
	public:
		axon *axons;

		void correct(float target) {
			error = (target - output) * (1.0 - output * output);

			float e = error;
			e = fabs(e);
			if (e > 1.0) e = 1.0;
			ea += (e - ea) * (fabs(e - ea) * e * ea);
		}

		float output;
		float error;

		float bias;
		float deltabias;

		float ea;
	};

	struct layer
	{
		neuron *n;
		int entities;
	};

	class network
	{
	public:

		float *input;
		neuron *output;

		layer *layers;
		int layer_count;

		network() {
			layer_count = 0;
		}

		void Correct(float *in)
		{
			for (int a = 0; a < layers[layer_count - 1].entities; a++) {
				layers[layer_count - 1].n[a].correct(in[a]);
			}
		}

		void Outputs(float *out)
		{
			for (int a = 0; a < layers[layer_count - 1].entities; a++) {
				out[a] = layers[layer_count - 1].n[a].output;
			}
		}

		void Inputs(float *in)
		{
			for (int a = 0; a < layers[0].entities; a++) {
				input[a] = in[a];
			}
		}

		// Backpropagation: push the output-layer errors back through the
		// hidden layers, then apply the weight and bias updates.
		void Learn()
		{
			float sum, e;
			for (int y = layer_count - 2; y > 0; y--)
			{
				for (int n = layers[y].entities - 1; n >= 0; n--) {
					sum = 0.0;
					for (int n2 = layers[y + 1].entities - 1; n2 >= 0; n2--) {
						sum += layers[y + 1].n[n2].error * layers[y + 1].n[n2].axons[n].weight;
					}
					sum /= layers[y + 1].entities;
					layers[y].n[n].error = (1.0 - layers[y].n[n].output * layers[y].n[n].output) * sum;
					e = layers[y].n[n].error;
					e = fabs(e);
					if (e > 1.0) e = 1.0;
					layers[y].n[n].ea += (e - layers[y].n[n].ea) * (fabs(e - layers[y].n[n].ea) * e * layers[y].n[n].ea);
				}
			}

			float eta, alpha;
			for (int y = layer_count - 1; y > 1; y--)
			{
				for (int n = layers[y].entities - 1; n >= 0; n--) {
					alpha = eta = layers[y].n[n].ea * layers[y].n[n].ea;
					alpha = 1.0 - alpha;
					for (int n2 = layers[y - 1].entities - 1; n2 >= 0; n2--) {
						layers[y].n[n].axons[n2].delta = layers[y - 1].n[n2].output * layers[y].n[n].error * eta + layers[y].n[n].axons[n2].delta * alpha;
						layers[y].n[n].axons[n2].weight += layers[y].n[n].axons[n2].delta;
					}
					layers[y].n[n].deltabias = layers[y].n[n].error * eta + layers[y].n[n].deltabias * alpha;
					layers[y].n[n].bias += layers[y].n[n].deltabias;
				}
			}

			for (int n = layers[1].entities - 1; n >= 0; n--) {
				alpha = eta = layers[1].n[n].ea * layers[1].n[n].ea;
				alpha = 1.0 - alpha;
				for (int n2 = layers[0].entities - 1; n2 >= 0; n2--) {
					layers[1].n[n].axons[n2].delta = ((float*)layers[0].n)[n2] * layers[1].n[n].error * eta + layers[1].n[n].axons[n2].delta * alpha;
					layers[1].n[n].axons[n2].weight += layers[1].n[n].axons[n2].delta;
				}
				layers[1].n[n].deltabias = layers[1].n[n].error * eta + layers[1].n[n].deltabias * alpha;
				layers[1].n[n].bias += layers[1].n[n].deltabias;
			}
		}

		// Forward pass: averaged weighted sums through each layer with a
		// tanh activation.
		void FeedForward()
		{
			int w = 0;
			float sum;
			for (int n = 0; n < layers[1].entities; n++) {
				sum = 0.0;
				for (int n2 = 0; n2 < layers[0].entities; n2++) {
					sum += ((float *)layers[0].n)[n2] * layers[1].n[n].axons[n2].weight;
				}
				sum /= layers[0].entities;
				layers[1].n[n].output = tanh(sum + layers[1].n[n].bias);
			}

			for (int y = 1; y < layer_count - 1; y++) {
				for (int n = 0; n < layers[y + 1].entities; n++) {
					sum = 0.0;
					for (int n2 = 0; n2 < layers[y].entities; n2++) {
						sum += layers[y].n[n2].output * layers[y + 1].n[n].axons[n2].weight;
					}
					sum /= layers[y].entities;
					layers[y + 1].n[n].output = tanh(sum + layers[y + 1].n[n].bias);
				}
			}
		}

		// Builds the network. 'Layers' is the number of hidden layers and the
		// variadic arguments give their sizes; the input and output layers are
		// added around them.
		void Create(int inputs, int outputs, int Layers, ...) {
			Layers += 2;
			layers = new layer[Layers];
			layers[0].entities = inputs;

			input = new float[inputs];
			va_list vl;
			va_start(vl, Layers );
			for (int a = 1; a < Layers - 1; a++){
				layers[a].entities = va_arg(vl, int);
			}
			va_end(vl);
			layers[Layers - 1].entities = outputs;

			layer_count = Layers;
			int a = 0;
			for (int y = 1; y < Layers; y++) {
				layers[y].n = new neuron[layers[y].entities];
				for (int n = 0; n < layers[y].entities; n++) {
					layers[y].n[n].deltabias = 0.0;
					layers[y].n[n].error = 0.0;
					layers[y].n[n].output = 0.0;
					layers[y].n[n].bias = 0.0;
					layers[y].n[n].ea = 1.0;
					layers[y].n[n].axons = new axon[layers[y - 1].entities];
				}
			}
			layers[0].n = (neuron *)input;
			output = layers[Layers - 1].n;
			for (int n = 0; n < layers[0].entities; n++) {
				((float *)layers[0].n)[n] = 0.0;
			}

			for (int y = 1; y < Layers; y++) {
				for (int n = 0; n < layers[y].entities; n++) {
					std::vector<float> randoms = GetRandoms(layers[y - 1].entities + 1);
					for (int a = 0; a < layers[y - 1].entities; a++) {
						layers[y].n[n].axons[a].weight = randoms[a];
						layers[y].n[n].axons[a].delta = 0.0;
					}
					layers[y].n[n].bias = randoms[layers[y - 1].entities];
					layers[y].n[n].deltabias = 0.0;
				}
			}
		}

		void free() {
			if (!layer_count) return;
			delete[] input;
			for (int y = 1; y < layer_count; y++) {
				for (int n = 0; n < layers[y].entities; n++) {
					delete[] layers[y].n[n].axons;
				}
				delete[] layers[y].n;
			}
			delete[] layers;
		}
	};
}


I feel like it could be done better; the code could be faster.
This is how I use the class:
struct learn_xor
{
	float input1;
	float input2;
	float target;
};

vector<learn_xor> problem;

	nn::network n;

	n.Create(2, 1, 1, 2);
	problem.push_back({ 0.0, 0.0, 0.0 });
	problem.push_back({ 1.0, 0.0, 1.0 });
	problem.push_back({ 1.0, 1.0, 0.0 });
	problem.push_back({ 0.0, 1.0, 1.0 });
	int a = 0;

	while (1)
	{
		float error = 0;
		cout << "epoch:" << a << endl;
		for (int b = 0; b < 4; b++)
		{	
			cout << "inputs:" << problem[b].input1 << "/" << problem[b].input2 << endl;
			n.input[0] = problem[b].input1;
			n.input[1] = problem[b].input2;
			n.FeedForward();
			
			cout << "results:" << n.output[0].output << endl;
			cout << "targets:" << problem[b].target << endl << endl;;
			n.output[0].correct(problem[b].target);
			n.Learn();
			error += fabs(problem[b].target - n.output[0].output);
		}
		cout << "error:" << error / 4 << endl << endl << endl;
		a++;
		system("pause");
	}
closed account (48bpfSEw)
You can speed up your code by using clusters.

We had a database of articles (clothes), where every article could come in different sizes:

Table ARTICLES
Nr, Size
article 1, cloth-size S
article 1, cloth-size L
article 1, cloth-size X
article 1, cloth-size XXL
article 2, cloth-size S
article 2, cloth-size XXL


I had the idea of making clusters, that is, storing the sizes as attributes of a single row:

Table ARTICLES
Nr, sizeS, sizeL, sizeX, sizeXXL
article 1, y, y, y, y
article 2, y, n, n, y


The speed-up in data access was about 10 times!


What if your nodes could manage 10 connections at the same time?!
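
Applied to the network here, I read that as: keep each layer's weights in one contiguous block instead of one small axon array per neuron, so the inner loops walk memory linearly. A rough sketch of that kind of layout — the names and the struct are only illustrative, not from the posted code:

#include <vector>
#include <cmath>

// One contiguous weight block per layer instead of a per-neuron axon array.
struct flat_layer
{
	int inputs = 0;                // neurons in the previous layer
	int neurons = 0;               // neurons in this layer
	std::vector<float> weights;    // neurons * inputs values, one row per neuron
	std::vector<float> bias;       // one per neuron
	std::vector<float> output;     // one per neuron
};

// Forward pass over one layer; 'prev' holds the previous layer's outputs.
inline void feed_forward(flat_layer& l, const std::vector<float>& prev)
{
	for (int n = 0; n < l.neurons; ++n) {
		const float* w = &l.weights[n * l.inputs];
		float sum = 0.0f;
		for (int i = 0; i < l.inputs; ++i)
			sum += prev[i] * w[i];
		l.output[n] = std::tanh(sum / l.inputs + l.bias[n]);   // same averaged tanh as the original
	}
}

Whether that buys anything here would need measuring, but it keeps every "connection" of a neuron side by side in memory.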
In GetRandoms() (the function's signature and the local ret vector): avoid returning a vector by value. That involves copying the vector for every call to GetRandoms(). Instead, pass it as an argument by reference. If you know the maximum number of randoms in advance, presize the vector using reserve() to prevent repeated reallocations of the vector.

In Create(): avoid declaring a vector inside nested for loops (the vector<float> randoms declaration in the weight-initialisation loops). That causes the vector to be constructed for each iteration of the inner loop and destructed at the bottom of it. Move the declaration of randoms to the top of Create(), and call clear() where it is currently declared so the vector is empty at the start of each iteration.
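
Something along these lines, if it helps — just a sketch of the suggested refactor; the two-argument signature is mine, not from the posted code, while the filling and shuffling logic is kept as in the original:

#include <vector>
#include <cstdlib>   // rand, RAND_MAX
#include <cmath>     // sqrt
#include <utility>   // swap

// Sketch: fill a caller-supplied vector instead of returning a fresh one,
// reserve() up front, and reuse the same vector across the loops in Create().
void GetRandoms(int size, std::vector<float>& ret)
{
	ret.clear();                 // reuse the caller's storage
	ret.reserve(size + 1);       // no repeated reallocations while pushing
	float current = -1.0f;
	for (int a = 0; a <= size; a++) {
		current += 2.0f / (size + 1);
		if (current == 0.0f) continue;
		current -= ((float)rand() / (float)RAND_MAX) / (size * 10.0f);
		ret.push_back(current < 0.0f ? -std::sqrt(-current) : current);
	}
	for (int a = 0; a < (int)ret.size(); a++)   // shuffle the values
		std::swap(ret[a], ret[rand() % ret.size()]);
}

In Create() the vector would then be declared once, before the two initialisation loops, and passed in: GetRandoms(layers[y - 1].entities + 1, randoms);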

edit: Correction per JLBorges



@Necip

Can you give me a code example? Right now I'm not sure what you're talking about.
I'm really lost.

@AbstractionAnon
I knew that using a vector there was a really bad idea.
Luckily, Create() only gets called once in the program's lifetime.
> Avoid returning a vector by value. That involves copying the vector for every call to GetRandoms().

No.

Every mainstream compiler implements NRVO.
If a function returns a class type by value, and the return statement's expression is the name of a non-volatile object with automatic storage duration, which isn't a function parameter or a catch clause parameter, and which has the same type (ignoring top-level cv-qualification) as the return type of the function, then copy/move is omitted. When that local object is constructed, it is constructed directly in the storage where the function's return value would otherwise be moved or copied to. This variant of copy elision is known as NRVO, "named return value optimization".
http://en.cppreference.com/w/cpp/language/copy_elision


#include <vector>
#include <numeric>   // std::iota

std::vector<int> foo()
{
    std::vector<int> vec( 1'000'000 ) ;
    std::iota( std::begin(vec), std::end(vec), 0 ) ;
    return vec ; // NRVO 
}

std::vector<int> bar()
{
    std::vector<int> vec( 1'000'000 ) ;
    std::iota( std::begin(vec), std::end(vec), 0 ) ;
    return std::move(vec) ; // warning: moving a local object in a return statement prevents copy elision
                            // note: remove std::move call here
}

http://coliru.stacked-crooked.com/a/d96bf5c00ba9f95e