Testing algorithm speed....

So basically, I have 2 algorithms. One is 40x faster than the other. There are only 3 major code differences between them.

The algorithm is basically alpha blending one bitmap onto the other.

Bench1 = slow one
Bench2 = fast one

When I run Bench1 then Bench2, Bench2 will take about the same amount of processing time as Bench1. However, if I run Bench2 then Bench1, it will show that Bench1 is 40x faster than Bench2.

However, if I run Bench1, then deallocate and reallocate the bitmaps, and then run Bench2, Bench2 will run normally.

This is my time class that I'm using to benchmark. The time is given in milliseconds.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
//.h
// Include guard: the closing #endif at the bottom of this header needs a
// matching #ifndef/#define pair, otherwise the preprocessor rejects the file.
#ifndef TIME_HPP
#define TIME_HPP

#pragma comment (lib,"Winmm.lib")

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <mmsystem.h>

#include <limits> // numeric_limits<>::epsilon

/********************************************************************
class cTime
Singleton interval timer.  Calling the object (operator()) records how
much time passed since the previous call (or since construction) and
stores it; GetDTime() returns that stored difference.
********************************************************************/
class cTime
{
public:
	// Access to the single shared instance, and its explicit teardown.
	static cTime*	Instance();
	static void		Destroy();

	// Takes a new time sample and updates the stored delta.
	void	operator()();
	// Delta captured by the most recent operator() call (0 until the first call).
	UINT64	GetDTime() const {return mDTime;}
	// NOTE(review): declared here, but no definition appears in the
	// accompanying source listing -- calling it would fail to link.
	float	GetGameTime(); // GetTickCount

private:
	// Construction and destruction are private: use Instance() / Destroy().
	cTime();
	~cTime();

	static cTime	*mInstance;		// the single shared object, 0 when not created
	UINT64  mLastGameTime;			// time sample taken by the previous operator() call

	UINT64	mDTime;					// difference between the last two samples
};

#endif // TIME_HPP


This is its source file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include "Time.hpp"

// Storage for the singleton pointer; starts out null (no instance created yet).
cTime	*cTime::mInstance	=	0;

// Returns the shared cTime object, allocating it on the first call.
cTime*		cTime::Instance()
{
	if(!mInstance)
		mInstance = new cTime;

	return mInstance;
}

// Frees the shared cTime object (if any) and resets the pointer so that a
// later Instance() call creates a fresh one.
void		cTime::Destroy()
{
	// delete on a null pointer does nothing, so no explicit check is required.
	delete mInstance;
	mInstance = 0;
}

// Takes the initial time sample so the first operator() call has a reference
// point; the stored delta starts at zero.
cTime::cTime()
	: mLastGameTime(static_cast<UINT64>(timeGetTime()))
	, mDTime(0)
{
}

// Nothing to release: the class holds only plain integer members.
cTime::~cTime()
{
}

// Samples the clock, stores the milliseconds elapsed since the previous
// sample in mDTime, and remembers the new sample for the next call.
void		cTime::operator()()
{
	const DWORD now			= timeGetTime();
	const DWORD previous	= static_cast<DWORD>(mLastGameTime);

	// timeGetTime() is a 32-bit millisecond counter that wraps back to 0.
	// Subtracting in 32-bit unsigned arithmetic still gives the right
	// interval across a wrap; widening to 64 bits first (as before) would
	// produce a huge bogus delta.
	mDTime = static_cast<DWORD>(now - previous);

	mLastGameTime = now;
}


This is the 'main'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include "time.hpp"
#include <iostream>

using namespace std;

// Number of full passes over the buffers inside each timed run.
long int MAX = 1000;

// Number of floats in each of the Src and Dest buffers.
int BMPSize = 100;

/**
 * Benchmark 1 (the "slow" formulation): blends Src onto Dest using
 *     Dest = Dest * (1 - alpha) + Src * alpha
 * for the first three floats of every 4-float pixel, then sets the fourth
 * float of Dest to 1.  In this variant alpha is taken from the pixel's first
 * float (Src[Indx]).
 *
 * The blend is repeated MAX times per timed run; five runs are timed and each
 * elapsed time (as reported by cTime::GetDTime) is printed on its own line.
 *
 * @param Src   source buffer, at least BMPSize floats
 * @param Dest  destination buffer, at least BMPSize floats; modified in place
 */
void Bench1(float *&Src, float *&Dest)
{
	cTime *TPtr  =	cTime::Instance();

	for( int cnt = 0; cnt < 5; ++cnt )
	{
		(*TPtr)(); // start of the timed interval
		for( int i = 0; i < MAX; ++i )
		{
			// Advance one whole pixel (4 floats) per iteration.  The bound
			// "Indx + 3 < BMPSize" skips a trailing partial pixel instead of
			// writing past the end when BMPSize is not a multiple of 4.
			for( int Indx = 0; Indx + 3 < BMPSize; Indx += 4 )
			{
				//only difference is here!
				const float UnitAlpha		= Src[Indx];
				const float InverseAlpha	= 1-UnitAlpha;

				// Explicit offsets instead of "Dest[++Indx] = Dest[Indx]...":
				// that form modified and read Indx in the same expression, so
				// the right-hand side did not reliably use the same channel
				// as the element being written.
				Dest[Indx]		= Dest[Indx]*InverseAlpha   + Src[Indx]   * UnitAlpha;
				Dest[Indx+1]	= Dest[Indx+1]*InverseAlpha + Src[Indx+1] * UnitAlpha;
				Dest[Indx+2]	= Dest[Indx+2]*InverseAlpha + Src[Indx+2] * UnitAlpha;
				Dest[Indx+3]	= 1;
			}
		}

		(*TPtr)(); // end of the timed interval

		std::cout<<TPtr->GetDTime();
		std::cout<<std::endl;
	}
}

/**
 * Benchmark 2 (the "fast" formulation): blends Src onto Dest using
 *     Dest = alpha * (Src - Dest) + Dest
 * for the first three floats of every 4-float pixel, then sets the fourth
 * float of Dest to 1.  In this variant alpha is taken from the pixel's fourth
 * float (Src[Indx+3]).
 *
 * The blend is repeated MAX times per timed run; five runs are timed and each
 * elapsed time (as reported by cTime::GetDTime) is printed on its own line.
 *
 * @param Src   source buffer, at least BMPSize floats
 * @param Dest  destination buffer, at least BMPSize floats; modified in place
 */
void Bench2(float *&Src, float *&Dest)
{
	cTime *TPtr  =	cTime::Instance();

	for( int cnt = 0; cnt < 5; ++cnt )
	{
		(*TPtr)(); // start of the timed interval
		for( int i = 0; i < MAX; ++i )
		{
			// Advance one whole pixel (4 floats) per iteration.  The bound
			// "Indx + 3 < BMPSize" guarantees that Src[Indx+3] and
			// Dest[Indx+3] are inside the buffers even when BMPSize is not a
			// multiple of 4.
			for( int Indx = 0; Indx + 3 < BMPSize; Indx += 4 )
			{
				//only difference is here!
				const float UnitAlpha	= Src[Indx+3];

				// Explicit offsets instead of "Dest[++Indx] = ...Dest[Indx]...":
				// that form modified and read Indx in the same expression, so
				// the right-hand side did not reliably use the same channel
				// as the element being written.
				Dest[Indx]		= UnitAlpha * (Src[Indx]   - Dest[Indx])   + Dest[Indx];
				Dest[Indx+1]	= UnitAlpha * (Src[Indx+1] - Dest[Indx+1]) + Dest[Indx+1];
				Dest[Indx+2]	= UnitAlpha * (Src[Indx+2] - Dest[Indx+2]) + Dest[Indx+2];
				Dest[Indx+3]	= 1;
			}
		}

		(*TPtr)(); // end of the timed interval

		std::cout<<TPtr->GetDTime();
		std::cout<<std::endl;
	}
}

int main()
{
	//I'm allocating because deletion is the only way to regain up to actual speed
	float *Src = new float[BMPSize];
	float *Dest = new float[BMPSize];

	delete [] Src;
	delete [] Dest;

	Src = new float[BMPSize];
	Dest = new float[BMPSize];

	////////////////BENCH1///////////////////
	//normalizing the buffer
	memset(Dest,0,BMPSize);
	memset(Src,0,BMPSize);

	//Bench1 is the slower algorithm
	cout<<"Bench1: Processing slower algorithm\n";
	Bench1(Src,Dest);

	cout<<"\n\n";

	////////////////BENCH2///////////////////
	//normalizing the buffer
	memset(Dest,0,BMPSize);
	memset(Src,0,BMPSize);

	//Starting the faster algorithm
	cout<<"Bench2: Processing faster algorithm.  Did not deallocate\n";
	Bench1(Src,Dest);

	cout<<"\n\n";

	////////////////BENCH2///////////////////
	//Starting the faster one - with deallocation
	delete [] Src;
	delete [] Dest;

	Src = new float[BMPSize];
	Dest = new float[BMPSize];

	memset(Dest,0,BMPSize);
	memset(Src,0,BMPSize);

	//Starting the faster one
	cout<<"Bench2: Processing faster algorithm.  Decallocated, this is actual processing speed of this algorithm\n";
	Bench2(Src,Dest);

	system("pause");
	
	return 0;
}


Output
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
Bench1: Processing slower algorithm
44
40
40
56
49


Bench2: Processing faster algorithm.  Did not deallocate
46
40
41
41
41


Bench2: Processing faster algorithm.  Decallocated, this is actual processing sp
eed of this algorithm
1
1
2
1
1
Last edited on
I'm assuming you are asking why they're different. I suspect it's the typo on line 98.

No swearing allowed :)
Last edited on
Topic archived. No new replies allowed.