Finding duplicate files using Windows and native C++

Jul 28, 2016 at 2:45am
This is a console program, but I've borrowed a lot of code from Windows examples (such as the recursive directory-listing algorithm), which is why I'm asking the question here.

I want to write an algorithm that finds duplicate files: first it has to find files of the same size, and if their hashes also match, the files are displayed as output.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class duplicateFinder {
private:
	bool isDuplcateFound = false;
	multimap<int, wchar_t*> duplicate;
	multimap<int, wchar_t*>::iterator iterate;
	multimap<char**, int> Hash;
	multimap<char**, int> confirmation;
public:
	bool processDrive(const wchar_t* drive);
	int getFileSize(const char* file);
	void display();
};

int duplicateFinder::getFileSize(const char* file)
{
	ifstream mySource;
	mySource.open(file, ios_base::binary);
	mySource.seekg(0, ios_base::end);
	int size = mySource.tellg();
	mySource.close();
	return size;
}

bool duplicateFinder::processDrive(const wchar_t* sDir)
{
	// referred http://www.stackoverflow.com/questions/2314542/listing-directory-contents-using-c-and-windows
	//Map creation and usage
	WIN32_FIND_DATA fdFile;
	HANDLE hFind = NULL;

	wchar_t sPath[2048];
	wsprintf(sPath, L"%s\\*.*", sDir);

	if ((hFind = FindFirstFile(sPath, &fdFile)) == INVALID_HANDLE_VALUE)
	{
		wprintf(L"Path not found: [%s]\n", sDir);
		return false;
	}

	do
	{
		
		if (wcscmp(fdFile.cFileName, L".") != 0
			&& wcscmp(fdFile.cFileName, L"..") != 0)
		{
			
			wsprintf(sPath, L"%s\\%s", sDir, fdFile.cFileName);
			if (fdFile.dwFileAttributes &FILE_ATTRIBUTE_DIRECTORY)
			{
				wprintf(L"Directory: %s\n", sPath);
				processDrive(sPath); 
			}
			else
			{
				//wprintf(L"File: %s\n", sPath);
				char** arr;
				char* hash = new char[MAX_PATH];
				memset(hash, 0, MAX_PATH);
				int correction;
				correction = wcstombs(hash, sPath, MAX_PATH);
				iterate = duplicate.find(getFileSize(hash));
				if (iterate == duplicate.end())
				{
					duplicate.insert(pair<int, wchar_t*>(getFileSize(hash), sPath));
				}
			}
		}
	} while (FindNextFile(hFind, &fdFile));
	FindClose(hFind);
	return isDuplcateFound;
}

void duplicateFinder::display()
{
	for (multimap<int, wchar_t*>::iterator it = duplicate.begin(); it != duplicate.end(); ++it)
		wcout << it->first << " => " << it->second << '\n';

}


The multimap ends up containing all files. How could I program it to add only files of the same size?

Thank you for your time
Last edited on Jul 28, 2016 at 2:46am
Aug 2, 2016 at 10:50am
You may use a map<> with the file size as the key and a vector of file names as the value.
Aug 15, 2016 at 5:11pm
Hi, sorry for the late reply. Could you explain briefly? Even if I use a container as the value (a vector, as you suggested), how would it display both files? My program only displays one of the duplicate files, not both.
Topic archived. No new replies allowed.