Huffman algorithm

Hello.

I have been using the forum for almost a year and always got help from people around here. The forum contributed greatly to my c++ skills, a language i picked up as a hobby and now i wish to give back with a Huffman algorithm i wrote, and i would like to give a shout out to @TheIdeasMan for putting in his time and helping me optimize it and fix bugs.

the code:
source.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
#######################################################################################################################################
Copyright 2017 Daniel Rossinsky

Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#######################################################################################################################################
*/

#include"Huffman.h"

int main(int argc, char *argv[]) {
	
	if (argc < 4) std::cout << "Too few arguments\n";
	else if (argc == 4) {
		if (*argv[1] == 'c') Huffman::compress(argv[2], argv[3], argv[3]);
		else if (*argv[1] == 'd') {
			std::string temp{ argv[2] };
			std::size_t pathEnd{ temp.find_last_of("/\\") };
			Huffman::decompress(argv[2], argv[3], temp.substr(0, pathEnd + 1));
		}//end of else if
		else std::cout << "Unknown command\n";
	}//end of else if
	else std::cout << "Too much arguments\n";
	return 0;
	
	//Huffman::compress("C:/Users/User/Desktop/test.txt", "C:/Users/User/Desktop/", "C:/Users/User/Desktop/");
	//Huffman::decompress("C:/Users/User/Desktop/testCompressed.bin", "C:/Users/User/Desktop/testKey.bin", "C:/Users/User/Desktop/");
}

/*
cmd example:
-----------
compress:
syntax: huffman.exe c filePath dest
example: C:/Users/User/Desktop/huffman.exe c C:/Users/User/Desktop/test.txt C:/Users/User/Desktop/

decompress:
syntax: huffman.exe d filePath keyPath
example: C:/Users/User/Desktop/huffman.exe d C:/Users/User/Desktop/testCompressed.bin C:/Users/User/Desktop/testKey.bin

NOTE:
-----
You can use the commented code in main instead
*/


Huffman.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#ifndef HUFFMAN
#define HUFFMAN

#include<iostream>
#include<map>
#include<vector>
#include<string>
#include<deque>
#include<memory>

namespace Huffman {
	namespace inner {
		struct node;

		/*type aliases*/
		using Table		= std::map<char, std::size_t>;
		using Cypher	= std::map<char, std::vector<bool> >;
		using smartNode = std::unique_ptr<node>;
		/*type aliases*/

		struct node {
			smartNode	m_left;
			smartNode	m_right;
			std::size_t m_frequency{};
			char		m_data{};

			node() = default;
			node(smartNode left, smartNode right) :
				m_left{ std::move(left) }, m_right{ std::move(right) } {
				m_frequency = m_left->m_frequency + m_right->m_frequency;
			}
		};

		struct functor {
			bool operator()(smartNode const& first, smartNode const& second) const
			{
				return first->m_frequency > second->m_frequency;
			}
		};

		/*shared functions*/
		smartNode makeTree(std::deque<smartNode>& nodeData);
		void readFile(const std::string& filePath, std::string& fileContent);
		std::deque<smartNode> storeFreqTable(const Table& table);
		/*shared functions*/

		/*compressor related functions*/
		void setNameAndExten(const std::string& filePath, std::string& fileName, std::string& fileExten);
		void UpdateFreqTable(Table& freqTable, const std::string& fileContent);
		void encode(smartNode const &root, Cypher& key, std::vector<bool>& code);
		void createBinaryFile(const std::string& filePath,
							  const std::string& fileName,
							  const std::string& fileContent,
							  Cypher& key,
							  std::vector<bool>& code);
		void createKey(const std::string& filePath,
					   const Table& freqTable,
					   const std::string& fileName,
					   const std::string& fileExten);
		/*compressor related functions*/

		/*decompressor related functions*/
		void readKey(Table& freqTable,
					 std::string& fileExten,
					 const std::string keyPath,
			         std::string& fileContent);
		std::size_t decodedContentSize(const Table& freqTable);
		void decode(const std::string& filePath,
					std::string& decodedContent,
					smartNode root,
					std::string& fileName,
					std::string& fileContent);
		void createFile(const std::string& decodedContent,
						const std::string& locToDecompress,
						const std::string& fileName,
						const std::string& fileExten);
		/*decompressor related functions*/
	}//end of inner namespace

	void compress(const std::string& filePath, const std::string& locToCreateKey, const std::string& locToCompress);
	void decompress(const std::string& filePath, const std::string& keyPath, const std::string& locToDecompress);
}//end of Huffman namespace

#endif 
Last edited on
Huffman.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#include"Huffman.h"
#include<fstream>
#include<sstream>
#include<algorithm>
#include<cstdlib>


/*----------------SHARED_FUNCTIONS_START----------------*/
Huffman::inner::smartNode Huffman::inner::makeTree(std::deque<smartNode>& nodeData) {
	while (nodeData.size() > 1) {
		std::sort(nodeData.begin(), nodeData.end(), functor());
		smartNode leftSon{ std::move(nodeData.back()) };
		nodeData.pop_back();
		smartNode rightSon{ std::move(nodeData.back()) };
		nodeData.pop_back();
		smartNode parent = std::make_unique<node>(std::move(leftSon), std::move(rightSon));
		nodeData.emplace_back(std::move(parent));
	}//end of while loop
	return std::move(nodeData.front());
}

void Huffman::inner::readFile(const std::string& filePath, std::string& fileContent) {
	std::ifstream inFile(filePath, std::ios::binary);
	if (inFile.is_open()) {
		auto const start_pos{ inFile.tellg() };
		inFile.ignore(std::numeric_limits<std::streamsize>::max());
		std::streamsize char_count{ inFile.gcount() };
		inFile.seekg(start_pos);
		fileContent = std::string(static_cast<std::size_t>(char_count), '0');
		inFile.read(&fileContent[0], static_cast<std::streamsize> (fileContent.size()));
		inFile.close();
	}//end of if
	else {
		std::cout << "Unable to open file\n";
		std::exit(EXIT_FAILURE);
	}//end of else
}

std::deque<Huffman::inner::smartNode> Huffman::inner::storeFreqTable(const Table& table) {
	std::deque<smartNode> nodeData;
	for (const auto& index : table) {
		smartNode leaf = std::make_unique<node>();
		leaf->m_data = index.first;
		leaf->m_frequency = index.second;
		nodeData.emplace_back(std::move(leaf));
	}//end of for loop
	return nodeData;
}
/*-----------------SHARED_FUNCTIONS_END-----------------*/

/*-----------------COMPRESSOR_FUNCTIONS_START-----------------*/
void Huffman::inner::setNameAndExten(const std::string& filePath,
									 std::string& fileName,
					                 std::string& fileExten) {
	std::size_t foundName{ filePath.find_last_of("/\\") };
	std::size_t foundExten{ filePath.find_last_of('.') };
	fileName = filePath.substr(foundName + 1, foundExten - foundName - 1);
	fileExten = filePath.substr(foundExten);
}

void Huffman::inner::UpdateFreqTable(Table& freqTable, const std::string& fileContent) {
	for (const auto& data : fileContent) {
		++freqTable[data];
	}//end of for loop
}

void Huffman::inner::encode(smartNode const &root,
							Cypher& key,
							std::vector<bool>& code) {
	if (root->m_left != nullptr) {
		code.emplace_back(false);
		encode(std::move(root->m_left), key, code);
	}//end of if
	if (root->m_right != nullptr) {
		code.emplace_back(true);
		encode(std::move(root->m_right), key, code);
	}//end of if 
	if (root->m_data) key[root->m_data] = code;
	if (!code.empty()) code.pop_back();
}

void Huffman::inner::createBinaryFile(const std::string& filePath,
									  const std::string& fileName,
									  const std::string& fileContent,
									  Cypher& key,
									  std::vector<bool>& code) {
	int offSet{}; int tempBuff{}; int inBuff{};
	std::ofstream outFile(filePath + fileName + "Compressed.bin", std::ios::binary);
	if (outFile.is_open()) {
		for (const auto& data : fileContent) {
			tempBuff = data;
			code = key[static_cast<char>(tempBuff)];
			for (const auto& index : code) {
				inBuff |= index << (7 - offSet);
				++offSet;
				if (offSet == 8) {
					offSet = 0;
					outFile.put(static_cast<char>(inBuff));
					inBuff = 0;
				}//end of if
			}//end of for loop
		}//end of for loop
		outFile.close();
	}//end of if
	else {
		std::cout << "Unable to open file\n";
		std::exit(EXIT_FAILURE);
	}//end of else
}

void Huffman::inner::createKey(const std::string& filePath,
							   const Table& freqTable,
							   const std::string& fileName,
							   const std::string& fileExten) {
	std::ofstream outFile(filePath + fileName + "Key.bin", std::ios::binary);
	if (outFile.is_open()) {
		auto&& index{ freqTable.begin() };
		do {
			outFile.put(index->first);
			outFile.put(' ');
			outFile << std::to_string(index->second);
			++index;
			if (index != freqTable.end()) outFile.put(' ');
		} while (index != freqTable.end());
		outFile << fileExten;
		outFile.close();
	}//end of if
	else {
		std::cout << "Unable to open file\n";
		std::exit(EXIT_FAILURE);
	}//end of else
}
/*------------------COMPRESSOR_FUNCTIONS_END------------------*/

/*-----------------DECOMPRESSOR_FUNCTIONS_START-----------------*/
void Huffman::inner::readKey(Table& freqTable,
							 std::string& fileExten,
							 const std::string keyPath,
							 std::string& fileContent) {
	char buffer{};
	std::string freq{};
	readFile(keyPath, fileContent);
	for (std::size_t index{}; index < fileContent.length(); ++index) {
		buffer = fileContent[index];
		index += 2;
		do {
			freq += fileContent[index];
			++index;
		} while ((fileContent[index] != ' ') && (fileContent[index] != '.'));
		if (fileContent[index] == '.') {
			fileExten = fileContent.substr(index, (fileContent.length() - 1));
			index = fileContent.length();
		}//end of if
		else {
			freqTable[buffer] = static_cast<unsigned int>(std::stoi(freq));
			freq.clear();
		}//end of else
	}//end of for
	freqTable[buffer] = static_cast<unsigned int>(std::stoi(freq));
	fileContent.clear();
	fileContent.shrink_to_fit();
}

std::size_t Huffman::inner::decodedContentSize(const Table& freqTable) {
	std::size_t size{};
	for (const auto& index : freqTable) size += index.second;					
	return size;
}

void Huffman::inner::decode(const std::string& filePath,
						    std::string& decodedContent,
						    smartNode root,
							std::string& fileName,
							std::string& fileContent) {
	node* temp = root.get();
	int offSet{}; int inBuff{};
	std::size_t foundName{ filePath.find_last_of("/\\") };
	fileName = filePath.substr(foundName + 1, filePath.find_last_of('C') - foundName - 1);		
	readFile(filePath, fileContent);
	for (const auto& data : fileContent) {
		inBuff = data;
		while (offSet < 8) {
			if (inBuff & (1 << (7 - offSet))) temp = temp->m_right.get();
			else							  temp = temp->m_left.get();
			if (temp->m_data) {
				decodedContent += temp->m_data;
				temp = root.get();
			}//end of if 
			++offSet;
		}//end of while
		offSet = 0;
	}//end of for
}

void Huffman::inner::createFile(const std::string& decodedContent,
								const std::string& locToDecompress,
								const std::string& fileName,
								const std::string& fileExten) {
	std::ofstream outFile(locToDecompress + fileName + fileExten, std::ios::binary);
	if (outFile.is_open()) {
		outFile.write(&decodedContent[0], static_cast<std::streamsize>(decodedContent.size()));
		outFile.close();
	}//end of if
	else {
		std::cout << "Unable to open file\n";
		std::exit(EXIT_FAILURE);
	}//end of else
}
/*------------------DECOMPRESSOR_FUNCTIONS_END------------------*/
Last edited on
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
void Huffman::compress(const std::string& filePath,
					   const std::string& locToCreateKey,
					   const std::string& locToCompress) {
	std::string                        fileName;
	std::string                        fileExten;
	Huffman::inner::setNameAndExten(filePath, fileName, fileExten);

	std::string						   fileContent;
	Huffman::inner::readFile(filePath, fileContent);

	Huffman::inner::Table			   freqTable;
	Huffman::inner::UpdateFreqTable(freqTable, fileContent);

	Huffman::inner::smartNode root = Huffman::inner::makeTree(Huffman::inner::storeFreqTable(freqTable));

	Huffman::inner::Cypher             key;
	std::vector<bool>                  code;
	encode(root, key, code);
	Huffman::inner::createBinaryFile(locToCompress, fileName, fileContent, key, code);
	Huffman::inner::createKey(locToCreateKey, freqTable, fileName, fileExten);
}

void Huffman::decompress(const std::string& filePath,
						 const std::string& keyPath,
						 const std::string& locToDecompress) {
	Huffman::inner::Table       freqTable;
	std::string                 fileExten;
	std::string                 fileContent;
	Huffman::inner::readKey(freqTable, fileExten, keyPath, fileContent);

	Huffman::inner::smartNode root = Huffman::inner::makeTree(Huffman::inner::storeFreqTable(freqTable));

	std::string                 fileName;
	std::string                 decodedContent;
	decodedContent.reserve(Huffman::inner::decodedContentSize(freqTable));
	decode(filePath, decodedContent, std::move(root), fileName, fileContent);
	Huffman::inner::createFile(decodedContent, locToDecompress, fileName, fileExten);
}
Last edited on
The code was edited to use unique_ptr for memory management and namespaces.
Any critique is welcome.
Last edited on
Topic archived. No new replies allowed.