LZW Compressor
 All Files Functions Typedefs
lzw_v3.cpp
Go to the documentation of this file.
1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 #include <algorithm>
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <exception>
25 #include <fstream>
26 #include <ios>
27 #include <iostream>
28 #include <istream>
29 #include <limits>
30 #include <map>
31 #include <memory>
32 #include <ostream>
33 #include <stdexcept>
34 #include <string>
35 #include <utility>
36 #include <vector>
37 
39 using CodeType = std::uint16_t;
40 
41 namespace globals {
42 
44 const CodeType dms {std::numeric_limits<CodeType>::max()};
45 
46 } // namespace globals
47 
53 void compress(std::istream &is, std::ostream &os)
54 {
55  std::map<std::pair<CodeType, char>, CodeType> dictionary;
56 
57  // "named" lambda function, used to reset the dictionary to its initial contents
58  const auto reset_dictionary = [&dictionary] {
59  dictionary.clear();
60 
61  const long int minc = std::numeric_limits<char>::min();
62  const long int maxc = std::numeric_limits<char>::max();
63 
64  for (long int c = minc; c <= maxc; ++c)
65  {
66  // to prevent Undefined Behavior, resulting from reading and modifying
67  // the dictionary object at the same time
68  const CodeType dictionary_size = dictionary.size();
69 
70  dictionary[{globals::dms, static_cast<char> (c)}] = dictionary_size;
71  }
72  };
73 
74  reset_dictionary();
75 
76  CodeType i {globals::dms}; // Index
77  char c;
78 
79  while (is.get(c))
80  {
81  // dictionary's maximum size was reached
82  if (dictionary.size() == globals::dms)
83  reset_dictionary();
84 
85  if (dictionary.count({i, c}) == 0)
86  {
87  // to prevent Undefined Behavior, resulting from reading and modifying
88  // the dictionary object at the same time
89  const CodeType dictionary_size = dictionary.size();
90 
91  dictionary[{i, c}] = dictionary_size;
92  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
93  i = dictionary.at({globals::dms, c});
94  }
95  else
96  i = dictionary.at({i, c});
97  }
98 
99  if (i != globals::dms)
100  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
101 }
102 
108 void decompress(std::istream &is, std::ostream &os)
109 {
110  std::vector<std::pair<CodeType, char>> dictionary;
111 
112  // "named" lambda function, used to reset the dictionary to its initial contents
113  const auto reset_dictionary = [&dictionary] {
114  dictionary.clear();
115  dictionary.reserve(globals::dms);
116 
117  const long int minc = std::numeric_limits<char>::min();
118  const long int maxc = std::numeric_limits<char>::max();
119 
120  for (long int c = minc; c <= maxc; ++c)
121  dictionary.push_back({globals::dms, static_cast<char> (c)});
122  };
123 
124  const auto rebuild_string = [&dictionary](CodeType k) -> std::vector<char> {
125  std::vector<char> s; // String
126 
127  while (k != globals::dms)
128  {
129  s.push_back(dictionary.at(k).second);
130  k = dictionary.at(k).first;
131  }
132 
133  std::reverse(s.begin(), s.end());
134  return s;
135  };
136 
137  reset_dictionary();
138 
139  CodeType i {globals::dms}; // Index
140  CodeType k; // Key
141 
142  while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
143  {
144  // dictionary's maximum size was reached
145  if (dictionary.size() == globals::dms)
146  reset_dictionary();
147 
148  if (k > dictionary.size())
149  throw std::runtime_error("invalid compressed code");
150 
151  std::vector<char> s; // String
152 
153  if (k == dictionary.size())
154  {
155  dictionary.push_back({i, rebuild_string(i).front()});
156  s = rebuild_string(k);
157  }
158  else
159  {
160  s = rebuild_string(k);
161 
162  if (i != globals::dms)
163  dictionary.push_back({i, s.front()});
164  }
165 
166  os.write(&s.front(), s.size());
167  i = k;
168  }
169 
170  if (!is.eof() || is.gcount() != 0)
171  throw std::runtime_error("corrupted compressed file");
172 }
173 
179 void print_usage(const std::string &s = "", bool su = true)
180 {
181  if (!s.empty())
182  std::cerr << "\nERROR: " << s << '\n';
183 
184  if (su)
185  {
186  std::cerr << "\nUsage:\n";
187  std::cerr << "\tprogram -flag input_file output_file\n\n";
188  std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
189  std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
190  std::cerr << "Examples:\n";
191  std::cerr << "\tlzw_v3.exe -c license.txt license.lzw\n";
192  std::cerr << "\tlzw_v3.exe -d license.lzw new_license.txt\n";
193  }
194 
195  std::cerr << std::endl;
196 }
197 
205 int main(int argc, char *argv[])
206 {
207  if (argc != 4)
208  {
209  print_usage("Wrong number of arguments.");
210  return EXIT_FAILURE;
211  }
212 
213  enum class Mode {
214  Compress,
215  Decompress
216  };
217 
218  Mode m;
219 
220  if (std::string(argv[1]) == "-c")
221  m = Mode::Compress;
222  else
223  if (std::string(argv[1]) == "-d")
224  m = Mode::Decompress;
225  else
226  {
227  print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
228  return EXIT_FAILURE;
229  }
230 
231  const std::size_t buffer_size {1024 * 1024};
232 
233  // these custom buffers should be larger than the default ones
234  const std::unique_ptr<char[]> input_buffer(new char[buffer_size]);
235  const std::unique_ptr<char[]> output_buffer(new char[buffer_size]);
236 
237  std::ifstream input_file;
238  std::ofstream output_file;
239 
240  input_file.rdbuf()->pubsetbuf(input_buffer.get(), buffer_size);
241  input_file.open(argv[2], std::ios_base::binary);
242 
243  if (!input_file.is_open())
244  {
245  print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
246  return EXIT_FAILURE;
247  }
248 
249  output_file.rdbuf()->pubsetbuf(output_buffer.get(), buffer_size);
250  output_file.open(argv[3], std::ios_base::binary);
251 
252  if (!output_file.is_open())
253  {
254  print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
255  return EXIT_FAILURE;
256  }
257 
258  try
259  {
260  input_file.exceptions(std::ios_base::badbit);
261  output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
262 
263  if (m == Mode::Compress)
264  compress(input_file, output_file);
265  else
266  if (m == Mode::Decompress)
267  decompress(input_file, output_file);
268  }
269  catch (const std::ios_base::failure &f)
270  {
271  print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
272  return EXIT_FAILURE;
273  }
274  catch (const std::exception &e)
275  {
276  print_usage(std::string("Caught exception: ") + e.what() + '.', false);
277  return EXIT_FAILURE;
278  }
279 
280  return EXIT_SUCCESS;
281 }