1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
|
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <numeric>
#include <set>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Character-classification facet that makes a handful of punctuation marks
// count as whitespace.
//
// Background (paraphrased from cppreference.com): formatted stream extraction
// consults the std::ctype<char> facet of the locale imbued in the stream to
// decide which characters separate tokens. Imbuing a stream with a locale that
// carries this facet therefore makes `stream >> word` split on the listed
// delimiters as well as on ordinary whitespace.
class my_ctype : public std::ctype<char>
{
public:
    // refs is forwarded unchanged to the std::ctype<char> base.
    // The `false` argument tells the base not to delete the table, because
    // the table is a data member of this object.
    my_ctype(size_t refs = 0) : std::ctype<char>(my_table, false, refs)
    {
        // Start from the classic "C" classification...
        std::copy(classic_table(), classic_table() + table_size, my_table);

        // ...then overwrite the mask of every delimiter with `space`.
        // Sample list; extend or trim it to suit the input being tokenized.
        const char delimiters[] = {'-', '\'', '(', ')', '!', ',', '/', '.', '"', '&'};
        for (const char delimiter : delimiters)
        {
            my_table[static_cast<unsigned char>(delimiter)] = space;
        }
    }

private:
    mask my_table[table_size]; // per-character classification masks
};
// Folds capitalised duplicates onto their lowercase form, so that "Go" and
// "go" both end up as "go" and collapse to one entry when de-duplicated.
//
// A word is rewritten only when BOTH hold:
//   * its first character is uppercase, and
//   * the same word with a lowercase first character is already in `words`.
// Words that occur only capitalised (e.g. proper nouns) are left untouched.
//
// Parameters: words - the word list, modified in place.
// Returns:    a reference to the same `words` vector.
std::vector<std::string>& case_vector(std::vector<std::string>& words)
{
    // Snapshot of the distinct words for O(log n) membership tests instead of
    // a linear scan per word. A snapshot is sufficient: every rewrite only
    // turns a word into a spelling that is already in the list.
    const std::set<std::string> known_words(words.begin(), words.end());

    // One locale for both classification and conversion. The locale overloads
    // accept plain char, so bytes >= 0x80 are handled safely (passing a
    // negative char to the one-argument C isupper is undefined behaviour).
    const std::locale loc;

    for (auto& word : words)
    {
        if (word.empty() || !std::isupper(word[0], loc))
        {
            continue; // nothing to fold
        }

        std::string lowercase_initial_word = word;
        lowercase_initial_word[0] = std::tolower(word[0], loc);

        if (known_words.count(lowercase_initial_word) != 0)
        {
            word = lowercase_initial_word;
        }
    }
    return words;
}
// Character comparator that places each uppercase letter directly after its
// lowercase counterpart: a < A < b < B < ... < z < Z.
//
// Every unsigned char value gets a unique rank, so two different characters
// never compare as equivalent:
//   * codes below 'A' keep their natural order and come first
//     (so '\0' is the smallest value),
//   * then the 52 letters, interleaved as described above,
//   * then every remaining non-letter code in natural order.
// Assumes an ASCII-compatible character set in which 'a'..'z' and 'A'..'Z'
// are contiguous.
class comp_char
{
private:
    std::vector<int> collation_table; // rank for each unsigned char value
public:
    // One entry per possible unsigned char value (max() + 1, i.e. including
    // the value max() itself).
    comp_char() : collation_table(std::numeric_limits<unsigned char>::max() + 1)
    {
        const int code_count = static_cast<int>(collation_table.size());
        int next_rank = 0;

        // Everything that precedes the letters keeps its natural position.
        for (int code = 0; code < 'A'; ++code)
        {
            collation_table[code] = next_rank++;
        }

        // Letters: lowercase first, immediately followed by its uppercase form.
        for (int i = 0; i < 26; ++i)
        {
            collation_table['a' + i] = next_rank++;
            collation_table['A' + i] = next_rank++;
        }

        // Remaining non-letters, in natural order, after all letters.
        for (int code = 'Z' + 1; code < code_count; ++code)
        {
            if (code >= 'a' && code <= 'z')
            {
                continue; // already ranked above
            }
            collation_table[code] = next_rank++;
        }
    }

    // Returns true when `a` sorts strictly before `b`.
    bool operator()(unsigned char a, unsigned char b) const
    {
        return collation_table[a] < collation_table[b];
    }
};
struct cmp_str//the functor to be used in sort() below to place lowercase and uppercase words of same letter adjacent;
{
bool operator()(std::string const &a, std::string const &b)
{
comp_char cmp;
size_t i = 0;
while (a[i] == b[i] && i < a.size())
++i;
return cmp(a[i], b[i]);
}
};
// Reads F:\test.txt, splits every line into words (whitespace plus the
// delimiters configured in my_ctype), folds capitalised duplicates via
// case_vector, removes duplicates, sorts with cmp_str and prints each word
// left-aligned in a 20-character column.
// Returns 0 on success, 1 when the input file cannot be opened.
int main()
{
    // Input-only stream: the file is never written, so do not ask for write
    // access (a read/write fstream fails to open a read-only file).
    std::ifstream File("F:\\test.txt");
    if (!File.is_open())
    {
        std::cerr << "Unable to open F:\\test.txt" << '\n';
        return 1;
    }

    // Classic locale with the ctype facet swapped for my_ctype. The locale
    // takes ownership of the facet and deletes it; built once and reused for
    // every line.
    const std::locale delimiter_locale(std::locale::classic(), new my_ctype);

    std::vector<std::string> v;
    std::string line;
    // Loop on the read itself so a failed final read is never processed.
    while (std::getline(File, line))
    {
        std::istringstream stream(line);
        stream.imbue(delimiter_locale); // tokenise on the custom delimiters
        std::copy(std::istream_iterator<std::string>(stream),
                  std::istream_iterator<std::string>(),
                  std::back_inserter(v));
    }

    case_vector(v);

    // Passing through a set removes duplicates; with many duplicates this is
    // cheaper than sorting the whole vector and calling unique.
    const std::set<std::string> s(v.begin(), v.end());
    v.assign(s.begin(), s.end());

    // Re-sort so upper- and lowercase words of the same letter are adjacent.
    std::sort(v.begin(), v.end(), cmp_str());

    for (const auto& word : v)
    {
        std::cout << std::left << std::setw(20) << word;
    }
    return 0;
}
|