Indian Computing Olympiad problem implementation

This is the link to the problem : http://opc.iarcs.org.in/index.php/problems/WORDLIST

This is what I coded, but unfortunately I'm getting a runtime error even though it seems logically and syntactically correct.

#include<bits/stdc++.h>
using namespace std;
// Word-list builder: reads a line count N followed by N lines of text and
// prints the number of distinct words, then each distinct word on its own
// line in lexicographic order, all in lowercase.
//
// A word is a maximal run of alphabetic characters. Every other character is
// treated as a separator, which covers the delimiters handled originally
// (space . ; , :) as well as tabs, carriage returns and apostrophes.
int main()
{
    ios::sync_with_stdio(false);

    int lineCount = 0;
    if (!(cin >> lineCount))
        lineCount = 0;  // no readable count: behave as if there were no text

    // `cin >> lineCount` stops before the end-of-line, so the remainder of
    // that line must be discarded or the first getline() returns "".
    cin.ignore(numeric_limits<streamsize>::max(), '\n');

    // An ordered set both removes duplicates and keeps the words sorted.
    set<string> distinctWords;

    string line;
    for (int lineNo = 0; lineNo < lineCount && getline(cin, line); ++lineNo)
    {
        string word;
        for (const char raw : line)
        {
            // <cctype> functions require a value representable as unsigned char.
            const unsigned char ch = static_cast<unsigned char>(raw);
            if (isalpha(ch))
            {
                word += static_cast<char>(tolower(ch));
            }
            else if (!word.empty())
            {
                distinctWords.insert(word);
                word.clear();
            }
        }
        // A line may end in a word with no trailing separator.
        if (!word.empty())
            distinctWords.insert(word);
    }

    // Output: the distinct-word count, then one word per line.
    cout << distinctWords.size() << '\n';
    for (const string& word : distinctWords)
        cout << word << '\n';

    return 0;
}
Several things need to be done to clean up the given raw text and turn it into a dictionary of sorts:

1. Remove delimiters such as periods, commas, semicolons, ampersands and exclamation marks that appear in the text. This is done by the my_ctype class in the code below; you can edit its list of delimiters as you wish.

2. Then make sure that words like "Go" and "go" appear only as "go" in the dictionary, while words like "Nike" that appear with an uppercase initial throughout are left unchanged. This is done by the function case_vector() in the code below.

3. then remove all duplicate words - this is done by passing the initial vector<string> through a set<string>

4. finally we want to make sure that words like "four" and "Fed" appear under "F"; unless something is done all words beginning upper-case appear before all words beginning lower-case. This is done through the comp_char class and the cmp_str functor using comp_char objects. This functor is passed to std::sort() that operates on a vector assigned with the elements of the set constructed in 3 above

You may also wish to split up the code into header and implementation files. Please read, research and if anything is still unclear do come back here.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <numeric>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
//From cppreference.com (mostly): Class ctype encapsulates character classification features. All stream input operations performed
//through std::basic_istream<charT> use the std::ctype<charT> of the locale imbued in the stream to identify whitespace characters
//for input tokenization. A locale, in turn, includes a ctype facet that classifies character types. Such a facet, incorporating
//further characters, could be as follows:
// ctype<char> facet whose classification table marks a fixed list of
// punctuation characters as whitespace, so that formatted stream extraction
// (operator>>) splits tokens on them when a locale carrying this facet is
// imbued in the stream.
class my_ctype : public ctype<char>{
    private:
        mask my_table[table_size];  // classification table handed to the ctype<char> base
    public:
        my_ctype(size_t refs = 0) : std::ctype<char>(my_table, false, refs){
        // Begin with the classic "C" classification of every character ...
        copy_n(classic_table(), table_size, my_table);
        // ... then reclassify each delimiter as whitespace. This is a sample
        // list; extend or trim it to suit the delimiters being handled.
        for (const char* delimiter = "-'()!,/.\"&"; *delimiter != '\0'; ++delimiter)
            my_table[static_cast<unsigned char>(*delimiter)] = space;
    }
};
// Normalises capitalised duplicates in place: a word whose first character is
// uppercase is replaced by its lowercase-initial form when that form also
// occurs somewhere in `words` (so "Go" and "go" both end up as "go"), while a
// word that only ever appears capitalised (e.g. "Nike") is left untouched.
//
// Only the first character is examined and changed. Returns `words` itself so
// calls can be chained.
vector<string>& case_vector(vector<string>& words)
{
    // One locale for both the test and the conversion. The locale overloads
    // accept any char value, unlike the single-argument <cctype> isupper().
    const locale loc;

    // Snapshot of the words for O(log n) membership tests. A snapshot is
    // sufficient: the loop only rewrites a word into a spelling that was
    // already present, and never touches lowercase-initial words.
    const set<string> present(words.begin(), words.end());

    for (auto& word : words)
    {
        if (word.empty() || !isupper(word[0], loc))
            continue;  // nothing to normalise

        string lowercase_initial_word = word;
        lowercase_initial_word[0] = tolower(word[0], loc);

        if (present.count(lowercase_initial_word) != 0)
            word[0] = lowercase_initial_word[0];
    }
    return words;
}
// Character ordering that places each uppercase letter directly after its
// lowercase counterpart ('a' < 'A' < 'b' < 'B' < ...), so that words starting
// with the same letter sort together regardless of case.
//
// Every character value gets a distinct sort key:
//   - a non-letter or lowercase letter with code ch gets key 2 * ch;
//   - an uppercase letter gets the key of its lowercase form plus one.
// Non-letters therefore keep their relative order, and no two characters
// compare as equivalent.
class comp_char
{
    private:
        std::vector<int> collation_table;  // sort key, indexed by unsigned char value
    public:
        // One entry per possible unsigned char value (max() + 1 of them), so
        // that the largest value is also a valid index.
        comp_char() : collation_table(std::numeric_limits<unsigned char>::max() + 1)
        {
            for (std::size_t ch = 0; ch < collation_table.size(); ++ch)
            {
                collation_table[ch] = static_cast<int>(ch) * 2;
            }

            // Slot each uppercase letter into the odd key right after its
            // lowercase counterpart.
            for (int i = 0; i < 26; i++)
            {
                collation_table['A' + i] = ('a' + i) * 2 + 1;
            }
        }

    // Returns true when `a` sorts strictly before `b`.
    bool operator()(unsigned char a, unsigned char b) const
    {
        return collation_table[a] < collation_table[b];
    }
};
struct cmp_str//the functor to be used in sort() below to place lowercase and uppercase words of same letter adjacent;
{
    bool operator()(std::string const &a, std::string const &b)
    {
        comp_char cmp;
        size_t i = 0;
        while (a[i] == b[i] && i < a.size())
            ++i;
        return cmp(a[i], b[i]);
    }
};

// Builds a dictionary from a text file and prints it.
//
// Usage: program [path]   (path defaults to "F:\\test.txt")
//
// Steps:
//   1. split every line into words, treating my_ctype's delimiters as whitespace;
//   2. fold capitalised duplicates with case_vector();
//   3. drop duplicate words by passing them through a set;
//   4. sort with cmp_str so upper- and lowercase words of a letter are adjacent;
//   5. print each word left-aligned in a 20-character column.
//
// Returns 1 (after reporting on stderr) when the file cannot be opened.
int main(int argc, char* argv[])
{
    const string path = (argc > 1) ? argv[1] : "F:\\test.txt";

    ifstream File(path);  // read-only: the file is never written to
    if (!File.is_open())
    {
        cerr << "Unable to open " << path << '\n';
        return 1;
    }

    // Locale built from classic() plus the my_ctype facet. The locale takes
    // ownership of the facet pointer and deletes it. Built once and shared by
    // every per-line stream.
    const locale delimiter_locale(locale::classic(), new my_ctype);

    vector<string> v;
    string line;
    // Testing getline()'s result (rather than eof()) stops exactly when no
    // further line could be read.
    while (getline(File, line))
    {
        stringstream stream(line);
        stream.imbue(delimiter_locale);  // tokenise on the custom delimiters
        // Append every whitespace-separated token of this line to the vector.
        copy(istream_iterator<string>(stream), istream_iterator<string>(), back_inserter(v));
    }

    case_vector(v);

    // Passing the words through a set removes the duplicates.
    const set<string> s(v.begin(), v.end());
    v.assign(s.begin(), s.end());

    // Re-sort so that upper- and lowercase words of the same letter are adjacent.
    sort(v.begin(), v.end(), cmp_str());

    for (const auto& word : v)
    {
        cout << left << setw(20) << word;
    }
}

Sample Text
"Gold is being driven by Fed inaction -- the same inaction that last year pointed to three to four rate hikes during 2016, yet has not produced a single one," said Gavin Wendt, founding director & senior resource analyst at MineLife Pty. "This draws into question the underlying robustness of the U.S. economy."

Output: http://pastie.org/10954905



Topic archived. No new replies allowed.