Word counting program using sets

Hi,
I'm new here. I need help with a C++ program.

The goal of the program is to produce a count of the number of times each word occurs in Macbeth, except that a group of common words (and, to, be, etc.) is not to be included in the count. The program should read the words to be excluded from a file and store them in a set. When counting, it should check whether each word read from Macbeth appears in that set and, if so, ignore it.

===================================================================

Code:


#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>

using namespace std;

typedef map<string, int> word_count_list;

// Returns a copy of strToConvert in which every character has been passed
// through tolower().
//
// Each character is converted to unsigned char first: calling tolower() with
// a negative value (any byte >= 0x80 on platforms where plain char is signed)
// is undefined behaviour.
std::string StringToLower(std::string strToConvert) {
    for (std::string::size_type i = 0; i < strToConvert.length(); ++i) {
        const unsigned char ch = static_cast<unsigned char>(strToConvert[i]);
        strToConvert[i] = static_cast<char>(std::tolower(ch));
    }
    return strToConvert;  // the converted copy
}

// Returns a copy of word with every leading punctuation character removed.
// Punctuation inside or at the end of the word is left untouched; a word made
// up only of punctuation becomes the empty string.
std::string TrimHeadPunc(std::string word) {
    // Characters treated as punctuation: . , ! ? \ " ' : [ ] & ( ) ; -
    const char* const punctuation = ".,!?\\\"\':[]&();-";

    // Locate the first character to keep and drop everything before it in a
    // single erase, instead of rescanning and shifting the string once per
    // removed character. When no such character exists the result is npos,
    // and erase(0, npos) empties the string.
    const std::string::size_type firstKept = word.find_first_not_of(punctuation);
    word.erase(0, firstKept);
    return word;
}

// Returns a copy of word with every trailing punctuation character removed.
// Punctuation at the start or inside the word is left untouched; an empty
// word, or one made up only of punctuation, yields the empty string.
std::string TrimTailPunc(std::string word) {
    // Characters treated as punctuation: . , ! ? \ " ' : [ ] & ( ) ; -
    const char* const punctuation = ".,!?\\\"\':[]&();-";

    // Find the last character to keep. The "nothing to keep" case is handled
    // explicitly: comparing a not-found position with length() - 1 on an
    // empty string makes both sides wrap to the same value, which would lead
    // to an out-of-range erase.
    const std::string::size_type lastKept = word.find_last_not_of(punctuation);
    if (lastKept == std::string::npos) {
        word.clear();
    } else {
        word.erase(lastKept + 1);
    }
    return word;
}

// Returns true when word contains no lower-case character, false as soon as
// one is found. Note that this makes the empty string, and words consisting
// only of digits or symbols, count as "capital" as well.
//
// Each character is converted to unsigned char before the islower() call:
// passing a negative value (any byte >= 0x80 on platforms where plain char is
// signed) is undefined behaviour.
bool IsCapitalWord(std::string word) {
    for (std::string::size_type i = 0; i < word.length(); ++i) {
        if (std::islower(static_cast<unsigned char>(word[i]))) {
            return false;
        }
    }
    return true;
}

int main() {

word_count_list word_count;
string filename = "MacbethPlay.txt";
string output = "result.txt";
ifstream infile(filename.c_str());
ofstream outfile(output.c_str());

string word;

if (!infile.is_open()) {
cout << "Error opening file " + filename << endl;
return 0;
}

while (infile >> word) {
// Remove punctuations
int index;
// hyphen
while ((index = word.find_first_of("-.,!?\\\"\':[]&();"))
!= string::npos) {
word.erase(index, 1);
}

if (IsCapitalWord(word))
continue;

word = StringToLower(word);
++word_count[word];
cout << word << " " << word_count[word] << endl;

}

// how many different words in Macbeth?
int nWords = 0;
// Print out the word counts.
word_count_list::const_iterator current(word_count.begin());

while (current != word_count.end()) {
// cout << "The word '" << current->first << "' appears " << current->second <<" times." << endl;

++current;
++nWords;
}

cout << endl << endl << endl << "word count finished" << endl;
cout << "There are (approximately) " << nWords << "different words in Macbeth \n";

cout << "Word count finished." << endl;
return 0;


}

====================================================================

where i am stuck:

I can read the words to be excluded from the file and insert them into a set:

// Load the words to be excluded, one whitespace-separated token at a time.
std::ifstream myfile;
myfile.open("WordsToBeExcluded.txt");
std::string words;

// The set is declared before the loop so that it outlives it and collects
// every word; a set declared inside the loop body is created and destroyed
// again on each iteration and never holds more than one word.
std::set<std::string> excludedWords;

while (myfile >> words) {
    excludedWords.insert(words);
}

myfile.close();

// While counting, a word can then be skipped when
// excludedWords.count(word) != 0.

The problem is that I cannot figure out how to make the program look up each word in the set of excluded words and skip it during the count. I was thinking of using the set's .find() and .erase() member functions in a loop, but I don't know how to use them for the multiple words in the set.

Any help would be appreciated, thanks in advance.
When you read a word, check whether it is in the set using set::count (http://www.cplusplus.com/reference/stl/set/count/)
before increasing its counter.
Please use [code][/code] tags
Thanks!
Topic archived. No new replies allowed.