Words array problem

Hi.

I have an assignment to grab a text file, read all the words from it, assort them in an array, remove duplicates, and to count how many times the word appear in the text.
I finished all of it, except one issue that I can't figure out.
The issue is that if I have a word like 'candidate-driven', I'm getting on the console 'candidatedriven', or high-calibre gives me 'highcalibre'.

If someone here can just guide me, or to give me an idea how to fix it, that would be great.

This is the text that I have in the txt file (I didn't find a way to post the file here):


txt file
Organisations across all industries will face a major shortage of IT professionals in 2011 and be forced to offer higher salaries to secure top talent, according to a Global Salary Survey recently compiled by international recruitment consultancy Robert Walters.

The annual Salary Survey reports that demand for IT professionals across all disciplines is expected to increase significantly in 2011, due to the ongoing roll out of major projects and technology upgrades.

Says Robert Walters’ Managing Director – Australia, James Nicholson, “Across Australia we expect to see fierce competition for top talent and a very candidate-driven market, which will lead to increased salaries and candidates having multiple options and/or counter offers.

“As a result, some companies will need to look overseas to source high-calibre IT professionals.”



here's my code:

main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include<iostream>
#include<iomanip>
#include<fstream>
#include"words.h"
int main()
{
	WordCounts para;
	para.readFile();
	para.populate();
	para.sort();
	para.removeDuplicates();
	para.printList();

	system("pause");
	return 0;
}


definitions.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#pragma once
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>
#include "words.h"

using namespace std;

WordCounts::WordCounts()
{
	//initializing values;
	string arrWordsTemp [1000] = {""};
	string arrWords [1000] = {""};
	counter = 0;
	temp = "";
}
void WordCounts::readFile()
{
	iFile.open ("C:\\Users\\Zur\\Downloads\\shortage (1).txt");

	string word;
	if (iFile.is_open())
	{
		while (!iFile.eof())
		{
			iFile >> word;
			arrWordsTemp [counter] += word;
			counter++;
		}
	}
}

void WordCounts::populate()
{
	for (int x = 0; x < counter; x++)
	{
		for (int y = 0; y < arrWordsTemp[x].length(); y++)
		{
			if ( arrWordsTemp[x][y] >= 65 && arrWordsTemp[x][y] <= 90)
		{
			arrWordsTemp[x][y] = tolower(arrWordsTemp[x][y]);
		}

			if (arrWordsTemp[x][y] >= 97 && arrWordsTemp[x][y] <= 122)
			{
				list[x].word += arrWordsTemp[x][y];  //add letters (no punctuation) to new array using outer loop as counter for array subscript.
			}
			if (arrWordsTemp[x][y] >=48 && arrWordsTemp[x][y] <= 57)
			{
				list[x].word += arrWordsTemp[x][y];
			}
			
		}
	}

	//remove empty array subscripts
	for (int a = 0; a < counter; a++)
	{
		for (int b = 0; b < counter; b++)
		{
			if (list[b].word == "")
			{
				temp = list[b].word;
				list[b].word = list[b + 1].word;
				temp = "";
			}
		}
	}

}

void WordCounts::sort() //sort words in non-decreasing order
{
		for (int x = 0; x < counter - 1; x++)
	{
		for (int y = 0; y < counter - 1; y++)
		{
			if (list[y].word > list[y + 1].word)
			{
				temp = list[y].word;
				list[y].word = list[y + 1].word;
				list[y + 1].word = temp;
			}
		}
	}
}

void WordCounts::removeDuplicates()
{
		for(int i=0;i<counter;i++)
	{
		list[i].count=1;
	
		if (list[i].word == "")
		{
			counter = counter - 1;
		}
		}
		for(int i=1; i<counter;)
		{
			if(list[i-1].word == list[i].word)
			{
				list[i-1].count++;
				for(int j=i; j<counter-1;j++)
				{
					list[j].word=list[j+1].word;
				}

				counter=counter-1;
			}else
			{
			i++;
			}
		} 
		
}

void WordCounts::printList()
{
	cout << fixed << setw(15) << "WORD" << fixed << setw(27) << "REPEATED" << endl << endl;
	for (int x = 0; x < counter - 1; x++)
	{
		cout << fixed << setw(15) << list[x].word << fixed << setw(20) << list[x].count << " times" << endl;
	}
}


Header file (words.h)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#pragma once
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>

using namespace std;

class WordCounts
{
public:
	//constructor
	WordCounts();
	
	void readFile();				//reads the file.
	void printList();				//displaying the output.

	void populate();				//populating the words into an array.
	void sort();					//sorting words in non-decreasing order.
	void removeDuplicates();			//removing duplicates and counts for repeating word.

private:
	struct wordRec					//Structure that contains a string for words, and a counter for repeating duplicates.
	{
		string word;				//string value that stores the words
		int count;					//indicates how many times repeated 
	};
	
	wordRec list[2000];	//An array that holds the words and counts.
	string arrWordsTemp[1000];
	string temp;					//used to swap
	
	int counter;					//number of the words
	ifstream iFile;					//used to get file

};


           WORD                   REPEATED

           2011                   2 times
              a                   4 times
      according                   1 times
         across                   3 times
            all                   2 times
            and                   4 times
          andor                   1 times
         annual                   1 times
             as                   1 times
      australia                   3 times
             be                   1 times
             by                   1 times
candidatedriven                   1 times
     candidates                   1 times
      companies                   1 times
    competition                   1 times
       compiled                   1 times
    consultancy                   1 times
        counter                   1 times
         demand                   1 times
       director                   1 times
    disciplines                   1 times
            due                   1 times
         expect                   1 times
       expected                   1 times
           face                   1 times
         fierce                   1 times
            for                   2 times
         forced                   1 times
         global                   1 times
         having                   1 times
    highcalibre                   1 times
Press any key to continue . . .



I had to cut the output.. but you can see what I mean..

Thanks for your help.
Well your populate() function in the class is ignoring all punctuation including "-"
I had to write a program like this recently, not using txt but similar. :\. I don't really know what I would recommend doing though X_O.

Topic archived. No new replies allowed.