Word Frequency C++ Issue

Hi, I need help! My program is supposed to count word occurrences in a text file, and the output is supposed to look like this:

a 2
count 1
hello 1
test 2
this 1
words 1

but instead it produces this:

a 2
a 2
count 1
hello 1
test 2
test 2
this 1
words 1


Is there a way to take the second "a" out of the output file without messing up the count of the other words?
I placed all of the words into two separate arrays and then compared them. Here is my code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/* word occurrences

*/

#include <stdlib.h>

#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>



using namespace std;
/*
 * One entry of the frequency table: a word and the number of times it
 * has been seen.
 */
struct wordCount
{
     std::string word;   // the word itself
     int count = 0;      // occurrences seen so far; starts at zero so a
                         // freshly created entry never holds garbage
};
/*
 * Helper routines for the word-frequency program.
 *
 * The class keeps no state of its own: every routine works on the
 * streams or arrays handed to it by the caller.
 */
class wordFrequency
{
    public:
        //wordFrequency();

        // Returns a word count for "Words.txt". The counter argument is
        // not used by the implementation.
        int countFile(int counter);

        // Opens "Words.txt" on inFile and "WordsOutput.txt" on outFile and
        // writes the column header to the output file.
        void openFile(std::ifstream &inFile, std::ofstream &outFile);

        // Removes period and comma punctuation from arr[0..n-1] in place.
        void periods(std::string arr[], int n);

        // Sorts arr[0..n-1] into ascending order in place.
        void bubbleSort(std::string arr[], int n);
};

int main()
{
    wordFrequency word;
    //Declare variables
    int length;
    int counter;
    
    
    length = word.countFile(counter);
    string wordsM[length];
    struct wordCount wordFreq[length]; //make array of structs
    
    //Declare stream variables
    ifstream inFile;
    ofstream outFile;
    
    
    word.openFile(inFile, outFile);
    
    
    
    while (!inFile.eof()) //while not at end of file
    {
          
          for(int i=0; i < length; i++) 
          {
                    inFile >> wordsM[i];  //read words from file into array
                    word.periods(wordsM, length);
          
          
          wordFreq[i].word = wordsM[i]; //place words from file into 2nd array
          wordFreq[i].count = 0;
          }
          

    }
    word.bubbleSort(wordsM , length); //function to alphabetize words
    for (int i = 0; i < length;i++)
    {
        for (int j = 0; j < length; j++)
        {
            if (wordsM[i] == wordFreq[j].word) 
            {
               //compare the words in the 2 arrays, if there is a match 
               //increment count for that word
               wordFreq[i].count++;
            }
        
        }
        
        outFile << setw(15) << wordsM[i] << setw(4) << wordFreq[i].count <<endl;
    }
    
    inFile.close();
    outFile.close();
    system ("Pause");
    return 0;
}
int wordFrequency::countFile(int counter)
{
    ifstream inFile;
    int counts = 0;
    string str;


    inFile.open("Words.txt");
    while (!inFile.eof())
    {
          inFile >> str;
          counts++;
    }
    
     return counts;
}
/*
 * Opens "Words.txt" for reading and "WordsOutput.txt" for writing, then
 * writes the column header to the output file.
 *
 * inFile  - stream that is attached to the input file
 * outFile - stream that is attached to the output file
 *
 * Nothing is returned; callers should test both streams afterwards.
 * When the input file cannot be opened the output file is not touched,
 * so a failed run does not wipe out the results of an earlier one.
 */
void wordFrequency::openFile(ifstream &inFile, ofstream &outFile)
{
    //open input file
    inFile.open("Words.txt");

    if (!inFile)
    {
        cout << "Cannot open input file. Program terminates!" << endl;
        return;
    }

    //open output file
    outFile.open("WordsOutput.txt");

    if (!outFile)
    {
        cout << "Cannot open output file." << endl;
        return;
    }

    // "left" stays in effect on outFile, so later rows line up under
    // this header.
    outFile << left << setw(15) << "WORD" << setw(4)
                    <<"OCCURRENCES\n\n"<< endl;
}
/*
 * Deletes every period and comma from each of the first n strings of arr
 * so that punctuation does not make otherwise equal words compare unequal.
 *
 * arr - array of words, modified in place
 * n   - number of elements of arr to process
 */
void wordFrequency::periods(string arr[], int n)
{
	for (int i = 0; i < n; i++)
	{
          // Keep searching from the position of the last removal: a word
          // can carry more than one mark ("e.g.", "wait...").
          string::size_type found = 0;
          while ((found = arr[i].find_first_of(".,", found)) != string::npos)
          {
                arr[i].erase(found, 1);
          }
     }
}
/*
 * Arranges the first n strings of arr in ascending order using bubble sort.
 *
 * arr - array of words, reordered in place
 * n   - number of elements of arr to sort
 */
void wordFrequency::bubbleSort(string arr[], int n)
{
	// Each pass pushes the largest remaining word up to index "last",
	// so the unsorted region shrinks by one element per pass.
	for (int last = n - 1; last > 0; --last)
	{
        for (int k = 0; k < last; ++k)
        {
            if (arr[k + 1] < arr[k])
            {
                string held = arr[k];
                arr[k] = arr[k + 1];
                arr[k + 1] = held;
            }
        }
     }
}






              
My first guess is that you're saving the same word more than once: every occurrence gets its own slot in the array, and the output loop prints one line per slot, so "a" and "test" show up twice.
Topic archived. No new replies allowed.