Hi.
I have an assignment to grab a text file, read all the words from it, assort them in an array, remove duplicates, and to count how many times the word appear in the text.
I finished all of it, except one issue that I can't figure out.
The issue is that if I have a word like 'candidate-driven', I'm getting on the console 'candidatedriven', or high-calibre gives me 'highcalibre'.
If someone here can just guide me, or to give me an idea how to fix it, that would be great.
This is the text that I have in the txt file (I didn't find a way to post the file here):
txt file
Organisations across all industries will face a major shortage of IT professionals in 2011 and be forced to offer higher salaries to secure top talent, according to a Global Salary Survey recently compiled by international recruitment consultancy Robert Walters.
The annual Salary Survey reports that demand for IT professionals across all disciplines is expected to increase significantly in 2011, due to the ongoing roll out of major projects and technology upgrades.
Says Robert Walters’ Managing Director – Australia, James Nicholson, “Across Australia we expect to see fierce competition for top talent and a very candidate-driven market, which will lead to increased salaries and candidates having multiple options and/or counter offers.
“As a result, some companies will need to look overseas to source high-calibre IT professionals.”
here's my code:
main.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
|
#include<iostream>
#include<iomanip>
#include<fstream>
#include"words.h"
int main()
{
WordCounts para;
para.readFile();
para.populate();
para.sort();
para.removeDuplicates();
para.printList();
system("pause");
return 0;
}
|
definitions.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
|
#pragma once
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>
#include "words.h"
using namespace std;
WordCounts::WordCounts()
{
//initializing values;
string arrWordsTemp [1000] = {""};
string arrWords [1000] = {""};
counter = 0;
temp = "";
}
void WordCounts::readFile()
{
iFile.open ("C:\\Users\\Zur\\Downloads\\shortage (1).txt");
string word;
if (iFile.is_open())
{
while (!iFile.eof())
{
iFile >> word;
arrWordsTemp [counter] += word;
counter++;
}
}
}
void WordCounts::populate()
{
for (int x = 0; x < counter; x++)
{
for (int y = 0; y < arrWordsTemp[x].length(); y++)
{
if ( arrWordsTemp[x][y] >= 65 && arrWordsTemp[x][y] <= 90)
{
arrWordsTemp[x][y] = tolower(arrWordsTemp[x][y]);
}
if (arrWordsTemp[x][y] >= 97 && arrWordsTemp[x][y] <= 122)
{
list[x].word += arrWordsTemp[x][y]; //add letters (no punctuation) to new array using outer loop as counter for array subscript.
}
if (arrWordsTemp[x][y] >=48 && arrWordsTemp[x][y] <= 57)
{
list[x].word += arrWordsTemp[x][y];
}
}
}
//remove empty array subscripts
for (int a = 0; a < counter; a++)
{
for (int b = 0; b < counter; b++)
{
if (list[b].word == "")
{
temp = list[b].word;
list[b].word = list[b + 1].word;
temp = "";
}
}
}
}
void WordCounts::sort() //sort words in non-decreasing order
{
for (int x = 0; x < counter - 1; x++)
{
for (int y = 0; y < counter - 1; y++)
{
if (list[y].word > list[y + 1].word)
{
temp = list[y].word;
list[y].word = list[y + 1].word;
list[y + 1].word = temp;
}
}
}
}
void WordCounts::removeDuplicates()
{
for(int i=0;i<counter;i++)
{
list[i].count=1;
if (list[i].word == "")
{
counter = counter - 1;
}
}
for(int i=1; i<counter;)
{
if(list[i-1].word == list[i].word)
{
list[i-1].count++;
for(int j=i; j<counter-1;j++)
{
list[j].word=list[j+1].word;
}
counter=counter-1;
}else
{
i++;
}
}
}
void WordCounts::printList()
{
cout << fixed << setw(15) << "WORD" << fixed << setw(27) << "REPEATED" << endl << endl;
for (int x = 0; x < counter - 1; x++)
{
cout << fixed << setw(15) << list[x].word << fixed << setw(20) << list[x].count << " times" << endl;
}
}
|
Header file (words.h)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
|
#pragma once
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>
using namespace std;
class WordCounts
{
public:
//constructor
WordCounts();
void readFile(); //reads the file.
void printList(); //displaying the output.
void populate(); //populating the words into an array.
void sort(); //sorting words in non-decreasing order.
void removeDuplicates(); //removing duplicates and counts for repeating word.
private:
struct wordRec //Structure that contains a string for words, and a counter for repeating duplicates.
{
string word; //string value that stores the words
int count; //indicates how many times repeated
};
wordRec list[2000]; //An array that holds the words and counts.
string arrWordsTemp[1000];
string temp; //used to swap
int counter; //number of the words
ifstream iFile; //used to get file
};
|
WORD REPEATED
2011 2 times
a 4 times
according 1 times
across 3 times
all 2 times
and 4 times
andor 1 times
annual 1 times
as 1 times
australia 3 times
be 1 times
by 1 times
candidatedriven 1 times
candidates 1 times
companies 1 times
competition 1 times
compiled 1 times
consultancy 1 times
counter 1 times
demand 1 times
director 1 times
disciplines 1 times
due 1 times
expect 1 times
expected 1 times
face 1 times
fierce 1 times
for 2 times
forced 1 times
global 1 times
having 1 times
highcalibre 1 times
Press any key to continue . . . |
I had to cut the output.. but you can see what I mean..
Thanks for your help.