Count the frequency of words in array and delete duplicates

I want to user to input a string and then I will make substring array of words in that string. Then I would count the frequency of each word and delete duplicates in the array.

F[4] is used to store the frequency of each unique word.

e.g. input: I love apple apple

output:
apple 2
i 1
love 1

But on lines 56-70, my function contains some problem that could not output the desired value.

Please help!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  #include<iostream>
#include<fstream>
#include<cmath>
#include<string>
#include<cstring>
using namespace std;

int substring(string str,string li[],int f[]);
void calntf(int no,int f[],double tf[]);

int main(){
	int F[4]={0};
	double TF[4],IDF[4],TFIDF[4];
	int no(0);
	string input,list[4];
	
	cout<<"Please input the query string: ";
	getline(cin,input);
	no=substring(input,list,F);
	calntf(no,F,TF);
	for(int i=0;i<no;i++)cout<<TF[i]<<"\t";





	return 0;
}

int substring(string str,string li[],int f[]){
	//convert to lower case
	for (int i=0;i<str.length();i++)
		str[i]=tolower(str[i]);


	//divide the sentense into substring
	int i(0); // numebr of substring
	int strpos(0);// start position
	int endpos= str.find(' '); //the position of ' '
	for(i;endpos>0;i++){
	li[i]=str.substr(strpos,endpos-strpos);
	strpos=endpos+1;
	endpos=str.find(' ',strpos);
	if(endpos<0){i++;li[i]=str.substr(strpos);}
	}


	//sort alphabetically
	for(int y=0;y<i-1;y++){
		for(int j=y+1;j<i;j++){
			if (li[y]>li[j]) swap(li[y],li[j]);
		}
	}

	//count frequency and erase the duplication
	//initialize the frequency of each word
	for(int y=0;y<i;y++) f[y]=1;
	
		for(int y=0;y<i-1;y++){
			for(int j=y+1;j<i;j++){
			if(li[y].compare(li[j])==0) {
				f[y]++;
					for(int x=j;x<i-1;x++){
					li[x]=li[x+1]; 
					f[x]=f[x+1];
					}	
				li[i-1]="\0"; f[i-1]=NULL;	
			}	
		}
	}


	int m=i;
		for(int y=1; y<i && y<=m; y++) 
	{
		while(li[y-1]==li[y])
		{
			f[y-1]++;
			for(int j=y; j<i; j++)
			{
				li[j-1]=li[j];
				if(j==i-1)
				{
					li[j]="\0";
					f[j]=NULL;
				}
			}
			m--;
		}
	}
		

		for(int x=0;x<i;x++){
			//if(li[x]!="\0"){
			cout<<li[x]<<"\t"<<f[x];
			//}
			cout<<endl;
		}

	return i;

}

void calntf(int no,int f[],double tf[]){
	for(int i=0;i<no;i++){
		if(f[i]!=NULL)
		tf[i]=double(f[i])/no;
}
}


	




apple 2
i 1
love 1
0

0.5 0.25 0.25 -9.25596e+061

When you look at the output you can see that it works ok. It counts the words correctly and also calcs the frequency correctly. The problem is the output. Though you have only 3 distinct words it tries to output 4 and of course the fourth is rubbish.
But if I try to input something like eat eat apple apple
the output is
apple 2
eat 2
1
0

And I intend to delete all the duplicates so I don't want to have that 1 stored.
How can I do to improve that?
You need a variable to store the number of unique words after you have deleted the duplicates.
Topic archived. No new replies allowed.