Pass by reference

I want to read 3 txt file and then extract the words and do some calculation on the frequency etc.

But the 'find' function doesn't work properly.
You can find it on lines 177-185.
It doesn't store the value well.
It output some memory address thing, it's so awkward.
Please help to see what's wrong. Thanks so much!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#include<iostream>
#include<fstream>
#include<cmath>
#include <math.h> 
#include<string>
#include<vector>
#include <algorithm>
using namespace std;

void clearpun(string &str);
void lower(string &str);
void sort(int i,vector<string> &li);
int  substring(string&str,vector<string> &li);
void delandcount(int i,int &w,int f[],vector<string> &li);
void caltf(int no,int &w,int f[],double tf[]);
void calidf(int w,int m[],double idf[]);
void find(vector<string> arr,vector<string> seek,int w,int m[]);
void caltfidf(int w,double tf[],double idf[],double tfidf[]);


int main(){
	int F[4]={0},F1[1000]={0},F2[1000]={0},F3[1000]={0}; //frequency of unique words in each document
	int m[4]={0},m1[1000]={1},m2[1000]={1},m3[1000]={1}; //number of documents that each contains the term i
	double TF[4],TF1[1000],TF2[1000],TF3[1000],IDF[4],IDF1[1000],IDF2[1000],IDF3[1000],TFIDF[4],TFIDF1[1000],TFIDF2[1000],TFIDF3[1000];
	int no[4]={0},word[4]={0},totalword(0);
	vector<string> qlist;vector<string> d1list; vector<string> d2list; vector<string> d3list;
	string input,list[4];
	string fname[3]={"D1.txt","D2.txt","D3.txt"};

	cout<<"Please input the query string: ";
	getline(cin,input);
	clearpun(input);
	lower(input);
	no[0]=substring(input,qlist);
	sort(no[0],qlist);
	delandcount(no[0],word[0],F,qlist);
	caltf(no[0],word[0],F,TF);
	

	ifstream fin;

	for(int i=0;i<3;i++){
	fin.open(fname[i]);

	if (!fin.is_open())               
    {
      cout << "Unable to open file "<< fname[i] << endl;
      exit(-10);
    }


	while(fin>>input){
		
		clearpun(input);
		lower(input);
		if(i==0)       d1list.push_back(input);
		else if(i==1)  d2list.push_back(input);
		else           d3list.push_back(input);
		no[i+1]++;

		if(fin.fail()) exit(-11);
	}
	if(i==0)        {sort(no[1],d1list); delandcount(no[1],word[1],F1,d1list); caltf(no[1],word[1],F1,TF1); }
	else if(i==1)   {sort(no[2],d2list); delandcount(no[2],word[2],F2,d2list); caltf(no[2],word[2],F2,TF2); }
	else            {sort(no[3],d3list); delandcount(no[3],word[3],F3,d3list); caltf(no[3],word[3],F3,TF3); }
	
	fin.close();
	}

	//find the number of documents that each contains the term
		find(d1list,qlist,word[0],m);
		find(d2list,qlist,word[0],m);
		find(d3list,qlist,word[0],m);
		find(d2list,d1list,word[1],m1);
		find(d3list,d1list,word[1],m1);
		find(d1list,d2list,word[2],m2);
		find(d3list,d2list,word[2],m2);
		find(d1list,d3list,word[3],m3);
		find(d2list,d3list,word[3],m3);

		for(int i=0;i<word[1];i++)cout<<m[i];
		//calculate idf
		/*calidf(word[0],m,IDF);
		calidf(word[1],m1,IDF1);
		calidf(word[2],m2,IDF2);
		calidf(word[3],m3,IDF3);

		//calculate tfidf
		caltfidf(word[0],TF,IDF,TFIDF);
		caltfidf(word[1],TF1,IDF1,TFIDF1);
		caltfidf(word[2],TF2,IDF2,TFIDF2);
		caltfidf(word[3],TF3,IDF3,TFIDF3);*/

	//for(int y=0;y<word[0];y++)
		//cout<<qlist[y]<<" "<<F1[y]<<TF1[y]<<endl;

	return 0;
	
}

void clearpun(string &str){
	//remove punctuation
	for(int i=0;i<str.length();i++){
		if(str[i]==','||str[i]=='.'||str[i]=='?'||str[i]==';'||str[i]==':'||str[i]=='-'||str[i]=='!')
			str[i]='\0';
	}
}

void lower(string &str){
	//convert to lower case
	for (int i=0;i<int(str.length());i++)
		str[i]=tolower(str[i]);}



int substring(string&str,vector<string> &li){
	//divide the input sentense into substring
	int i(0); // numebr of word
	int strpos(0);// start position
	int endpos= str.find(' '); //the position of ' '
	for(i;endpos>0;i++){
	li.push_back(str.substr(strpos,endpos-strpos));
	strpos=endpos+1;
	endpos=str.find(' ',strpos);
	if(endpos<0){i++;li.push_back(str.substr(strpos));}
	}
	return i;
}


void sort(int i,vector<string> &li){
	//sort alphabetically
	for(int y=0;y<i-1;y++){
		for(int j=y+1;j<i;j++)
			if (li[y]>li[j]) swap(li[y],li[j]);
		}
}
	
void delandcount(int i,int &w,int f[],vector<string> &li){
	//count frequency and erase the duplication
	//initialize the frequency of each word
	for(int y=0;y<i;y++) f[y]=1;
	w=i; // store the number of unique words
		for(int y=0;y<w-1;y++){
			for(int j=y+1;j<w;j++){
			if(li[y].compare(li[j])==0) {
				f[y]++;
					for(int x=j;x<i-1;x++){
					li[x]=li[x+1]; 
					f[x]=f[x+1];
					}	
				li[w-1]="\0"; f[w-1]=NULL;	w--; 
				j--; //since the order is rearranged need to compare once more, so the j value should remain unchanged
			}	
		}
	}
}



void caltf(int no,int &w,int f[],double tf[]){
	for(int i=0;i<w;i++){
		if(f[i]!=NULL)
		tf[i]=double(f[i])/no;
}
}

void calidf(int w,int m[],double idf[]){
	for(int i=0;i<w;i++){
		idf[i]=1+log(3/m[i])/log(2);
		//cout<<idf[i]<<endl;

	}

}

void find(vector<string> arr,vector<string> seek,int w,int m[]){
    for(int i=0;i<w;i++){
			if (find(arr.begin(), arr.end(), seek[i]) != arr.end() ){
				m[i]++;
			}
			cout<<m[i]<<endl;
	}
	cout<<endl;
}

void caltfidf(int w,double tf[],double idf[],double tfidf[]){
	for(int i=0;i<w;i++){
		tfidf[i]=tf[i]*idf[i];
	}

}

	






I would suggest that you first fix all your compiler warnings and errors (you should have several warnings). Here is what my compiler says about your code:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
main.cpp||In function ‘int main()’:|
main.cpp|24|warning: unused variable ‘IDF’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘IDF1’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘IDF2’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘IDF3’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘TFIDF’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘TFIDF1’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘TFIDF2’ [-Wunused-variable]|
main.cpp|24|warning: unused variable ‘TFIDF3’ [-Wunused-variable]|
main.cpp|25|warning: unused variable ‘totalword’ [-Wunused-variable]|
main.cpp||In function ‘void clearpun(std::string&)’:|
main.cpp|103|warning: comparison between signed and unsigned integer expressions [-Wsign-compare]|
main.cpp||In function ‘int substring(std::string&, std::vector<std::basic_string<char> >&)’:|
main.cpp|121|warning: statement has no effect [-Wunused-value]|
main.cpp||In function ‘void delandcount(int, int&, int*, std::vector<std::basic_string<char> >&)’:|
main.cpp|152|warning: converting to non-pointer type ‘int’ from NULL [-Wconversion-null]|
main.cpp||In function ‘void caltf(int, int&, int*, double*)’:|
main.cpp|163|error: NULL used in arithmetic [-Werror=pointer-arith]|
||=== Build failed: 1 error(s), 12 warning(s) (0 minute(s), 1 second(s)) ===|


Next please show an example of your input file, and any inputs you make to the program. Then show the output your program is generating.
Topic archived. No new replies allowed.