1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
#include<algorithm>
#include<cctype>
#include<cmath>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
#include<vector>
using namespace std;
// Forward declarations; both functions are defined after main().

// Lower-cases str, splits it on spaces into li, sorts the words and stores the
// per-word occurrence counts in f. Returns the number of words it counted.
int substring(string str,string li[],int f[]);
// Fills tf from the counts in f by dividing each count by no.
void calntf(int no,int f[],double tf[]);
// Reads one query line from standard input, lets substring() split it into
// distinct words with their counts, then prints the normalized term frequency
// of each distinct word, separated by tabs.
int main(){
    std::cout<<"Please input the query string: ";
    std::string input;
    std::getline(std::cin,input);

    // substring() splits on the space character, so a line can never hold more
    // words than it has spaces plus one. Sizing the buffers from the input keeps
    // long queries from running past the end of a fixed-size array.
    const std::string::size_type capacity=
        static_cast<std::string::size_type>(std::count(input.begin(),input.end(),' '))+1;

    // Everything is zero-initialised so that slots the helpers leave untouched
    // still hold a defined value.
    std::vector<std::string> words(capacity);
    std::vector<int> F(capacity,0);
    std::vector<double> TF(capacity,0.0);

    // capacity is at least 1, so taking the address of element 0 is always valid.
    const int no=substring(input,&words[0],&F[0]);
    calntf(no,&F[0],&TF[0]);

    // A count of 0 marks a slot freed by merging duplicates; it has no term
    // frequency to report.
    for(int i=0;i<no;i++){
        if(F[i]!=0) std::cout<<TF[i]<<"\t";
    }
    return 0;
}
// Splits a query string into words and tallies how often each distinct word occurs.
//
// The text is lower-cased and split on the space character. Empty pieces caused
// by leading, trailing or repeated spaces are skipped, and a string without any
// space counts as a single word.
//
// Parameters:
//   str - the query text (taken by value; the caller's copy is not modified)
//   li  - output: the distinct words, in ascending std::string order, fill the
//         leading slots
//   f   - output: f[k] is the number of occurrences of li[k]
//
// Returns the total number of words found, duplicates included. This is the
// value calntf() divides by. Slots between the last distinct word and the
// returned count are reset to "" and 0.
//
// Precondition: li and f must each have room for one entry per word. A string
// never contains more words than it has spaces plus one.
//
// Side effect: prints one "word<TAB>count" line per distinct word to standard
// output.
int substring(std::string str,std::string li[],int f[]){
    // convert to lower case; tolower() is only defined for unsigned char values
    for(std::string::size_type k=0;k<str.length();k++)
        str[k]=static_cast<char>(std::tolower(static_cast<unsigned char>(str[k])));

    // divide the sentence into words
    int total=0;                       // number of words found so far
    std::string::size_type start=0;    // first character of the current piece
    while(start<str.length()){
        std::string::size_type stop=str.find(' ',start);
        if(stop==std::string::npos) stop=str.length();   // last piece has no trailing space
        if(stop>start) li[total++]=str.substr(start,stop-start);
        start=stop+1;
    }

    // sort so that equal words become neighbours
    std::sort(li,li+total);

    // collapse every run of equal words into one entry carrying the run length
    int distinct=0;
    for(int first=0;first<total;){
        int next=first+1;
        while(next<total && li[next]==li[first]) next++;
        // distinct never overtakes first, so this cannot clobber an unread word
        if(distinct!=first) li[distinct]=li[first];
        f[distinct]=next-first;
        distinct++;
        first=next;
    }

    // clear the slots freed by merging duplicates
    for(int k=distinct;k<total;k++){
        li[k].clear();
        f[k]=0;
    }

    for(int k=0;k<distinct;k++)
        std::cout<<li[k]<<"\t"<<f[k]<<'\n';

    return total;
}
// Computes the normalized term frequency of each entry: tf[k] = f[k] / no.
//
// Parameters:
//   no - number of slots to process; also the divisor
//   f  - occurrence counts; f[0] .. f[no-1] are read
//   tf - output; tf[0] .. tf[no-1] are all written
//
// Nothing is written when no is zero or negative.
void calntf(int no,int f[],double tf[]){
    for(int k=0;k<no;k++){
        // Every slot is assigned: a count of 0 produces 0.0 instead of leaving
        // tf[k] with whatever value it held before the call.
        tf[k]=static_cast<double>(f[k])/no;
    }
}
|