// Reads a query string and three text documents, tokenises them, and
// computes term-frequency / inverse-document-frequency figures for the query.
#include <cctype>
#include <cmath>
#include <fstream>
#include <iostream>
#include <math.h>
#include <string>
#include <vector>
using namespace std;
// Scrubs non-letter characters from str; the string is modified in place.
void clearpun(string &str);
// Lower-cases str, splits it on spaces into li and returns the word count.
int substring(string str,string li[]);
// Sorts the first i entries of li into ascending (alphabetical) order.
void sort(int i,string li[]);
// Collapses duplicate words among the first i entries of li; f receives the
// occurrence count of each surviving word and w the number of distinct words.
void delandcount(int i,int &w,int f[],string li[]);
// For the first w words, stores the count in f divided by no into tf.
void calntf(int no,int &w,int f[],double tf[]);
// For the first w words, stores 1 + log2(no / f[k]) into idf and prints it.
void calidf(int no,int w,int f[],double idf[]);
// Upper bound on the number of entries substring() can write for `text`:
// every token except the last one is terminated by a space, so the count can
// never exceed (number of spaces + 1).
static int tokenCapacity(const string &text){
    int capacity=1;
    for(string::size_type k=0;k<text.length();++k)
        if(text[k]==' ') ++capacity;
    return capacity;
}

// Program entry point.
//  1. Reads a query line from standard input, tokenises it, sorts the words,
//     merges duplicates and computes the TF and IDF value of each word.
//  2. Reads the three documents D1.txt..D3.txt line by line and stores their
//     cleaned, lower-cased words in list2[word][document].
// Returns 0 on success, -10 if a document cannot be opened and -11 if a read
// error occurs while a document is being processed.
int main(){
    const int DOC_COUNT=3;
    const int MAX_DOC_WORDS=1000;   // capacity of one document column in list2
    const string name[DOC_COUNT]={"D1.txt","D2.txt","D3.txt"};

    string input;
    cout<<"Please input the query string: ";
    getline(cin,input);

    // Size the query buffers from the input itself so that a long query
    // cannot write past the end of a fixed-size array.
    const int capacity=tokenCapacity(input);
    vector<string> queryWords(capacity);
    vector<int> F(capacity,0);
    vector<double> TF(capacity,0.0),IDF(capacity,0.0);

    int word(0);                    // number of distinct query words
    const int no=substring(input,&queryWords[0]);
    sort(no,&queryWords[0]);
    delandcount(no,word,&F[0],&queryWords[0]);
    calntf(no,word,&F[0],&TF[0]);
    calidf(no,word,&F[0],&IDF[0]);

    string list2[MAX_DOC_WORDS][DOC_COUNT];
    int docWords[DOC_COUNT]={0};    // words stored so far for each document
    vector<string> lineWords;       // scratch buffer reused for every line
    for(int i=0;i<DOC_COUNT;i++){
        ifstream fin(name[i].c_str());
        if(!fin.is_open()){
            cout<<"Unable to open file "<<name[i]<<"."<<endl;
            return -10;
        }
        // Read whole lines; extracting a word first would drop it from the line.
        while(getline(fin,input)){
            clearpun(input);
            lineWords.assign(tokenCapacity(input),string());
            const int count=substring(input,&lineWords[0]);
            // Append to this document's column only, never past its capacity.
            for(int k=0;k<count && docWords[i]<MAX_DOC_WORDS;k++)
                list2[docWords[i]++][i]=lineWords[k];
        }
        // Reaching end-of-file is normal; only a real stream error is fatal.
        if(fin.bad()) return -11;
    }
    return 0;
}
// Removes punctuation from str in place.
// Letters and space characters are kept in their original order; every other
// character (digits, punctuation, control characters) is dropped and the
// string is shortened accordingly. Spaces are preserved so that the text can
// still be split into words afterwards.
void clearpun(string &str){
    string::size_type kept=0;   // next write position in the compacted string
    for(string::size_type i=0;i<str.length();++i){
        // isalpha requires a value representable as unsigned char.
        const unsigned char c=static_cast<unsigned char>(str[i]);
        if(isalpha(c) || c==' ')
            str[kept++]=str[i];
    }
    str.resize(kept);
}
// Splits str into lower-case words.
//   str : text to tokenise (taken by value; the caller's copy is untouched).
//   li  : output array; must have room for every word in str
//         (number of spaces + 1 entries is always enough).
// Words are separated by one or more space characters; leading, trailing and
// repeated spaces never produce empty words. Returns the number of words
// written to li (0 for an empty or all-space string).
int substring(string str,string li[]){
    // convert to lower case (tolower needs an unsigned char value)
    for(string::size_type k=0;k<str.length();++k)
        str[k]=static_cast<char>(tolower(static_cast<unsigned char>(str[k])));

    const string::size_type len=str.length();
    string::size_type pos=0;    // current scan position
    int count=0;                // number of words stored so far
    while(pos<len){
        // skip the separator run in front of the next word
        while(pos<len && str[pos]==' ') ++pos;
        if(pos>=len) break;
        // the word ends at the next space or at the end of the string
        string::size_type end=str.find(' ',pos);
        if(end==string::npos) end=len;
        li[count++]=str.substr(pos,end-pos);
        pos=end;
    }
    return count;
}
// Arranges the first i strings of li in ascending order.
// Each slot in turn is compared against every later entry and the two are
// exchanged whenever the later one is smaller.
void sort(int i,string li[]){
    int slot=0;
    while(slot<i-1){
        for(int probe=slot+1;probe<i;++probe){
            if(li[probe]<li[slot]){
                swap(li[slot],li[probe]);
            }
        }
        ++slot;
    }
}
// Counts word frequencies and removes duplicate words.
//   i  : number of words initially held in li.
//   w  : receives the number of distinct words.
//   f  : receives the occurrence count of each distinct word, f[k] for li[k].
//   li : word list; compacted in place so the distinct words occupy
//        li[0..w) in order of first appearance.
// The vacated tail entries li[w..i) are set to the empty string and their
// counts f[w..i) to 0.
void delandcount(int i,int &w,int f[],string li[]){
    // every word starts with a frequency of one
    for(int k=0;k<i;++k) f[k]=1;
    w=i;
    for(int y=0;y<w-1;++y){
        int j=y+1;
        while(j<w){
            if(li[y]!=li[j]){
                ++j;
                continue;
            }
            ++f[y];
            // close the gap; only the live range [j, w) has to move
            for(int x=j;x<w-1;++x){
                li[x]=li[x+1];
                f[x]=f[x+1];
            }
            li[w-1].clear();
            f[w-1]=0;
            --w;
            // j is not advanced: the entry shifted into slot j is still unchecked
        }
    }
}
// Computes the normalised term frequency of each word.
//   no : total number of words, used as the denominator.
//   w  : number of entries to process (read only; not modified).
//   f  : occurrence count of each word.
//   tf : receives f[k] / no for every k in [0, w).
// A word with a zero count, or a non-positive total, yields a frequency of 0
// so that every processed tf entry is always assigned.
void calntf(int no,int &w,int f[],double tf[]){
    for(int i=0;i<w;i++){
        if(f[i]!=0 && no>0)
            tf[i]=static_cast<double>(f[i])/no;
        else
            tf[i]=0.0;
    }
}
// Computes an inverse-frequency weight for each word and prints it.
//   no  : numerator of the ratio.
//   w   : number of entries to process.
//   f   : occurrence count of each word.
//   idf : receives 1 + log2(no / f[k]) for every k in [0, w).
// The ratio is evaluated in floating point so it is not truncated before the
// logarithm is taken. A non-positive count or numerator has no defined
// logarithm and yields 0. Each value is written to standard output on its
// own line.
void calidf(int no,int w,int f[],double idf[]){
    const double ln2=log(2.0);
    for(int i=0;i<w;i++){
        if(f[i]>0 && no>0)
            idf[i]=1.0+log(static_cast<double>(no)/f[i])/ln2;
        else
            idf[i]=0.0;
        cout<<idf[i]<<'\n';
    }
}
|