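// wordFrequency: reads whitespace-separated words from an input file, strips
// punctuation from each word, and writes one "word count" line per distinct
// word to an output file, ordered from the most to the least frequent.
// Usage: ./wordFrequency fileIN fileOUT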
#include <iostream>
#include <fstream>
#include <algorithm>
#include <string>
#include <map>
#include <vector>
// Unqualified standard-library names in this file (map, string, ...) resolve
// through this using-directive.
using namespace std;
// Container type used by main() to tally words: key = word, value = number of
// times that word has been counted. Being a std::map, iteration visits the
// words in ascending key order.
typedef map<string,int> word_count_list;
// Comparison functor that orders (word, count) pairs by count, highest first.
// Meant to be used as the ordering predicate of a standard sorting algorithm.
//
// It used to derive from std::binary_function, which is deprecated since
// C++11 and removed in C++17 (and needs <functional>, which this file never
// included). The base class only supplied three member typedefs, so they are
// declared by hand here to keep the type's interface unchanged.
struct val_lessthan {
    typedef std::pair<std::string,int> first_argument_type;
    typedef std::pair<std::string,int> second_argument_type;
    typedef bool result_type;

    // Returns true when x has a strictly greater count than y, i.e. when x
    // must be placed before y in descending order of frequency. Pairs with
    // equal counts compare equivalent (neither precedes the other), which
    // keeps this a valid strict weak ordering.
    bool operator() (const std::pair<std::string,int>& x,
                     const std::pair<std::string,int>& y) const
    {
        return x.second > y.second;
    }
} val_lt;  // Ready-made instance, used by main() when sorting the word list.
// Removes every punctuation mark from `word`, in place.
static void strip_punctuation(std::string& word) {
    // Single-byte punctuation can safely be removed one character at a time.
    static const char ascii_punct[] = ".,:;\"'!?/()^[]{}\\-_*+";
    // The position must be a std::string::size_type: storing the result of
    // find_first_of() in an unsigned int truncates npos on 64-bit targets, so
    // the comparison below would never see npos and erase() would throw
    // std::out_of_range.
    std::string::size_type pos = 0;
    while ((pos = word.find_first_of(ascii_punct, pos)) != std::string::npos)
        word.erase(pos, 1);

    // The inverted marks may be encoded as more than one byte (UTF-8). They
    // are removed as whole sequences: putting them in a find_first_of() set
    // would strip their individual bytes and corrupt other multi-byte
    // characters that share a byte with them.
    static const char* const wide_punct[] = { "¡", "¿" };
    for (int i = 0; i < 2; ++i) {
        const std::string mark(wide_punct[i]);
        std::string::size_type at = 0;
        while ((at = word.find(mark, at)) != std::string::npos)
            word.erase(at, mark.size());
    }
}

// Entry point: counts how often each word occurs in argv[1] and writes one
// "word count" line per distinct word to argv[2], most frequent first.
// Returns 0 on success and 1 on a usage error or an I/O failure.
int main(int argc, char *argv[]) {
    // Exactly two arguments are expected: the input and the output path.
    if (argc != 3) {
        std::cout << "USE: ./wordFrequency fileIN fileOUT" << std::endl;
        return 1;
    }

    // Open the input first, so that an unreadable input does not truncate an
    // existing output file.
    std::ifstream file_IN(argv[1], std::ios::in | std::ios::binary);
    if (!file_IN) {
        std::cerr << "ERROR: cannot open input file " << argv[1] << std::endl;
        return 1;
    }
    std::ofstream file_OUT(argv[2], std::ios::out | std::ios::trunc | std::ios::binary);
    if (!file_OUT) {
        std::cerr << "ERROR: cannot open output file " << argv[2] << std::endl;
        return 1;
    }

    // Tally every whitespace-separated token once its punctuation is removed.
    word_count_list word_count;
    std::string word;
    while (file_IN >> word) {
        strip_punctuation(word);
        // A token made only of punctuation is not a word; do not count it.
        if (!word.empty())
            ++word_count[word];
    }

    // Copy the (word, count) entries into a vector so they can be reordered.
    std::vector<std::pair<std::string,int> > wordvector(word_count.begin(), word_count.end());
    // Sort by descending frequency. The stable sort keeps the map's ascending
    // word order among words with the same count, so the output is
    // deterministic.
    std::stable_sort(wordvector.begin(), wordvector.end(), val_lt);

    // One "word count" line per entry; '\n' avoids flushing on every line.
    for (std::vector<std::pair<std::string,int> >::size_type i = 0; i < wordvector.size(); ++i) {
        file_OUT << wordvector[i].first << " " << wordvector[i].second << '\n';
    }

    // Close explicitly so that a failed write or flush can be reported.
    file_OUT.close();
    if (!file_OUT) {
        std::cerr << "ERROR: cannot write output file " << argv[2] << std::endl;
        return 1;
    }
    return 0;
}
// End of wordFrequency.