Converting from uppercase to lowercase letters and removing signs from a .txt file

Hello, Here I have a code that reads a .txt file into arrays. The program reads through the .txt file and counts each word and prints out top 10 words, from 1. place to 10. place and displays at the bottom the total number of words in the text file.

I need to change the code so that the program converts every uppercase letters to lower case letters and removes the following signs if they appear behind a word : ".,?!:"

Can somebody help me ?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  #include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
#include <cctype>
using namespace std;

const int MAX_WORDS = 100000;

void insertionSort(int counter[], string words[], int length);

void read_unique_words(ifstream &infile, string uniquewords[], int wordcount[], int &numberof_unique_words);
void open_file(ifstream &infile, string thefilename);

int main()
{
    ifstream infile;
    string thefilename; 
    cin >> thefilename;
    open_file(infile, thefilename);

    string uniquewords[MAX_WORDS];
    int uniquewordcount[MAX_WORDS];
    int numberof_unique_words = 0;

    read_unique_words(infile, uniquewords, uniquewordcount, numberof_unique_words);

    insertionSort(uniquewordcount, uniquewords, numberof_unique_words);

    for (int i = 0; i < 10; i++) {
        cout << uniquewords[i] << " - " << uniquewordcount[i] << endl;
    }
    cout << "Number of unique words: " << numberof_unique_words << endl;

    infile.close();
    return 0;
}

void insertionSort(int counter[], string words[], int length) {
  int i, j, tmp;
  string tmp_word;

  for (i = 1; i < length; i++) {
    tmp = counter[i];
    tmp_word = words[i];
    j = i;
    while (j > 0 && counter[j - 1] < tmp) {
      counter[j] = counter[j - 1];
      words[j] = words[j - 1];
      j--;
    }
    counter[j] = tmp;
    words[j] = tmp_word;
  }
}

void read_unique_words(ifstream &infile, string uniquewords[], int wordcount[], int &numberof_unique_words) {
    string s;

    while (infile >> s) {
        int word_index = -1;
        for (int i = 0; i < numberof_unique_words; i++) {
            if (uniquewords[i] == s) {
                word_index = i;
            }
        }
        if (word_index == -1) {
            uniquewords[numberof_unique_words] = s;
            wordcount[numberof_unique_words] = 1;
            numberof_unique_words++;
        }
        else {
            wordcount[word_index]++;
        }
    }
}

void open_file(ifstream &infile, string thefilename) {
    infile.open(thefilename.c_str());
    if (infile.fail()) {
        cout << "Can't open the file: " << thefilename << endl;
        exit(1);
    }
}



So what are the contents of your input file?
Anyone that can create this code can create a function to modify a string.

Insert function before line 30 to convert A-Z and then strip non a-z chars.
Return the new string to main.

http://www.cplusplus.com/reference/cctype/tolower/
Last edited on
Topic archived. No new replies allowed.