// Compare a sentence entered by the user against the text held in a text file
// and report the sentence of that text that best matches the user's words.
#include <algorithm>
#include <cctype>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
// Look up `word` in the list of wanted words.
// Returns an iterator to the first element of `tofind` equal to `word`,
// or tofind.cend() when there is none. The test is plain string equality;
// swap in something smarter here if fuzzy matching is ever needed.
auto compWord(const std::vector<std::string>& tofind, const std::string& word) {
    auto pos {tofind.cbegin()};

    // Linear scan: stop on the first exact match or at the end of the list
    while (pos != tofind.cend() && *pos != word)
        ++pos;

    return pos;
}
// Obtain the list of words a sentence must contain.
//
// Prints a prompt to std::cout, then reads whitespace-separated tokens from
// std::cin until a lone "0" is read or input fails/ends. Each token is
// normalised by keeping only its alphabetic characters, converted to lower
// case, so it can be compared directly with the words extracted from the text.
//
// Returns the normalised words in the order entered. Tokens that contain no
// letters at all, and repeats of an already-entered word, are dropped: an
// empty or duplicated entry could never be counted as found, which would
// prevent every sentence from qualifying.
auto getWords() {
    std::vector<std::string> tofind;

    std::cout << "Enter the word(s) that the sentence must contain to match. 0 to terminate\n";

    for (std::string word; (std::cin >> word) && word != "0"; ) {
        std::string tomat;
        tomat.reserve(word.size());

        for (const char ch : word) {
            // <cctype> functions require a value representable as unsigned char
            const auto uch {static_cast<unsigned char>(ch)};

            if (std::isalpha(uch))
                tomat += static_cast<char>(std::tolower(uch));
        }

        if (tomat.empty())
            continue;   // nothing left after stripping non-letters

        if (std::find(tofind.cbegin(), tofind.cend(), tomat) == tofind.cend())
            tofind.emplace_back(tomat);
    }

    return tofind;
}
namespace {

// Characters that end a sentence.
constexpr const char* kSentenceTerminators {"!?."};

// Outcome of scanning the text for the best-matching sentence.
struct BestSentence {
    std::string text;       // the winning sentence, without its terminator; empty if none
    unsigned matches {};    // number of word matches in that sentence; 0 if none
};

// True when `ch` closes a sentence: one of the terminator characters or NUL
// (NUL stands in for the end of the text).
bool isSentenceEnd(char ch) {
    return ch == '\0' || std::strchr(kSentenceTerminators, ch) != nullptr;
}

// Scan `text` sentence by sentence and return the sentence that contains every
// word of `tofind` at least once and has the highest total number of matches.
// On a tie the earliest such sentence wins. Words are built from alphabetic
// characters only, lower-cased, and are ended by whitespace or a sentence end.
BestSentence findBestSentence(const std::string& text, const std::vector<std::string>& tofind) {
    constexpr auto npos {std::string::npos};

    BestSentence best;
    std::vector<unsigned> wrdmatch(tofind.size());  // per-word hit count for the current sentence
    std::string word;                               // word currently being assembled
    std::size_t ststrt {npos};                      // index of first letter of current sentence
    unsigned match {};                              // total hits in the current sentence

    // Walk one position past the last character so the final sentence is
    // closed even when the text does not end with a terminator.
    for (std::size_t pos {}; pos <= text.size(); ++pos) {
        const char ch {pos < text.size() ? text[pos] : '\0'};
        const auto uch {static_cast<unsigned char>(ch)};

        if (std::isalpha(uch)) {
            word += static_cast<char>(std::tolower(uch));
            if (ststrt == npos)
                ststrt = pos;
            continue;
        }

        const bool sentenceEnd {isSentenceEnd(ch)};

        // Whitespace or a sentence end completes the pending word
        if ((sentenceEnd || std::isspace(uch)) && !word.empty()) {
            if (const auto itr {compWord(tofind, word)}; itr != tofind.cend()) {
                ++match;
                ++wrdmatch[static_cast<std::size_t>(itr - tofind.cbegin())];
            }
            word.clear();
        }

        // Only sentences that actually contained a letter are evaluated
        if (sentenceEnd && ststrt != npos) {
            const bool allFound {std::all_of(wrdmatch.cbegin(), wrdmatch.cend(),
                                             [](unsigned count) { return count > 0; })};

            if (match > best.matches && allFound) {
                best.matches = match;
                best.text = text.substr(ststrt, pos - ststrt);
            }

            std::fill(wrdmatch.begin(), wrdmatch.end(), 0u);
            match = 0;
            ststrt = npos;
        }
    }

    return best;
}

} // namespace

// Reads "myfile.txt", asks the user for the required words via getWords(), and
// prints the sentence of the file that contains all of them with the most
// matches. Returns 1 if the file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream textf("myfile.txt");

    if (!textf) {
        std::cout << "Cannot open input file\n";
        return 1;
    }

    // istreambuf_iterator reads raw characters, so whitespace is preserved
    const std::string text((std::istreambuf_iterator<char>(textf)), std::istreambuf_iterator<char>());

    const auto tofind {getWords()};

    if (tofind.empty()) {
        std::cout << "No words were entered to search for\n";
        return 0;
    }

    const auto best {findBestSentence(text, tofind)};

    if (best.matches == 0) {
        std::cout << "No sentence contains all of the requested words\n";
        return 0;
    }

    std::cout << "A best sentence match with " << best.matches << " total matches is:\n";
    std::cout << best.text << '\n';
}