// Random sentence generator: reads "word[TAG]" entries into per-class word lists,
// then replaces each [TAG] placeholder in a pattern with a randomly chosen word.
#include <iostream>
#include <map>
#include <random>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
// Grammatical class of a word. Enumerators take consecutive values starting at 0.
enum word_class
{
    NOUN,
    VERB,
    ADJECTIVE,
    ADVERB,
    PREPOSITION,
    INVALID       // not a real class: marks "no / unrecognised class"; kept last
};
std::ostream& operator<< ( std::ostream& stm, word_class wc )
{
static const char* const text[] { "NOUN", "VERB", "ADJECTIVE", "ADVERB", "PREPOSITION", "INVALID" };
if( wc < NOUN || wc > INVALID ) wc = INVALID ;
return stm << text[wc] ;
}
// Lookup table: tag text, including the square brackets, -> word_class.
// There is deliberately no entry for INVALID.
// C++11 initializer list: http://www.stroustrup.com/C++11FAQ.html#init-list
const std::map< std::string, word_class > word_class_lookup
{
    { "[NOUN]",        NOUN        },
    { "[VERB]",        VERB        },
    { "[ADJECTIVE]",   ADJECTIVE   },
    { "[ADVERB]",      ADVERB      },
    { "[PREPOSITION]", PREPOSITION }
};
// word_map: the dictionary type.
//   key         - a word_class
//   mapped data - vector holding all the words that belong to that class
// C++11 type alias: https://en.cppreference.com/w/cpp/language/type_alias
using word_map = std::map< word_class, std::vector<std::string> > ;
// Parse one line of the input file.
//
// A valid line is a word followed by a tag in square brackets; white space is
// permitted before the word, between word and tag, and after the tag.
//   eg. "jump[VERB]" or "  jump [VERB] "   =>   pair { VERB, "jump" }
//
// Returns pair { word_class, word }. If the line does not have this shape, or
// the tag is not present in word_class_lookup, returns pair { INVALID, "" }.
std::pair< word_class, std::string > parse( const std::string& line )
{
    // ^          beginning of string
    // \s*        optional white space
    // (\w+)      the word                      (capture 1)  eg. "jump"
    // \s*        optional white space
    // (\[\w+\])  the tag, brackets included    (capture 2)  eg. "[VERB]"
    // \s*        optional white space
    // $          end of string
    static const std::regex entry_re( "^\\s*(\\w+)\\s*(\\[\\w+\\])\\s*$" ) ;

    std::smatch captures ;

    // the whole line must match the pattern
    if( !std::regex_match( line, captures, entry_re ) ) return { INVALID, {} } ;

    // the tag must be one that we know about
    const auto known_tag = word_class_lookup.find( captures.str(2) ) ;
    if( known_tag == word_class_lookup.end() ) return { INVALID, {} } ;

    return { known_tag->second, captures.str(1) } ; // eg. pair { VERB, "jump" }
}
// Read the input stream line by line and build the dictionary.
//
// Every line is handed to parse(); a line for which parse() reports INVALID is
// skipped. The word from each valid line is appended to the vector associated
// with its word_class, in the order in which the lines are read.
//
// Returns the populated word_map (empty if there were no valid lines).
word_map parse_lines( std::istream& stm )
{
    word_map map ;
    std::string line ;

    while( std::getline( stm, line ) ) // for each line in the stream
    {
        // requires C++17: structured binding
        // https://en.cppreference.com/w/cpp/language/structured_binding
        // note: the binding is intentionally not const; std::move applied to a
        // const std::string would quietly fall back to making a copy
        auto [ wc, word ] = parse(line) ;

        // if valid line, move the word into the appropriate vector
        if( wc != INVALID ) map[wc].push_back( std::move(word) ) ;
    }

    return map ;
}
// Pick a random word of the given class from the dictionary.
//   eg. random_word( VERB, map ) may return "jump"
//
// Returns an empty string if the class has no entry in the map or if its
// vector of words is empty.
//
// The random number engine is a function-local static; it is seeded once from
// std::random_device and shared by all calls.
std::string random_word( word_class wc, const word_map& map )
{
    // C++11: https://en.cppreference.com/w/cpp/numeric/random
    static std::mt19937 rng( std::random_device{}() ) ;
    static std::uniform_int_distribution<std::size_t> distrib ;
    using param_type = decltype(distrib)::param_type ;

    const auto entry = map.find(wc) ;

    // nothing to choose from: unknown class, or a class without any words
    if( entry == map.end() || entry->second.empty() ) return {} ;

    const auto& candidates = entry->second ;

    // uniformly distributed position in the closed interval [ 0, N-1 ]
    const std::size_t pos = distrib( rng, param_type( 0, candidates.size()-1 ) ) ;
    return candidates[pos] ;
}
// Generate a random sentence from a pattern.
//   example pattern: "The [ADJECTIVE] [NOUN] [VERB] [ADVERB]"
//
// Every tag found in word_class_lookup is replaced with the result of
// random_word() for that class (which is an empty string if the map has no
// words of that class). A bracketed tag that is not in word_class_lookup is
// copied unchanged, as is all the text between the tags.
std::string random_sentence( const std::string& pattern, const word_map& map )
{
    // \[\w+\] - one or more word characters within square brackets eg. [NOUN]
    static const std::regex tags_re( "\\[\\w+\\]" ) ;

    std::string result ;
    auto copied_upto = pattern.begin() ; // everything before this is already in result

    const std::sregex_iterator no_more_tags ;
    for( std::sregex_iterator tag( pattern.begin(), pattern.end(), tags_re ) ; tag != no_more_tags ; ++tag )
    {
        const auto& whole = (*tag)[0] ; // the complete tag, brackets included

        // literal text between the previous tag (or the beginning) and this tag
        result.append( copied_upto, whole.first ) ;

        const std::string tag_text = whole.str() ;
        const auto known = word_class_lookup.find(tag_text) ;

        if( known == word_class_lookup.end() ) result += tag_text ; // unknown tag: keep verbatim
        else result += random_word( known->second, map ) ;

        copied_upto = whole.second ; // resume just past this tag
    }

    // whatever remains after the last tag
    result.append( copied_upto, pattern.end() ) ;
    return result ;
}
int main() // minimal test driver
{
    // the test data lives in a string stream, which stands in for the input file
    // C++11 raw string literal: http://www.stroustrup.com/C++11FAQ.html#raw-strings
    std::istringstream input( R"(
string[NOUN]
vector [NOUN]
map[NOUN]
expression[NOUN]
algorithm[NOUN]
Stroustrup[NOUN]
Ch1156 [NOUN]
find [VERB]
match [VERB]
print [VERB]
remove [VERB]
jump[VERB]
great[ADJECTIVE]
simple[ADJECTIVE]
gentle [ADJECTIVE]
happy[ADJECTIVE]
once[ADVERB]
early[ADVERB]
quickly [ADVERB]
loudly[ADVERB]
for[PREPOSITION]
into[PREPOSITION]
beside [PREPOSITION]
about[PREPOSITION]
through[PREPOSITION]
)" ) ;

    const auto dictionary = parse_lines(input) ;

    // dump the dictionary: one line per word class, followed by its words
    std::cout << "dictionary:\n-------------\n" ;
    for( const auto& [ wc, words ] : dictionary )
    {
        std::cout << wc << " : [ " ;
        for( const auto& word : words ) std::cout << word << ' ' ;
        std::cout << "]\n" ;
    }

    const std::string pattern = "Hey [NOUN]! The [ADJECTIVE] [NOUN], and "
                                "[ADJECTIVE], [ADJECTIVE] [NOUN] "
                                "[VERB] very [ADVERB] [PREPOSITION] [NOUN].\n"
                                "This is a very [ADJECTIVE] [NOUN] indeed!!" ;

    // show the pattern, then one sentence generated from it
    std::cout << "\npattern:\n" << pattern ;
    std::cout << "\n\ngenerated sentence:\n" << random_sentence( pattern, dictionary ) << '\n' ;
}
// end of listing