Easier way to find position of word/letter etc in string?

I am practicing file I/O and trying to master string manipulation and the string class in general, so I wrote a program that imports a list of words and sorts them into vectors based on each word's class (noun, verb, etc.) using a delimiter (word[WORDCLASS]). I am wondering whether this is the most efficient way of doing this; I'm not sure if any newer, better way has come along since C++17 came out. A lot of the examples I find on Google still use old C++98 code.

1
2
3
4
5
6
7
8
for (string::size_type position = sentence.find(findNoun, 0); position != string::npos; position = sentence.find(findNoun, position))
{
	if (sentence.npos != position)
	{
		sentence.replace(sentence.find(findNoun), findNoun.length(), 
                noun.at(rand() % noun.size()));
	}
}


The entire program is here if it helps; it's kind of a mess. I haven't been able to clean it up and refactor it yet, sorry about that.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
//Random Sentence Generator
//By Chay Hawk
//September 1st, 2019 @ 11:04 AM
//
//The random sentence generator loads words from a list and sorts and enters them 
//into a vector based on a hard coded delimeter. The program then generates a
//hardcoded sentence structure with those words based on their class.

#include <iostream>
#include <ctime>
#include <cstdlib>
#include <vector>
#include <fstream>
#include <string>
#include <algorithm>
#include <istream>

using namespace std;

void GetLine(istream& load, vector<string>& vStr, string strToFind);

// Replaces every occurrence of `tag` in `sentence` with a randomly chosen
// entry of `words`.
// Nothing is changed when `tag` or `words` is empty: an empty tag would match
// at every position, and an empty list would make rand() % words.size()
// divide by zero.
static void ReplaceTags(string& sentence, const string& tag, const vector<string>& words)
{
	if (tag.empty() || words.empty())
	{
		return;
	}

	for (string::size_type position = sentence.find(tag); position != string::npos; position = sentence.find(tag, position))
	{
		const string& word = words.at(rand() % words.size());
		sentence.replace(position, tag.length(), word);
		position += word.length(); // resume the search after the inserted word
	}
}

// Loads the word lists from Words.txt, prints them, then repeatedly reads an
// ad-lib sentence from standard input and prints it with every word-class tag
// replaced by a random word of that class.
// Returns 1 if Words.txt cannot be opened, 0 once standard input is exhausted.
int main()
{
	srand(static_cast<unsigned>(time(nullptr)));

	ifstream load("Words.txt");

	// Without this check a missing file would leave the stream in a failed
	// state before any word list is read.
	if (!load)
	{
		cerr << "Could not open Words.txt" << endl;
		return 1;
	}

	const string findNoun = "[NOUN]";
	const string findVerb = "[VERB]";
	const string findAdjective = "[ADJECTIVE]";
	const string findAdverb = "[ADVERB]";
	const string findPreposition = "[PREPOSITION]";

	/* Contents of file should be:
		word[CLASS]

		EX: jump[VERB]

		Order does not matter.
	*/

	vector<string> noun, verb, adjective, adverb, preposition;

	// GetLine() collects one word class per pass over the stream, so the single
	// stream is rewound before each pass instead of opening the file five times.
	auto loadClass = [&load](const char* heading, vector<string>& words, const string& tag)
	{
		load.clear();  // drop the eof/fail state left behind by the previous pass
		load.seekg(0); // back to the start of the file
		cout << heading << endl;
		GetLine(load, words, tag);
	};

	loadClass("\nNOUNS\n", noun, findNoun);
	loadClass("\nVERBS\n", verb, findVerb);
	loadClass("\nADVERBS\n", adverb, findAdverb);
	loadClass("\nADJECTIVES\n", adjective, findAdjective);
	loadClass("\nPREPOSITIONS\n", preposition, findPreposition);

	cout << "\nRANDOM SENTENCE GENERATOR VERSION 1.0.1\n" << endl;

	//Make this its own function named RandomSentenceGenerator()
	/*int choice{};
	while(choice != -1)
	{
		cout << "How many sentences would you like to generate?\n" << endl;
		cout << ">";
		cin >> choice;

		for (int i = 0; i < choice; i++)
		{
			cout << "The " << adjective.at(rand() % adjective.size()) << " " << noun.at(rand() % noun.size())
				<< " " << verb.at(rand() % verb.size()) << " " << adverb.at(rand() % adverb.size()) << "." << endl;
		}

		cout << '\n';
	}*/

	string sentence{};

	while (true)
	{
		cout << "Write your own adlib sentence putting word classes: [VERB], [NOUN], [ADVERB], [PREPOSITION] or [ADJECTIVE] where you want them." << endl;
		cout << "EXAMPLE: The [ADJECTIVE] [NOUN] [VERB] [ADVERB]\n" << endl;

		// Leave the loop when input ends or the stream fails; otherwise the
		// loop would keep printing the prompt forever with nothing to read.
		if (!getline(cin, sentence))
		{
			break;
		}

		/* TO DO
			-Make a generic for loop that outputs the word category based on what the user types.
				A. Could do this by making a vector of pair string and int, string holding the word class 
				   delimeter and int holding the position of it in the string, and iterate over the string 
				   collecting all word classes and their positions to replace.
			-Make separate functions that output random sentences generated by the computer, and one that
				the player can write themselves.
			-Refine word types into further classes I.E: determiners, pronouns, conjunctions etc.
		*/

		// Same substitution order as before: nouns, verbs, adjectives, adverbs, prepositions.
		ReplaceTags(sentence, findNoun, noun);
		ReplaceTags(sentence, findVerb, verb);
		ReplaceTags(sentence, findAdjective, adjective);
		ReplaceTags(sentence, findAdverb, adverb);
		ReplaceTags(sentence, findPreposition, preposition);

		cout << "\n\n" << sentence << "\n\n" << endl;
	}

	return 0;
}

//Custom "getline" that works with a string delimeter (std::getline only takes a char).
//Reads `load` line by line. For every line that contains `delimeter`, everything from
//the delimeter onwards is removed and the remaining word is appended to `vStr`.
//Lines without the delimeter are skipped. After reading, every entry of `vStr`
//(including entries that were already in it) is printed to cout, one per line.
void GetLine(istream& load, vector<string>& vStr, string delimeter)
{
	string line{};

	//Loop on the result of getline itself rather than on eof(): the loop then also
	//stops when the stream is in a failed state (e.g. the file could not be opened),
	//where eof() would never become true.
	while (getline(load, line))
	{
		const size_t pos = line.find(delimeter); //find delimeter position

		if (pos != string::npos)
		{
			line.erase(pos); //Drop the delimeter and anything after it, leaving the word
			vStr.push_back(line); //Push back the word
		}
	}

	//const reference: print each word without copying it first
	for (const auto& word : vStr)
	{
		cout << word << endl;
	}
}
Last edited on
This is something I did for a simple formatter.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
        // Single pass over `text` that recognises "[TAG]" spans:
        //   cursor           - the character currently being examined
        //   start_range      - the most recent '[' seen (set in the '[' case)
        //   start_formatting - moved to just past each recognised tag
        // NOTE(review): start_range has no initialiser, is later tested in a
        // boolean context and is reset with a value-initialised iterator. That
        // presumably relies on std::string_view::iterator being a raw pointer
        // type - confirm for the target standard library.
        std::string_view::iterator cursor = text.begin();
        std::string_view::iterator start_range; //an opening of a tag.
        std::string_view::iterator start_formatting = text.begin();

        for(; cursor != text.end(); cursor++)
        {
            switch(*cursor)
            {
            case '[':
                // remember where the candidate tag opens
                start_range = cursor;
                break;
            case ']':
                // accept only if a '[' was recorded and at least one character
                // lies between the brackets
                if(start_range && *start_range == '[' 
                    && start_range+1 != cursor)   //if not empty
                {
                    // view of the characters strictly between '[' and ']'
                    std::string_view tag(&*start_range + 1, std::distance(start_range + 1, cursor));

                    //didn't test this, but this doesn't have to be complicated
                    //output.push_back(start_formatting, start_range -1);
                    //if (tag == "NOUN") {output.push_back(random_from_list(noun_list));} 
                    //else if(tag == "ADJECTIVE") ...

                    // move past the ']' and forget the recorded '['
                    start_formatting = cursor + 1;
                    start_range = std::string_view::iterator();
                }
                break;
        } 
        // NOTE: the brace above closes the switch; the closing brace of the
        // for loop is not part of this excerpt.
//and push the last bit in 


reserve() on output can help, OR you could just skip the output string and print directly to cout.

This copies a string every loop.
1
2
3
4
	// `auto j` declares j by value, so each element of vStr is copied into j
	// before it is printed.
	for (auto j : vStr)
	{
		cout << j << endl;
	}


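To avoid opening the same file five times (as in the code below), reuse one stream and rewind it between passes: call load.clear() to reset the EOF/fail state, then load.seekg(0, load.beg);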
1
2
3
4
5
// Five separate input streams, each opened on the same file "Words.txt".
ifstream load("Words.txt");
	ifstream load2("Words.txt");
	ifstream load3("Words.txt");
	ifstream load4("Words.txt");
	ifstream load5("Words.txt");


And that doesn't really solve anything, you should be going through a file only once. So just find the enclosing [] and compare it against everything.

Personally I think it makes more sense to use a .INI file-like notation, and just put [VERB] at the top and now all the lines under it are defined as verbs.

Also be careful of Windows "\r\n" line endings. You avoided the problem by putting the [VERB] tag after the text. One way to avoid it in general is to not use getline (it's only for terminals, in my opinion) and instead call get() for every character, looking for the newline and ignoring values below ASCII 32.

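For safety, don't loop on load.eof(). It is fragile: it only becomes true after a read attempt has reached the end of the file, and it never becomes true if the stream fails for some other reason. Test the stream itself instead, with either if(load) or if(getline(load, STR)). Also, getline(std::cin, str) can fail if someone presses CTRL+Z, and there are many cases where a previous operation can leave the stream in an error state. Always check, because an unchecked failure can lead to an infinite loop.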
CH1156,

You seem pretty proficient with your code. It does what you want it to do. At this stage of programming, I think you need to concentrate on HOW you do what's needed.

- You read the input file 5 times. If this were a full dictionary of words that would take substantially longer than reading it once.
- Throughout the code, you need to find the vector<string> that corresponds to a particular tag. That suggests a function. Better yet, a map<string, vector<string>>.
- It would be far more efficient to create the sentence in a new string.
- A function should do just one thing. So GetLine() should read the file, but shouldn't print out the words. That should be done in a separate function.

The modified program below shows these suggestions. This program reads just one line of input and substitutes the tags for words.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
//Random Sentence Generator
//By Chay Hawk
//September 1st, 2019 @ 11:04 AM
//
//The random sentence generator loads words from a list and sorts and enters them 
//into a vector based on a hard coded delimeter. The program then generates a
//hardcoded sentence structure with those words based on their class.

#include <iostream>
#include <ctime>
#include <cstdlib>
#include <vector>
#include <fstream>
#include <string>
#include <algorithm>
#include <istream>
#include <map>

using namespace std;

// Build the initial tag table: one entry per recognised word-class tag, each
// mapped to an empty word list.
// The preposition tag is spelled "[PREPOSITION]" so that it matches the tag
// advertised in the prompt printed by main() and used in the word file;
// a key of "[PREP]" would never match either of them.
map < string, vector < string >> initMap()
{
    return {
	{"[NOUN]", {}},
	{"[VERB]", {}},
	{"[ADJECTIVE]", {}},
	{"[ADVERB]", {}},
	{"[PREPOSITION]", {}},
    };
}

// Global tag table used by the functions in this file: maps a bracketed tag
// such as "[NOUN]" to the list of words carrying that tag. It is created from
// the result of initMap().
map < string, vector < string >> tagMap(initMap());

/* Load an input file into the tagged vectors.
   Contents of file should be:
   word[CLASS]

   EX: jump[VERB]

   Order does not matter.
*/
bool loadFile(istream & strm);

// Substitute tags in the string
string substituteTags(const string & str);

// Print the words in the vector to cout
void printWords(const vector < string > &vec);

// Entry point: loads Words.txt into tagMap, prints every word list, then reads
// one ad-lib sentence from standard input and prints it with its tags replaced.
// Returns 1 if Words.txt cannot be opened, 0 otherwise.
int
main()
{
    srand(static_cast<unsigned>(time(nullptr)));

    ifstream load("Words.txt");
    if (!load) {
	// Without this check the program would carry on with every word list
	// empty and give no hint as to why.
	cerr << "Error: could not open Words.txt\n";
	return 1;
    }
    loadFile(load);

    // For each entry in the map.  A map entry is a pair<> where
    // "first" is the map key and "second" is the map value. So in our
    // case, first is the key and second is the vector of words
    for (const auto & entry:tagMap) {
	cout << "\n" << entry.first << "\n";
	printWords(entry.second);
    }

    cout << "\nRANDOM SENTENCE GENERATOR VERSION 1.0.1\n" << endl;

    //Make this its own function named RandomSentenceGenerator()
    /*while(choice != -1)
       {
       cout << "How many sentences would you like to generate?\n" << endl;
       cout << ">";
       cin >> choice;

       for (int i = 0; i < choice; i++)
       {
       cout << "The " << adjective.at(rand() % adjective.size()) << " " << noun.at(rand() % noun.size())
       << " " << verb.at(rand() % verb.size()) << " " << adverb.at(rand() % adverb.size()) << "." << endl;
       }

       cout << '\n';
       } */

    string sentence;

    cout <<
	"Write your own adlib sentence putting word classes: [VERB], [NOUN], [ADVERB], [PREPOSITION] or [ADJECTIVE] where you want them."
	<< endl;
    cout << "EXAMPLE: The [ADJECTIVE] [NOUN] [VERB] [ADVERB]\n" << endl;

    getline(cin, sentence);
    const string result = substituteTags(sentence);
    cout << "\n\n" << result << "\n\n" << endl;

    return 0;
}


// Load tagged words from the stream into tagMap.
// Each line is expected to look like word[CLASS], e.g. jump[VERB]. Everything
// before the first '[' is the word; the tag runs from that '[' up to and
// including the next ']'. Lines without a '[' are ignored. A tag that is not a
// key of tagMap is reported on cout and the line is skipped.
// Always returns true.
bool
loadFile(istream & load)
{
    string line;
    while (getline(load, line)) {
	const size_t open = line.find('[');	// start of the tag
	if (open == string::npos) {
	    continue;				// no tag on this line
	}

	// Cut the tag at its closing bracket rather than at the end of the
	// line, so that trailing characters (the '\r' of a CRLF line ending,
	// trailing blanks) do not become part of the tag and make the lookup
	// fail. Without a ']' the rest of the line is used, as before.
	const size_t close = line.find(']', open);
	const string tag = (close == string::npos)
	    ? line.substr(open)
	    : line.substr(open, close - open + 1);

	line.erase(open);			// leaves just the word

	auto iter = tagMap.find(tag);
	if (iter == tagMap.end()) {
	    cout << "Error in input. \"" << tag << "\" isn't a valid tag.\n";
	} else {
	    iter->second.push_back(line);	// add the word to its class list
	}
    }
    return true;
}


// Check whether a valid tag starts at str[pos].
// A candidate runs from a '[' at pos up to and including the next ']'. When
// such a candidate exists it is copied into "tag" (even if it turns out not to
// be valid) and the result says whether it is a key of tagMap. When str[pos]
// is not '[' or no ']' follows, "tag" is left untouched and false is returned.
bool
isTag(const string & str, size_t pos, string & tag)
{
    if (str[pos] != '[') {
	return false;
    }

    const size_t closing = str.find(']', pos);
    if (closing == string::npos) {
	return false;
    }

    tag = str.substr(pos, closing - pos + 1);
    return tagMap.count(tag) != 0;
}


// Substitute tags in the string.
// Returns a copy of str in which every valid tag (as decided by isTag) is
// replaced by a random word from that tag's list in tagMap. All other
// characters are copied unchanged. A valid tag whose word list is empty is
// copied verbatim, because rand() % 0 would divide by zero.
string
substituteTags(const string & str)
{
    string result;		// the resulting sentence
    result.reserve(str.size());
    string tag;
    for (size_t i = 0; i < str.size();) {
	if (isTag(str, i, tag)) {
	    // isTag returned true, so the tag is guaranteed to be in the map
	    const vector < string > &words = tagMap.find(tag)->second;
	    if (words.empty()) {
		result += tag;			 // nothing to pick from
	    } else {
		result += words[rand() % words.size()];
	    }
	    i += tag.size();			 // skip the entire tag
	} else {
	    result += str[i++];			 // copy the character and advance
	}
    }
    return result;
}


// Write every word of the vector to cout, each followed by a newline.
// An empty vector prints nothing.
void
printWords(const vector < string > &vec)
{
    for_each(vec.begin(), vec.end(), [](const string & entry) {
	cout << entry << '\n';
    });
}

> trying to master string manipulation and the string class in general
> not sure if any new better way has come along since C++17 has been out

C++11 added the regular expressions library to the standard;
it is a great tool for searching for or matching patterns in strings.


> a lot of examples i find on google still use old c++98 code.

Here is some code using the standard regular expressions library, and some of the newer C++ features:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#include <iostream>
#include <string>
#include <map>
#include <regex>
#include <random>
#include <sstream>

// The word classes known to the program. INVALID is the last enumerator and
// is also what operator<< prints for any value that is not a real class.
enum word_class { NOUN, VERB, ADJECTIVE, ADVERB, PREPOSITION, INVALID };

// Write the name of a word class to the stream, e.g. VERB prints "VERB".
// Any value other than the five real classes prints "INVALID".
std::ostream& operator<< ( std::ostream& stm, word_class wc )
{
    switch( wc )
    {
        case NOUN :        return stm << "NOUN" ;
        case VERB :        return stm << "VERB" ;
        case ADJECTIVE :   return stm << "ADJECTIVE" ;
        case ADVERB :      return stm << "ADVERB" ;
        case PREPOSITION : return stm << "PREPOSITION" ;
        default :          return stm << "INVALID" ;
    }
}

// Lookup table from the bracketed tag text (e.g. "[NOUN]") to the matching
// word_class enumerator. There is deliberately no entry for INVALID.
// C++11: http://www.stroustrup.com/C++11FAQ.html#init-list
const std::map< std::string, word_class > word_class_lookup =
{
    { "[NOUN]", NOUN },
    { "[VERB]", VERB },
    { "[ADJECTIVE]", ADJECTIVE },
    { "[ADVERB]", ADVERB },
    { "[PREPOSITION]", PREPOSITION }
};

// The dictionary type: for each word class, the words that belong to it.
// key: word_class; mapped data: vector containing all the words belonging to that class
// C++11: https://en.cppreference.com/w/cpp/language/type_alias
using word_map = std::map< word_class, std::vector<std::string> > ;

// Split one dictionary line into its word class and its word.
// eg. input: "jump[VERB]" gives pair { VERB, "jump" }
// A line that does not have the shape word[TAG], or whose tag is not listed in
// word_class_lookup, gives pair { INVALID, "" }.
std::pair< word_class, std::string > parse( const std::string& line )
{
    // C++11: https://en.cppreference.com/w/cpp/regex
    // Accepted layout, anchored at both ends of the line:
    //   optional white space, a word (capture 1),
    //   optional white space, a tag in square brackets (capture 2),
    //   optional white space
    static const std::regex wc_re( "^\\s*(\\w+)\\s*(\\[\\w+\\])\\s*$" ) ;

    std::smatch pieces ;
    if( !std::regex_match( line, pieces, wc_re ) )
        return { INVALID, {} } ; // the line does not have the expected shape

    // look the tag (capture 2) up in the table of known tags
    const auto entry = word_class_lookup.find( pieces[2].str() ) ;
    if( entry == word_class_lookup.end() )
        return { INVALID, {} } ; // well-formed line, but an unknown tag

    // C++11: http://www.stroustrup.com/C++11FAQ.html#uniform-init
    return { entry->second, pieces[1].str() } ; // eg. pair { VERB, "jump" }
}

// Read the stream line by line, parse each line, and collect the valid
// entries into a word_map (word class -> words of that class).
// Lines for which parse() reports INVALID are skipped.
word_map parse_lines( std::istream& stm )
{
    word_map map ;

    std::string line ;
    while( std::getline( stm, line ) ) // for each line in the file
    {
        // requires C++17: structured binding
        // https://en.cppreference.com/w/cpp/language/structured_binding
        // The bindings are deliberately not const: std::move on a const
        // string silently falls back to a copy.
        auto [ wc, word ] = parse(line) ; // parse it

        // if valid line, move the word into the appropriate vector
        // C++11: http://www.stroustrup.com/C++11FAQ.html#rval
        if( wc != INVALID ) map[wc].push_back( std::move(word) ) ;
    }

    return map ;
}

// Pick a random word of the requested class from the dictionary.
// eg. input: word_class VERB may return "jump"
// Returns an empty string when the class has no entry in the map or its
// list of words is empty.
std::string random_word( word_class wc, const word_map& map )
{
    // C++11: https://en.cppreference.com/w/cpp/numeric/random
    // one generator and one distribution, created on the first call and reused
    static std::mt19937 rng( std::random_device{}() ) ;
    static std::uniform_int_distribution<std::size_t> distrib ;

    // C++11: http://www.stroustrup.com/C++11FAQ.html#decltype
    using param_type = decltype(distrib)::param_type ;

    const auto entry = map.find(wc) ;
    if( entry == map.end() ) return {} ; // no such class in the dictionary

    const auto& candidates = entry->second ; // the words belonging to this class
    if( candidates.empty() ) return {} ; // nothing to choose from

    // choose a position in [ 0, N-1 ] and return the word stored there
    distrib.param( param_type( 0, candidates.size()-1 ) ) ;
    const std::size_t pick = distrib(rng) ;
    return candidates[pick] ;
}

// Build a sentence from the pattern by replacing each known tag with a random
// word of that class taken from the map.
// example pattern: "The [ADJECTIVE] [NOUN] [VERB] [ADVERB]"
// Text outside tags is copied unchanged; a bracketed tag that is not listed in
// word_class_lookup is copied verbatim.
std::string random_sentence( const std::string& pattern, const word_map& map )
{
    // \[\w+\] - tag in square brackets eg. [NOUN]
    static const std::regex tags_re( "\\[\\w+\\]" ) ;

    std::string sentence ;
    auto copied_up_to = pattern.begin() ; // everything before this is already in sentence

    // visit the tags from left to right
    const std::sregex_iterator no_more_tags ;
    for( std::sregex_iterator hit( pattern.begin(), pattern.end(), tags_re ) ; hit != no_more_tags ; ++hit )
    {
        const std::ssub_match& tag = (*hit)[0] ; // the whole "[...]" match

        sentence.append( copied_up_to, tag.first ) ; // plain text in front of the tag

        const auto entry = word_class_lookup.find( tag.str() ) ; // look up the matched tag
        if( entry != word_class_lookup.end() )
            sentence += random_word( entry->second, map ) ; // known tag: substitute a word
        else
            sentence += tag.str() ; // unknown tag: keep it as written

        copied_up_to = tag.second ; // continue after this tag
    }

    sentence.append( copied_up_to, pattern.end() ) ; // whatever follows the last tag

    return sentence ;
}

int main() // minimal test driver
{
    // use a string stream (instead of the file) for testing
    // C++11: http://www.stroustrup.com/C++11FAQ.html#raw-strings
    std::istringstream file( R"(
                                 string[NOUN]
                                 vector [NOUN]
                                 map[NOUN]
                                 expression[NOUN]
                                 algorithm[NOUN]
                                 Stroustrup[NOUN]
                                 Ch1156 [NOUN]

                                 find    [VERB]
                                 match [VERB]
                                 print  [VERB]
                                 remove    [VERB]
                                 jump[VERB]

                                 great[ADJECTIVE]
                                 simple[ADJECTIVE]
                                 gentle  [ADJECTIVE]
                                 happy[ADJECTIVE]

                                 once[ADVERB]
                                 early[ADVERB]
                                 quickly  [ADVERB]
                                 loudly[ADVERB]

                                 for[PREPOSITION]
                                 into[PREPOSITION]
                                 beside     [PREPOSITION]
                                 about[PREPOSITION]
                                 through[PREPOSITION]
                               )" ) ;

    const auto map = parse_lines(file) ;

    std::cout << "dictionary:\n-------------\n" ;
    // C++11: http://www.stroustrup.com/C++11FAQ.html#for
    for( const auto& pair : map )
    {
        std::cout << pair.first << " : [ " ;
        for( const auto& word : pair.second ) std::cout << word << ' ' ;
        std::cout << "]\n" ;
    }

    const std::string pattern = "Hey [NOUN]! The [ADJECTIVE] [NOUN], and "
                                "[ADJECTIVE], [ADJECTIVE] [NOUN] "
                                "[VERB] very [ADVERB] [PREPOSITION] [NOUN].\n"
                                "This is a very [ADJECTIVE] [NOUN] indeed!!" ;

    std::cout << "\npattern:\n" << pattern
              << "\n\ngenerated sentence:\n" << random_sentence( pattern, map ) << '\n' ;
}

http://coliru.stacked-crooked.com/a/98ebc2f92db91553
https://rextester.com/FCML24772
Topic archived. No new replies allowed.