Finding multiple strings in an array of chars

I'm currently writing a program that reads a text file, stores the characters in an array, searches that array for a "start" word and a "stop" word, stores the locations of those words, and copies everything between those two words into a separate text file. My problem right now is that while the "start" word is consistent throughout the text file, there are at least 3 different "stop" words. This causes my program to get stuck in a loop and output a file larger than the input, which makes no sense.

My current method works for only one "stop" word: it converts the array of chars into a single string so that I can use str.find to find the "stop" word. But this only works for one word; I need it to work for several different "stop" words, for example: "stop", "end", "quit".

here's my program

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>

using namespace std;

// Reads "test input.txt" into memory, records every position of the "start"
// word ("text") and of the "stop" word ("stop"), blanks out the start words,
// and writes the characters between each start/stop pair to "test output.txt".
int main() {


	ifstream input("test input.txt", ifstream::binary); //open input
	if (input) {
		input.seekg(0, input.end); //seekg moves the pointer to the end of the file
		size_t length = input.tellg(); //tellg reports the current (end) position, which is stored as the file size "length"
		input.seekg(0, input.beg); //seekg moves the pointer back to the beginning of the file to prepare it to be read

		// NOTE(review): this array is allocated with new[] but no matching delete[] appears in this function.
		char * buffer = new char[length]; //create the array (buffer) that will store the characters
		input.read(buffer, length); //read up to "length" characters from the input file into buffer (no '\0' is appended)



		// NOTE(review): constructing a string from a char* expects a null-terminated array;
		// buffer holds exactly "length" characters with no terminator, so this may read past its end.
		string str = buffer; //converts the array of characters (buffer) into a single string in order to be searched

		char start[] = "text"; // the "start" word is declared as an array at first in order to get its character length
		int startLen = sizeof(start) / sizeof(*start); // element count of the array, which includes the terminating '\0' (5 for "text")

		string substrStart = start; //declares the substring to be searched for
		string substrStop("stop");
		//string substrStop[3] = {"stop1", "stop2", "stop3"};

		vector <int> tallyStart; //positions in "str" where "substrStart" occurs
		vector <int> tallyStop; //positions in "str" where "substrStop" occurs

		size_t posStart = 0; //position in "str" from which the next search for "substrStart" begins
		size_t posStop = 0; //position in "str" from which the next search for "substrStop" begins

		for (unsigned int i = 0; i < length; i++) { //at most "length" iterations; the loop normally exits through the break below
			size_t found = str.find(substrStart, posStart); //searches "str" for "substrStart" beginning at "posStart"
			if (found > length) //true for string::npos (the largest size_t) as well as for any position beyond the file length
				break; //no further usable occurrence, exit the loop
			else if (found != string::npos) { //an occurrence was found
				tallyStart.push_back(found); //record the occurrence in "tallyStart"
				posStart = found + 1; //move the beginning point for the search forward by one character in order to look for another occurrence
			}
		}

		//same loop as above, but collecting "substrStop" occurrences into "tallyStop"
		for (unsigned int j = 0; j < length; j++) {

			//main method
			size_t end = str.find(substrStop, posStop);
			if (end > length)
				break;
			else if (end != string::npos) {
				tallyStop.push_back(end);
				posStop = end + 1;
			}

			//possible method 1
				//nothing yet

			//possible method 2
			/*auto stop = find_if(begin(substrStop), end(substrStop), [&](const string& s) {return substrStart.find(s) != string::npos; });
			if (stop > length))
				break;
			else if (stop != end(substrStop)) {
				tallyStop.push_back(stop);
				posStop = stop + 1;
			}*/
		}

		// NOTE(review): startLen counts the '\0' of "text", so one character after each start word is blanked as well.
		for (int a = 0; a < tallyStart.size(); a++) { //this loop overwrites each "start" word in buffer with spaces
			for (int b = 0; b < startLen; b++) {
				buffer[tallyStart[a] + b] = ' ';
			}
		}

		ofstream output("test output.txt"); //open output
			// NOTE(review): tallyStop[x] is indexed by the number of start words; nothing here checks
			// that tallyStop has as many entries as tallyStart.
			for (int x = 0; x < tallyStart.size(); x++) { //iterate for every "start" word
				for (int y = 0; y < (tallyStop[x] - tallyStart[x]); y++) { //iterate for every character between the "start" word and the "stop" word
					output << buffer[tallyStart[x] + y]; // copies from the start position itself, so the blanked-out "start" word is written as spaces
				}
			}
		output.close(); //close output

	input.close(); //close input
	}

	return 0;
}


this is the test input:

do
not
output
this
text
oipwer[oinq34g[ni0
THIS ONE
AND THIS
stop
only
that
which
is
in
caps
text
THIS ONE TOO
DONT FORGET THIS ONE
'ap;sldkf]oqme]rvpom
stop
one
more
almost
there
text
asdvkjqpn;o'riv [q
LAST ONE
SIKE YOU THOUGHT
stop
ok
that's
enough


and this is the test output:
     
oipwer[oinq34g[ni0
THIS ONE
AND THIS
     
THIS ONE TOO
DONT FORGET THIS ONE
'ap;sldkf]oqme]rvpom
    
asdvkjqpn;o'riv [q
LAST ONE
SIKE YOU THOUGHT
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;

// Collects every section of `text` that lies between an occurrence of
// `startWord` and the nearest following occurrence of any word in `stopWords`.
//
// text      - the full text to scan.
// startWord - marker that opens a section (not included in the result).
// stopWords - markers that may close a section (not included in the result).
//
// Returns the sections concatenated in the order they appear. If a start word
// has no stop word after it, everything up to the end of `text` is taken.
static std::string extractSections(const std::string& text,
                                   const std::string& startWord,
                                   const std::vector<std::string>& stopWords) {
    std::string result;
    std::size_t searchFrom = 0;

    while (searchFrom < text.size()) {
        const std::size_t startPos = text.find(startWord, searchFrom);
        if (startPos == std::string::npos)
            break;
        const std::size_t sectionBegin = startPos + startWord.size();

        // Find the stop word that is closest to the start word. When several
        // stop words match at the same position (e.g. "stop" and "stop2"),
        // prefer the longest so the whole word is skipped afterwards.
        std::size_t stopPos = std::string::npos;
        std::size_t stopLen = 0;
        for (const auto& word : stopWords) {
            const std::size_t candidate = text.find(word, sectionBegin);
            if (candidate == std::string::npos)
                continue;
            if (candidate < stopPos || (candidate == stopPos && word.size() > stopLen)) {
                stopPos = candidate;
                stopLen = word.size();
            }
        }

        if (stopPos == std::string::npos) {
            // Unterminated section: take the rest of the text and finish.
            result.append(text, sectionBegin, std::string::npos);
            break;
        }

        result.append(text, sectionBegin, stopPos - sectionBegin);

        // Resume after the matched stop word so that a start word overlapping
        // it cannot be picked up; always advance by at least one character.
        searchFrom = stopPos + (stopLen != 0 ? stopLen : 1);
    }

    return result;
}

// Reads "test input.txt", extracts the text between each "text" marker and the
// next stop marker, and writes the result to "test output.txt".
// Returns 0 on success and 1 if a file cannot be opened, sized, or written.
int main() {
    std::ifstream input("test input.txt");
    if (!input) {
        std::cerr << "Can't open input file.\n";
        return 1;
    }

    input.seekg(0, std::ios::end);
    const std::streamoff fileSize = input.tellg();
    input.seekg(0, std::ios::beg);
    if (!input || fileSize < 0) {
        std::cerr << "Can't determine input file size.\n";
        return 1;
    }

    // In text mode the reported size is only an upper bound on the number of
    // characters read (line endings may be translated), so trim the string to
    // what read() actually delivered instead of leaving padding behind.
    std::string str(static_cast<std::size_t>(fileSize), ' ');
    input.read(&str[0], static_cast<std::streamsize>(str.size()));
    str.resize(static_cast<std::size_t>(input.gcount()));

    std::ofstream output("test output.txt");
    if (!output) {
        std::cerr << "Can't open output file.\n";
        return 1;
    }

    const std::string strStart("text");
    const std::vector<std::string> strStop{"stop", "stop2", "stop3"};

    output << extractSections(str, strStart, strStop);
    if (!output) {
        std::cerr << "Can't write output file.\n";
        return 1;
    }

    return 0;
}


BTW, your comments are terrible! Don't clutter your program with comments that add nothing to the code.
@tpb

I was kind of hoping for help with my code as opposed to a full-on answer. I'm a beginner, so I can't really discern your methods just by looking at your code. Nevertheless, your code works perfectly. Thanks for the help, and thanks for the feedback; I guess I just built that habit because my instructor is always nagging me to explain every line of code.
Topic archived. No new replies allowed.