How to remove extra lines from a text file.

I am creating a program to remove extra spaces, comments and extra lines from a text file. I cannot figure out how to remove extra lines. Any help is appreciated thank you! :D

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <algorithm>
#include <iterator>

int main()
{
	std::ifstream inStream("file.txt");
	if (inStream)
	{
		std::vector<std::string> lines;

		// Read data from file
		std::cout << "--- BEFORE ---\n";
		for (std::string line; std::getline(inStream, line); )
		{
			std::cout << line << '\n';

			lines.push_back(line);
		}



		// Sanitize data
		bool inComment = false;
		for (auto lineIt = lines.begin(); lineIt != lines.end();)
		{

			std::string& line = *lineIt;
			
			const bool startMultilineComment = (line.find("/*") != std::string::npos);
			const bool endMultilineComment = (line.find("*/") != std::string::npos);
			const bool ExtraSpace = (line.find("\n") != std::string::npos);

			while (line.find("//") != std::string::npos) { //removing comments that look like //
				size_t Beg = line.find("//");
				line.erase(Beg, line.find("\n", Beg) - Beg); //stops the removal after /n
			}

			if (inComment || startMultilineComment)
			{
				// Erase comment blocks
				inComment = !endMultilineComment;
				lineIt = lines.erase(lineIt);
			}
			else
			{
				// Erase redundant spaces
				line.erase(
					std::unique(line.begin(), line.end(),
						[](char lhs, char rhs)
				{
					return (lhs == rhs) && (lhs == ' ');
				}),
					line.end()
					);

				// Erase leading whitespace
				line.erase(0, line.find_first_not_of(' ')); //Searches the string for the first character that does not match any of the characters specified in its arguments.


				++lineIt;
			}

		}

		// Display sanitized data
		std::cout << "\n--- AFTER ---\n";
		std::copy(lines.cbegin(), lines.cend(), std::ostream_iterator<std::string>(std::cout, "\n"));
	}
	system("pause");
	return 0;
}

Below is the text file I am converting
program a2018;
/* This program computes and prints the value
of an expresson*/
var
//declare variables
ab1, cd, e33a,d18 :integer;
begin
ab1 =3;//intialize variables
cd= 4;
e33a =5 ;
show ( ab1 ); //display ab1

/* comuter the value of
the expression*/

d18=ab1*(cd+2* e33a)
show( d18 ); //display the value of d18
end

This is how the text file is supposed to look

program a2018;
var
ab1, cd, e33a,d18 :integer;
begin
ab1 =3;
cd= 4;
e33a =5 ;
show ( ab1 );
d18=ab1*(cd+2* e33a)
show( d18 );
end

This is how mine looks after running the program for reference
program a2018;
var

ab1, cd, e33a,d18 :integer;
begin
ab1 =3;
cd= 4;
e33a =5 ;
show ( ab1 );


d18=ab1*(cd+2* e33a)
show( d18 );
end
Why don't you check for an empty line(for-loop on line 17) before you add it to the vector?
Hello Tyler1,

When I ran the program with Thomas1965's suggestion the vector"lines" looks like this:

-		lines	{ size=16 }	std::vector<std::basic_string<char,std::char_traits<char>,std::allocator<char> >,std::allocator<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >
		[capacity]	19	int
+		[allocator]	allocator	std::_Compressed_pair<std::allocator<std::basic_string<char,std::char_traits<char>,std::allocator<char> > >,std::_Vector_val<std::_Simple_types<std::basic_string<char,std::char_traits<char>,std::allocator<char> > > >,1>
+		[0]	"program a2018;"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[1]	"/* This program computes and prints the value"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[2]	"of an expresson*/"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[3]	"var"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[4]	"//declare variables"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[5]	"ab1, cd, e33a,d18 :integer;"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[6]	"begin"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[7]	"ab1 =3;//intialize variables"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[8]	"cd= 4;"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[9]	"e33a =5 ;"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[10]	"show ( ab1 ); //display ab1"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[11]	"/* comuter the value of"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[12]	"the expression*/"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[13]	"d18=ab1*(cd+2* e33a)"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[14]	"show( d18 ); //display the value of d18"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
+		[15]	"end"	std::basic_string<char,std::char_traits<char>,std::allocator<char> >
Instead of deleting element 4 of the vector the program just erases or deletes the contents of the string leaving a blank line in the final output.

Hope that helps,

Andy
closed account (SECMoG1T)
thought i could give it a try

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <iostream>
#include <string>
#include <cctype>
#include <vector>
#include <fstream>

const std::string NO_DATA{};

///remove comments, empty lines or lines containing spaces only
std::string sanitize_data(std::string data,bool& in_multiline_comment/*for multiline comments*/)
{
    auto just_spaces = [](const std::string strs){
                            for(auto c : strs)
                             if(!std::isspace(c))
                                    return false;
                             return true;
                           };
    auto found      = [](auto pos){return (pos != std::string::npos);};

    ///check for multi-line comments
    if(in_multiline_comment)///this comment spreads in multiple lines
    {
        auto multiline_comm_end = data.find("*/");
        if(found(multiline_comm_end))
        {
            data ={data.begin()+multiline_comm_end+2,data.end()};
            in_multiline_comment = false;
        }

        else
            return NO_DATA;///the content of variable data is a comment spreading over multiple lines, discard
    }

    else
    {
        std::size_t multiline_start = data.find("/*"),multi_end=data.find("*/");
        if(found(multiline_start) && found(multi_end))
        {
            data = {data.begin(),data.begin()+multiline_start};
            data += {data.begin()+multi_end+2,data.end()};
        }

        else if(found(multiline_start) && !found(multi_end))
        {
            data = {data.begin(),data.begin()+multiline_start};
            in_multiline_comment = true;
        }
    }

    ///check for line comments
    auto line_comment = data.find("//");

    if(found(line_comment))
      data ={data.begin(),data.begin()+line_comment};


    if(data.empty() || data =="\n" || just_spaces(data))
        return NO_DATA;

    return data;
}



std::vector<std::string> sanitize_file(const std::string& file_name)
{
    std::vector<std::string> data_vec;

    std::ifstream my_file(file_name);

    if(!my_file)
    {
        std::cout<<"failed to open the file\n";
        return data_vec;
    }

    std::string str;
    bool multiline_flag = false;

    while(std::getline(my_file,str,'\n'))
    {
        str = sanitize_data(str,multiline_flag);

        if(str != NO_DATA)
          data_vec.push_back(str);
    }

    return data_vec;
}

int main()
{
    auto vec = sanitize_file("test.txt");

    for(auto str : vec)
        std::cout<<str<<"\n";
}

---

program a2018;
var
ab1, cd, e33a,d18 :integer;
begin
ab1 =3;
cd= 4;
e33a =5 ;
show ( ab1 );
d18=ab1*(cd+2* e33a)
show( d18 );
end
Last edited on
Topic archived. No new replies allowed.