need program debugger

the following C++ program is code for extracting email addresses from an external text file. the program is not working 100 percent. please debug or help to get this program working.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
//pirooz vahedian



#include <iostream>
using namespace std;
#include <fstream>
#include <string>
#include <cctype>
#include <algorithm>

class toLower {public: char operator() (char c) const {return tolower(c) ;}};

bool isValidLocalEmailCharacter(char c) {

  bool validCheck = false;
  if ((c>='A' && c<='Z') || (c>='a' && c<='z') || (c>='0' && c<='9') || (c=='.') || (c=='!') || (c=='#') || (c=='$') || (c=='%') || (c=='&') || (c==39) || (c=='*') || (c=='+') || (c=='-') || (c=='/') || (c=='=') || (c=='?') || (c=='^') || (c=='_') || (c=='`') || (c=='{') || (c=='|') || (c=='}') || (c=='~'))
    {
    validCheck = true;
    }
  return validCheck;
}

bool isValidDomainEmailCharacter(char c) {

  bool validCheck = false;
  if ((c>='A' && c<='Z') || (c>='a' && c<='z') || (c>='0' && c<='9') || (c=='.') || (c=='-') || (c=='+'))
    {
    validCheck = true;
    }
  return validCheck;
}
 
bool isDuplicate(string theAddress, string* address, int SIZE) {
  bool checkDuplicate = false;
  for (int i = 0; i < SIZE; i++)
    {
	string convertedAddress1 = address[i];
    transform(convertedAddress1.begin(), convertedAddress1.end(), convertedAddress1.begin(), toLower());
	  
    string convertedAddress2 = theAddress;
    transform(convertedAddress2.begin(), convertedAddress2.end(), convertedAddress2.begin(), toLower());
	  
    if (convertedAddress1 == convertedAddress2) checkDuplicate = true;
    }
  return checkDuplicate;
}

int main() { // Entry point

  ifstream fin;
  ofstream fout;

  // input file
  string inputFile; 
  cout << "Enter input file: [ blank for default] ";
  getline(cin, inputFile);
  if (inputFile.length() == 0)
    {
	inputFile = "fileContainingEmails.txt";
	cout << "fileContainingEmails.txt has been set as input" << endl;
	}
  
  // output file
  string outputFile; 
  cout << "Enter output file: [blank for default] ";
  getline(cin, outputFile);
    if (outputFile.length() == 0)
    {
	 cout << "copyPasteMyEmails.txt has been set as output file." << endl;
	outputFile = "copyPasteMyEmails.txt";
	}
  
  // Read input file
  fin.open(inputFile.c_str());
  if (!fin.good()) throw "I/O error";

  // make list
  const int MAX_ADDRESSES = 1000; // capacity
  int numAddresses = 0; // start empty
  string address[MAX_ADDRESSES];
  
  cout << endl;
  
  while(fin.good()) // loop till end of text file is reached
  {
  
  string line;
  getline(fin, line);
  
  int startPosition = 0; 
  int endPosition = 0;
  int numberOfPeriods = 0;
  bool doublePeriod = false;
  bool periodEndofSentence = false;
  
  for(int i = 0; i < line.length(); i++) // check lines
    {
	if(line[i] == '@') // check characters to locate position of @ symbol
	  {
	  int atSym = i;
	  for(int m = atSym-1; m >= 0; m--) // go back from position of @ to beginning if needed
	    {
		if(line[m] == '.') // count periods
		  {
		  
		  if(line[m+1] == '.')
		    {
			doublePeriod = true;
		    }
		  numberOfPeriods = numberOfPeriods + 1;
	      }
		if(isValidLocalEmailCharacter(line[m]) == false) // check till invalid character is found
	      {
		    startPosition = m+1; // start point of the next string
		    break;
		  } 
        }
		

		
	  for(int p = atSym+1; p <= line.length(); p++) // start from position of @ to end
	    {
		if(line[p] == '.') // count periods
		  {
		  if(line[p+1] == '.') // break if period is after @ symbol
		  	{
			  doublePeriod = true;
			  
		    }
		  numberOfPeriods = numberOfPeriods + 1;
	      }
	    if(isValidDomainEmailCharacter(line[p]) == false) // check till invalid character is found
	      {
		    endPosition = p;
			break;
		  }
        
		}
		int addressLength = endPosition - startPosition; // Calculate length of address
	    string theAddress = line.substr (startPosition,addressLength); // Capture substring
        cout << theAddress << endl;
	
		
		// check emails, if valid add to list.
        if (numAddresses < MAX_ADDRESSES) // check the list
	      {
		  if (numberOfPeriods > 0)  // Check if enough periods exist
		    {
			if ((line[startPosition] !='.') && (line[endPosition-1] !='.') || (line[endPosition] == ' ') && (line[endPosition-1] == '.')) // Does not start or end with period
		      {
			  if ((doublePeriod == false) && ( line[atSym+1] !='.')) // Does not have consecutive periods
			    {
			    if (addressLength < 254 && atSym - startPosition < 64 && endPosition - atSym < 253 ) // Restrict length
		   	      {
		          if (isDuplicate(theAddress,address,numAddresses) == false) // Skip duplicate addresses
		            {
		            address[numAddresses++] = theAddress; // Add to list
		   	        cout << " VALID" << endl;
		            }// if skip duplicate
		          else
		            {
		            cout << " INVALID! Duplicate." << endl;
		            }
		          } // if restrict length
		        else
		          {
		          cout << " INVALID! Max/min length expectation fail." << endl;
	              }
			    } // if double period
			  else
		        {
		          cout << " INVALID! Consecutive periods or period after @ symbol." << endl;
	            }
			  } // if start or end with period
			else
			  {
		      cout << " INVALID! Starts or ends with a period." << endl;
			  }
			}// if enough periods
		  else
		    {
		    cout << " INVALID! Not enough periods or email address incomplete." << endl;
		    }
			numberOfPeriods = 0; 
			 cout << endl;
		  } 
		 
      }
	} 
  } // While
  fin.close();
  
  if (numAddresses != 0) // open and write to the file if emails are found
  {
    fout.open(outputFile.c_str());
	cout << "Addresses Saved"<< endl;
    for (int i = 0; i < numAddresses; i++)
      { 
        cout << address[i] << endl;
        fout << address[i] << endl;
      }
	  
    cout << endl;
    cout << numAddresses << " emails were found and copied to the file " << outputFile << "." << endl;
	cout << endl;
	cout << "Open " << outputFile << " and copy its contents into the " << endl;
	cout << "'to', 'cc', or 'bcc' field of any email message." << endl;
	cout << endl;
	cout << "please use the 'bcc' field to keep the emails private." << endl;
	fout.close();
  }
  else // if no emails are in the file
    {
    cout << "The file doesnt have any emails. " << inputFile << endl;
    }

  return 0;
} // End Main


~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~
~~~~~~~~~~
~~~~~~~~~~
here is an input file content
wysiwyg
wheeeee
w00t
quadruple

"@."
@
derptard@gmail.com
derptard@gmail.c
pirooz@!.com

robertwright2132@gmail.com,
robertwright2132@gmail.com.
benleecooper@startpad.com,
professer@dvc.edu;
"@.
professer@dvc.ca.edu;
derp@.com
derp.@.com
derp.@com
derp.@gamil.com
$derp.@.com$
;derp.@com)(
<$$derp.@gamil.com$$>
derp#@$123@gmail.com.
sdf .derp#@$123@gmail.com
string convertedAddress2 = derp@gmail.com; a@a ;theAddress;

Last edited on
shouldn't be in the jobs section, please repost it the question in the right area and use code tags also.
Topic archived. No new replies allowed.