i'm working on a project that searches for valid email addresses, identifies and removes duplicate addresses, and then posts them to a separate file.
my problem is removing the duplicate addresses... here's what i have so far, any help would be greatly appreciated as i can't seem to find a fitting answer from any previous posts. cheers!
#include <deque>
#include <fstream>
#include <iostream>
#include <string>
usingnamespace std;
bool isValidEmailCharacter(char c)
{
bool result = false;
if (c >= 'A' && c <= 'Z') result = true;
elseif (c >= 'a' && c <= 'z') result = true;
elseif (c >= '0' && c <= '9') result = true;
elseif (c == '.' || c == '-' || c == '+') result = true;
return result;
} // isValidEmailCharacter
bool noDuplicate(deque<string>& email, string s)
{
bool found = false;
int i;
for (i = 0; i < email.size(); i++)
{
if (email[i] != s)
{
found = true;
break;
} // if
} // for
return found;
} // noDuplicate
void printEmails(deque<string>& email)
{
int i;
for (i = 0; i < email.size(); i++)
{
cout << email[i] << endl;
} // for
} // printEmails
int main()
{
deque<string> email; // email list
string input; // name of input file
string output; // name of output file
string lineFromFile; // line of input file
string defaultInput = "fileContainingEmails.txt";
string defaultOutput = "copyPasteMyEmails.txt";
ifstream fin;
ofstream fout;
int i; // iterator
cout << "This program was made to find any email addresses present in a user-designated \ntext file, extract them and post them in the output file of the user's choice.\n" << endl;
cout << "Enter input filename [default: fileContainingEmails.txt]: ";
getline(cin, input);
if (input == "")
input = defaultInput;
cout << "Enter output filename [default: copyPasteMyEmails.txt]: ";
getline(cin, output);
if (output == "" && input == "")
output = defaultOutput;
elseif (output == "" && input != "")
output = input;
cout << "Input file: " << input << endl;
cout << "Output file: " << output << endl;
fin.open(input.c_str());
if (!fin.good()) throw"I/O error";
while (fin.good()) // search input file
{
getline(fin, lineFromFile);
for (i = 0; i < lineFromFile.length(); i++)
{
bool hasDot = false;
if (lineFromFile[i] == '@' && i && i - 1 > 0 && i + 1 < lineFromFile.length())
{
int s;
for (s = i - 1; s > 0; s--) // searches start of email for valid characters
{
if (isValidEmailCharacter(lineFromFile[s]) == false)
{
s++;
break;
}
} // for
int e;
for (e = i + 1; e <= lineFromFile.length(); e++) // searches end of email for valid characters
{
if (lineFromFile[e] == '.') hasDot = true;
if (isValidEmailCharacter(lineFromFile[e]) == false) break;
} // for
i = e + 1;
if(hasDot) // dot validation
{
string s = lineFromFile.substr(s, e - s);
if(noDuplicate) email.push_back(lineFromFile.substr(s, e - s));
} // if
} // if
} // for
} // while
fin.close();
printEmails(email);
cout << endl;
if (email.size() > 0)
{
cout << email.size() << " email address(es) were found, and copied to the file: " << output << endl;
fout.open(output.c_str()); // write email addresses to text file
for (i = 0; i < email.size(); i++)
{
fout << email[i] << "; ";
} // for
fout.close();
} // if
else
cout << "Sorry, no email addresses were found in the file: " << input << endl;
return 0;
} // main
for (e = i + 1; e <= lineFromFile.length(); e++)
{
if (lineFromFile[e] == '.') //out of bounds
The logic in noDuplicate is wrong. If you found an element that is equal to the one you are testing, then you could say that the element is already in the list.
All you need to do is convert each email address to lowercase, then add them into a set<string> container. This container will only keep unique entries. Once you've loaded them all, write the contents of the set out to a file :)