1

I have written a program that looks for valid email addresses in a text file that has many emails both valid & invalid.

& then outputs the vaild emails in another text file.

The problem i have is creating a substring or object that will read the line left of the @ symbol & read true if there are multiple '.'

for example: read this line of text as a valid email - Jimmy.Cole@yahoo.com

&

if there are more '.' after the @ symbol read as false. here is my code:

#include<iostream>
#include<string>
#include<fstream>
using namespace std;
const string DEFAULT_INPUT_FILENAME="fileContainingEmails.txt";
const string DEFAULT_OUTPUT_FILENAME="copyPasteMyEmails.txt";
const int N=1000;   //assuming atmost 1000 email 

//function prototypes
bool isNotDuplicateEmail(string emails[], int emailCount, string emailTemp);
bool isValidEmail(string emails[], string emailTemp);
void WriteToFile(string emails[], int emailCount, string outputFile);
void DisplayEmailsToConsole(string emails[], int emailCount);
bool isValidEmailChar(char c);
int countOccurenceOfAt(string emailTemp);
int countOccurenceOfDot(string emailTemp);

int main()
{
    //variable declaration
    ifstream fin;
    string inputFile;
    string outputFile;
    int emailCount=0;
    string emails[N];


    //ask user for input and output file name
    cout<<"Enter input file name: ";
    getline(cin,inputFile);
    cout<<"Enter output file name: ";
    getline(cin,outputFile);

    //check for default names
    if(inputFile=="")
        inputFile=DEFAULT_INPUT_FILENAME;
    if(outputFile=="")
        outputFile=DEFAULT_OUTPUT_FILENAME;

    cout<<"Input File Name = "<<inputFile<<endl;
    cout<<"Output File Name = "<<outputFile<<endl;

    //open input file
    fin.open(inputFile.c_str());
    //if file does not exists, display error message and return
    if(!fin)
    {
        cout<<"ERROR: "<<inputFile<<" doen not exists."<<endl;
        return 1;
    }

    string emailTemp;
    /*read each line from file, and if it is not a duplicate email and a valid email
    then add it to email list and increment count*/
    while(getline(fin,emailTemp))
    {
        if(isNotDuplicateEmail(emails,emailCount,emailTemp) && isValidEmail(emails,emailTemp))
        {
            emails[emailCount]=emailTemp;
            emailCount++;
        }
    }
    //close input file
    fin.close();

    //call function to write emails to output file and console
    if(emailCount>0)
    {
        WriteToFile(emails, emailCount, outputFile);
        cout << endl << emailCount << " email addresses were found, and copied to the file " << outputFile<<endl;
        DisplayEmailsToConsole(emails, emailCount);
    }
    else
    {
        cout << "Sorry, no email addresses were found in the file " << inputFile << endl;
    }


    cout << endl<< "Press ENTER to continue..." << endl;
    cin.get();
    return 0;
}

bool isNotDuplicateEmail(string emails[], int emailCount, string emailTemp)
{
    /*Check each email address and if a duplicate email is found, return false*/
    /*If email is not a duplicate email, return true*/
    for(int i=0;i<emailCount;i++)
    {
        if(emailTemp==emails[i])
            return false;
    }
    return true;
}
bool isValidEmail(string emails[], string emailTemp)
{
    if(emailTemp=="")
        return false;
    /*If there are more than 1 @*/
    if (countOccurenceOfAt(emailTemp)!=1)
        return false;
    /*If there are more than 1 .*/
    if(countOccurenceOfDot(emailTemp)!=1)
        return false;


    int index1=-1;
    int index2=-1;


    //check each character for valid email char
    for (int i=0; i<emailTemp.size(); i++)
    {
        if(!isValidEmailChar(emailTemp[i]))
            return false;
    }

    //find index of @ and .
    index1=emailTemp.find_first_of('@');
    index2=emailTemp.find_first_of('.');

    if(index1<1)
        return false;
    if(index2<index1)
        return false;
    if((index2-index1)==1)
        return false;

    if(((emailTemp.size()-1)-index2)==0)
        return false;

    return true;
}

bool isValidEmailChar(char c)
{
    /* check for valid email characters 0-9, @, ., A-Z, a-z, _, +, - */
    if(c>='0' && c<='9')
        return true;
    if(c>='A' && c<='Z')
        return true;
    if(c>='a' && c<='z')
        return true;
    if(c=='@' || c=='.' || c=='_' || c=='+' || c=='-')
        return true;

    return false;

}

/*Function to count occurence of @ in email address*/
int countOccurenceOfAt(string emailTemp)
{
    int count=0;
    for(int i=0;i<emailTemp.size();i++)
    {
        if(emailTemp[i]=='@')
            count++;
    }
    return count;
}

/*Function to count occurence of . in email address*/
int countOccurenceOfDot(string emailTemp)
{
    int count=0;
    for(int i=0;i<emailTemp.size();i++)
    {
        if(emailTemp[i]=='.')
            count++;
    }
    return count;
}

void DisplayEmailsToConsole(string emails[], int emailCount)
{
    /*Display console message and then display each email address, one at a line*/
    cout << endl << endl << "You can open the output file and copy/paste its contents into the \"to\", \"cc\", or \"bcc\" field of any email message. It is best to use the \"bcc\" field so that everyone's email address does not appear in the message, to protect their privacy"<<endl;
    cout << endl << "Email List is: "<< endl;
    for (int i=0; i<emailCount; i++)
    {
        cout << emails[i] << endl;
        cout << endl;
    }
}

void WriteToFile(string emails[], int emailCount, string outputFile)
{
    /*Open file and write each email address to output file followed by ; */
    ofstream fout;
    fout.open(outputFile.c_str());
    for(int i=0; i<emailCount; i++)
    {
        fout << emails[i] << "; " << endl;
    }
    //close file
    fout.close();
}
Deanie
  • 2,316
  • 2
  • 19
  • 35
  • 6
    That's a lot of code. You should narrow it down a little, `WriteToFile` f.ex. is just noise in a question about substrings and input validation. (IMHO) – jrok Jul 23 '12 at 17:54
  • have a look at this other question: http://stackoverflow.com/questions/201323/using-a-regular-expression-to-validate-an-email-address – betabandido Jul 23 '12 at 18:09

3 Answers3

1

Following code is from the book Secure Programming Cookbook for C and C++:

#include <string.h>

int spc_email_isvalid(const char *address) {
  int        count = 0;
  const char *c, *domain;
  static char *rfc822_specials = "()<>@,;:\\\"[]";

  /* first we validate the name portion (name@domain) */
  for (c = address;  *c;  c++) {
    if (*c == '\"' && (c == address || *(c - 1) == '.' || *(c - 1) == 
        '\"')) {
      while (*++c) {
        if (*c == '\"') break;
        if (*c == '\\' && (*++c == ' ')) continue;
        if (*c <= ' ' || *c >= 127) return 0;
      }
      if (!*c++) return 0;
      if (*c == '@') break;
      if (*c != '.') return 0;
      continue;
    }
    if (*c == '@') break;
    if (*c <= ' ' || *c >= 127) return 0;
    if (strchr(rfc822_specials, *c)) return 0;
  }
  if (c == address || *(c - 1) == '.') return 0;

  /* next we validate the domain portion (name@domain) */
  if (!*(domain = ++c)) return 0;
  do {
    if (*c == '.') {
      if (c == domain || *(c - 1) == '.') return 0;
      count++;
    }
    if (*c <= ' ' || *c >= 127) return 0;
    if (strchr(rfc822_specials, *c)) return 0;
  } while (*++c);

  return (count >= 1);
}
perreal
  • 94,503
  • 21
  • 155
  • 181
1

Take a look at this link: http://www.codeproject.com/Articles/22777/Email-Address-Validation-Using-Regular-Expression

You can use regular expressions to take care of pattern matching with much better performance.

hashtpaa
  • 389
  • 2
  • 11
1

If all you need to know is if there are multiple periods before the @ symbol, then just do:

if (email.find_first_of('.') == email.find_last_of('.'))

If that expression returns true, then you know that the string has only one period (or none).

Mike Belotti
  • 405
  • 3
  • 8
  • Where is the check before the @ symbol? This returns false for "Jimmy.Cole@yahoo.com". – Marlon Jul 23 '12 at 18:16
  • Let "email" equal the part of the address before the @ symbol. So, let "email" equal "Jimmy.Cole" – Mike Belotti Jul 23 '12 at 18:21
  • hey Mike, i dont know where i should place this line of code. I have multiple emails in a text file with only a limited number of valid or true emails. my program reads the true emails fine with 1 exception. it cannot read emails like jonny.v@gmail.com bc of the . before the @ symbol. i cant seem to find the correct line to place this on. i think my head is overloaded or something. – riley hopkins Jul 23 '12 at 20:41
  • I'm thinking it'll go in `isValidEmail`. You can split up `emailTemp` into two strings--use `substr` to grab what's before the @ and what's after, storing them in separate strings. Once you have those, you can check them for whatever would make them invalid--so, you could take the "before" string and run it through that if check I mentioned--if that returns true, then you know you're good as far as periods go. That help? – Mike Belotti Jul 23 '12 at 20:53