2

I'm trying to use regex to count the email addresses in a text file that contains HTML, and output that number. I can open and read the file, but I'm not sure how to use regex to search its contents for the pattern.

Update: I tried a small test text file and it works, but on the actual file, which is in HTML format, it outputs the number of phone numbers but not the number of email addresses.

int _tmain(int argc, _TCHAR* argv[])
{
ifstream htmlText;
string line;


string eduEmail = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.edu$";
string nonEduEmail = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.com$";
string phoneNumbers = "[[:digit:]]{2}-[[:digit:]]{3}-[[:digit:]]{4}";

int eduEmails = 0;
int nonEduEmails = 0;
int num_phoneNumbers = 0;

htmlText.open("ltudirectory.txt");


if (htmlText.good())
{
    while (getline(htmlText, line))
    {
        cout << line << endl;
        regex r_edu(eduEmail); //the pattern to search for edu emails
        regex r_com(nonEduEmail); //the pattern to search for .com emails
        regex r_phoneNumbers(phoneNumbers); //the pattern to search for .com    emails


        bool eduEmail_match = regex_search(line, r_edu);
        bool nonEmail_match = regex_search(line, r_com);
        bool phoneNumber_match = regex_search(line, r_phoneNumbers);


        if (eduEmail_match)
        {
            ++eduEmails;
        }
        if (nonEmail_match)
        {
            ++nonEduEmails;
        }
        if (phoneNumber_match)
        {
            ++num_phoneNumbers;
        }
    }
}


htmlText.close();
cout << "Emails ending with .edu : " << eduEmails << endl;
cout << "Emails ending with .com : " << nonEduEmails << endl;
cout << "Number of Phone Numbers: " << num_phoneNumbers << endl;


system("pause");
return 0;
}
BigDuke6
  • 117
  • 1
  • 13

1 Answers1

4
int _tmain(int argc, _TCHAR* argv[])
{
ifstream htmlText;
string line;
string eduEmail = "(\\w+)(\\.|_)?(\\w*)@(\\w+)(\\.(\\w+))+";


int testNum = 0;

list<string> l;


htmlText.open("ltudirectory.txt");
if (htmlText.good())
{
    while (getline(htmlText, line))
    {
        regex e(eduEmail); // the pattern
        bool match = regex_search(line, e);
        if (match) {
          ++testNum;
       }
    }
}

htmlText.close();

system("pause");
return 0;
}
Gumboy
  • 467
  • 4
  • 8
  • 1
    Is there a faster way to search? The text file is kinda long. – BigDuke6 Nov 29 '16 at 03:03
  • 1
    You could read the file into memory, then perform your search: http://stackoverflow.com/questions/17925051/fast-textfile-reading-in-c – Gumboy Nov 29 '16 at 03:57