I am trying to use the PCRE library in C to match, with a regex, multiple links to US senators' pages on a webpage.
To do this, I need the regex to return all 100 matches so that I can then print out the web addresses for the emails.
From my research, the PCRE library looks like the way to do this, but I don't know how to get multiple matches from a single string.
This is the regex pattern that I am going to use:
Contact:\s+<a\s+(?:[^>]*?\s+)?href=(["'])(.*?)\1
Here is the code I currently have to work with:
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include <pcre.h>
/*
 * Downloads the Senate contact-information page with wget, reads the whole
 * file into a NUL-terminated heap buffer, then releases the buffer and
 * deletes the downloaded file.
 *
 * Returns 0 when the page was downloaded and read completely, 1 on any
 * failure (download, open, size query, allocation, or short read).
 */
int main(void) {
    // Exit status; stays non-zero until the whole file has been read.
    int status = 1;
    FILE *file = NULL;
    char *buffer = NULL;
    long size;
    size_t nread;

    // Wget on Senate webpage; wget stores it as senators_cfm.cfm in the
    // current directory. A non-zero result means the shell or wget failed.
    if (system("wget -q http://www.senate.gov/general/contact_information/senators_cfm.cfm") != 0) {
        printf("Was unable to download file \n");
        return 1;
    }

    // Attempt to open file. Binary mode keeps the ftell() offset equal to
    // the number of bytes fread() will deliver.
    file = fopen("senators_cfm.cfm", "rb");
    if (file == NULL) {
        printf("Was unable to open file \n");
        return 1;
    }

    // Seek to the end and read the offset to learn the file size.
    // ftell() returns -1 on error, which must never reach calloc()/fread().
    if (fseek(file, 0L, SEEK_END) != 0 || (size = ftell(file)) < 0) {
        printf("Was unable to determine file size \n");
        goto out_close;
    }

    // Allocate one extra zeroed byte so the buffer is always NUL-terminated
    // and can be handed to string or regex functions.
    buffer = calloc((size_t)size + 1, sizeof *buffer);
    if (buffer == NULL) {
        printf("Unable to allocate memory needed \n");
        goto out_close;
    }

    // Reset the reader to start of file
    rewind(file);

    // Read whole file into buffer; a short read means the contents are
    // incomplete and must not be used.
    nread = fread(buffer, sizeof *buffer, (size_t)size, file);
    if (nread != (size_t)size) {
        printf("Was unable to read file \n");
        goto out_free;
    }

    // buffer now holds the complete page (size bytes plus a terminating NUL).
    // Any processing of the page contents belongs here, before the cleanup.
    status = 0;

out_free:
    // Free all information that we allocated memory for
    free(buffer);
out_close:
    fclose(file);
    // Remove the downloaded file on both the success and failure paths.
    unlink("senators_cfm.cfm");
    return status;
}