This'll work for you:
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
/*
 * Demonstrates POSIX extended-regex matching: compiles "-l[[:digit:]]+"
 * and reports, for each sample string, whether it matches.
 *
 * Returns EXIT_FAILURE if the pattern cannot be compiled, 0 otherwise.
 */
int main(void) {
    const char tests[2][4] = {"-l4", "-lm"};
    const char match[] = "-l[[:digit:]]+";
    regex_t rmatch;

    /* regcomp() reports failure through its return value and does not set
       errno, so the diagnostic must come from regerror(), not perror(). */
    const int rc = regcomp(&rmatch, match, REG_EXTENDED);
    if ( rc != 0 ) {
        char errbuf[256];
        regerror(rc, &rmatch, errbuf, sizeof errbuf);
        fprintf(stderr, "Error compiling regex: %s\n", errbuf);
        return EXIT_FAILURE;
    }

    for ( size_t i = 0; i < sizeof tests / sizeof tests[0]; ++i ) {
        /* nmatch == 0: only interested in match / no match. */
        if ( regexec(&rmatch, tests[i], 0, NULL, 0) != 0 ) {
            printf("No match for '%s'.\n", tests[i]);
        } else {
            printf("Matched '%s'.\n", tests[i]);
        }
    }

    /* Release the memory regcomp() allocated for the compiled pattern. */
    regfree(&rmatch);
    return 0;
}
Output:
paul@local:~/src/c/scratch$ ./regex
Matched '-l4'.
No match for '-lm'.
paul@local:~/src/c/scratch$
EDIT: In the code you posted, you've got a couple of problems:
This line:
if(regcomp(&regex,"-l[[digit:]]+",0)){
should be:
if( regcomp(&regex, "-l[[:digit:]]+", REG_EXTENDED) ) {
since you're using extended regular expressions. If you change this line, your pattern will successfully match.
Your segmentation fault actually has nothing to do with your regular expressions; it comes from calling this:
fclose(f);
when on an execution path where you never successfully opened a file. You should change:
FILE *f;
to:
FILE *f = NULL;
and change:
fclose(f);
to:
if ( f ) {
fclose(f);
}
Making yourself familiar with gdb will go a long, long way towards getting you able to track these things down yourself.
Here's a modified version of your own code that'll work and includes some basic error-checking:
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
void reversetext(FILE * f);

/*
 * Entry point. Behaviour is selected by argv[1]:
 *   "-r" followed by a file name  - print that file reversed;
 *   text matching -l<digits>      - report that it matched the regex;
 *   anything else                 - treat it as a file name and print the file.
 *
 * Returns EXIT_FAILURE when no argument is given, when the regex cannot be
 * compiled, or when the requested file cannot be opened; EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char *argv[]) {
    regex_t rmatch;
    FILE *f = NULL;
    int status = EXIT_SUCCESS;

    if ( argc < 2 ) {
        printf("You need to enter at least one command line argument.\n");
        return EXIT_FAILURE;
    }

    if ( regcomp(&rmatch, "-l[[:digit:]]+", REG_EXTENDED) ) {
        printf("Could not compile regex.\n");
        return EXIT_FAILURE;
    }

    /* From here on every path falls through to the cleanup at the bottom,
       so the compiled regex and any open file are always released. */
    if ( strcmp(argv[1], "-r") == 0 && argc > 2 ) {
        printf("argv[1] is -r\n");
        if ( (f = fopen(argv[2], "r")) == NULL ) {
            fprintf(stderr, "Couldn't open file %s\n", argv[2]);
            status = EXIT_FAILURE;
        } else {
            reversetext(f);
        }
    } else if ( regexec(&rmatch, argv[1], 0, NULL, 0) == 0 ) {
        printf("Matched '%s' to regex\n", argv[1]);
    } else {
        if ( (f = fopen(argv[1], "r")) == NULL ) {
            fprintf(stderr, "Couldn't open file %s\n", argv[1]);
            status = EXIT_FAILURE;
        } else {
            int c;  /* int, not char, so EOF is distinguishable from data */
            while ( (c = getc(f)) != EOF ) {
                printf("%c", c);
            }
        }
    }

    /* f stays NULL on the paths that never opened a file. */
    if ( f ) {
        fclose(f);
    }
    regfree(&rmatch);
    return status;
}
/*
 * Writes everything that remains to be read from f to stdout, last
 * character first.
 *
 * Each call reads one character, recurses to emit whatever follows it, and
 * only then prints its own character; the recursion depth therefore equals
 * the number of characters read.
 */
void reversetext(FILE * f) {
    const int ch = getc(f);

    if (ch != EOF) {
        reversetext(f);
        putchar(ch);
    }
}
Output:
paul@local:~/src/c/scratch$ ./regex2 -l4
Matched '-l4' to regex
paul@local:~/src/c/scratch$ ./regex2 -r fakefile
argv[1] is -r
Couldn't open file fakefile
paul@local:~/src/c/scratch$ ./regex2 -tribbles
Couldn't open file -tribbles
paul@local:~/src/c/scratch$ ./regex2 testfile
This is a test.
paul@local:~/src/c/scratch$ ./regex2 -r testfile
argv[1] is -r
.tset a si sihTpaul@local:~/src/c/scratch$
Note that when you're using getc() and friends, they use ints, not chars. This is necessary in order to be able to store EOF.
EDIT 2: Per the question in your comment, you need to do four things to match a sub-group, in this case, the numeric part of the match.
Set up an array of type regmatch_t. You'll need at least two elements, since the first will hold the match for the entire regex, and you'll need at least a second for one sub-group. In the code below, I've added:
#define MAX_MATCHES 10
regmatch_t m_group[MAX_MATCHES];
Put parentheses around the part of the regex you want to extract. In the code below, I've changed:
"-l[[:digit:]]+"
to:
"-l([[:digit:]]+)"
Pass your regmatch_t array to regexec() when you call it, along with the size. In the code below, I've changed:
} else if (regexec(&rmatch, argv[1], 0, NULL, 0) == 0) {
to:
} else if (regexec(&rmatch, argv[1], MAX_MATCHES, m_group, 0) == 0) {
Cycle through the array and deal with each match. Every time the rm_so member of a regmatch_t array element is not -1, you have a match. All I'm doing here is copying them to a buffer and printing them out:
} else if ( regexec(&rmatch, argv[1], MAX_MATCHES, m_group, 0) == 0 ) {
printf("Matched '%s' to regex\n", argv[1]);
for ( int i = 0; i < MAX_MATCHES && m_group[i].rm_so != -1; ++i ) {
char buffer[1000] = {0};
char * match_start = &argv[1][m_group[i].rm_so];
size_t match_size = m_group[i].rm_eo - m_group[i].rm_so;
size_t match_len = match_size > 999 ? 999 : match_size;
strncpy(buffer, match_start, match_len);
printf("Matched group %d was '%s'\n", i, buffer);
}
}
Here's updated code:
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/* Capacity of the regmatch_t array: slot 0 is the whole match, the rest
   are parenthesised sub-groups. */
#define MAX_MATCHES 10

void reversetext(FILE * f);

/*
 * Entry point. Behaviour is selected by argv[1]:
 *   "-r" followed by a file name  - print that file reversed;
 *   text matching -l<digits>      - report the match and print every
 *                                   captured group (group 1 is the digits);
 *   anything else                 - treat it as a file name and print the file.
 *
 * Returns EXIT_FAILURE when no argument is given, when the regex cannot be
 * compiled, or when the requested file cannot be opened; EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char *argv[]) {
    regex_t rmatch;
    regmatch_t m_group[MAX_MATCHES];
    FILE *f = NULL;
    int status = EXIT_SUCCESS;

    if ( argc < 2 ) {
        printf("You need to enter at least one command line argument.\n");
        return EXIT_FAILURE;
    }

    /* The '+' must sit inside the parentheses so that group 1 captures the
       whole run of digits rather than only the last one. */
    if ( regcomp(&rmatch, "-l([[:digit:]]+)", REG_EXTENDED) ) {
        printf("Could not compile regex.\n");
        return EXIT_FAILURE;
    }

    /* From here on every path falls through to the cleanup at the bottom,
       so the compiled regex and any open file are always released. */
    if ( strcmp(argv[1], "-r") == 0 && argc > 2 ) {
        printf("argv[1] is -r\n");
        if ( (f = fopen(argv[2], "r")) == NULL ) {
            fprintf(stderr, "Couldn't open file %s\n", argv[2]);
            status = EXIT_FAILURE;
        } else {
            reversetext(f);
        }
    } else if ( regexec(&rmatch, argv[1], MAX_MATCHES, m_group, 0) == 0 ) {
        printf("Matched '%s' to regex\n", argv[1]);
        /* regexec() sets rm_so to -1 in every unused slot, which marks the
           end of the captured groups. */
        for ( int i = 0; i < MAX_MATCHES && m_group[i].rm_so != -1; ++i ) {
            char buffer[1000] = {0};
            const char *match_start = &argv[1][m_group[i].rm_so];
            size_t match_size = (size_t) (m_group[i].rm_eo - m_group[i].rm_so);
            /* Cap the copy one short of the buffer so the zero-initialised
               final byte always terminates the string. */
            size_t match_len = match_size > sizeof buffer - 1
                             ? sizeof buffer - 1
                             : match_size;
            strncpy(buffer, match_start, match_len);
            printf("Matched group %d was '%s'\n", i, buffer);
        }
    } else {
        if ( (f = fopen(argv[1], "r")) == NULL ) {
            fprintf(stderr, "Couldn't open file %s\n", argv[1]);
            status = EXIT_FAILURE;
        } else {
            int c;  /* int, not char, so EOF is distinguishable from data */
            while ( (c = getc(f)) != EOF ) {
                printf("%c", c);
            }
        }
    }

    /* f stays NULL on the paths that never opened a file. */
    if ( f ) {
        fclose(f);
    }
    regfree(&rmatch);
    return status;
}
/*
 * Prints the unread remainder of f to stdout in reverse order.
 *
 * One character is read per call; the characters after it are emitted by
 * the recursive call before this one is written, so the recursion goes one
 * level deep per character read.
 */
void reversetext(FILE * f) {
    const int ch = getc(f);

    if (ch != EOF) {
        reversetext(f);
        putchar(ch);
    }
}
Outputs:
paul@local:~/src/c/scratch$ ./regex2 -l4
Matched '-l4' to regex
Matched group 0 was '-l4'
Matched group 1 was '4'
paul@local:~/src/c/scratch$