My program emulates the behavior of the grep command. That is, when executed as ./main grep <pattern> <file.txt>, it searches the file line by line using a buffer and prints the number of each line where a match was found, together with the content of that line. The program works correctly when run without threads, but when run with threads it prints the matches in an infinite loop and the thread counter grows without bound.
You only need to compile the program with gcc -o main main.c (add -pthread if your toolchain needs it to link the pthread functions) and run it with ./main grep <pattern> <file.txt>, where <pattern> is the word, or part of a word, that you want to search for in order to see the number and content of each line in which it appears, and <file.txt> can be any .txt file that contains some text, such as a sample file.
#include "regex.h"
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define BUFFER_SIZE 100
#define NUM_THREADS 5
/* Search parameters taken from the command line by init_parameter_handler(). */
typedef struct {
    char *pattern;  /* expression to search for; handed to regcomp() by check_match() */
    char *filename; /* path of the file to scan; handed to fopen() by init_file_handler() */
} Parameter_Handler;
/*
 * State shared by every worker thread: the open file, the fragment currently
 * being scanned and the mutex that func() holds around each read_fragment()
 * call.
 */
typedef struct {
    char buffer[BUFFER_SIZE]; /* fragment of the file currently being scanned */
    int current_offset;       /* bytes of the file consumed so far; restore_offset() seeks to it */
    FILE *file;               /* stream opened by init_file_handler() */
    int num_line;             /* running line counter, advanced by check_match() */
    int char_count;           /* zeroed by init_file_handler(); no other use visible here */
    regex_t regex;            /* compiled and released inside check_match() */
    Parameter_Handler ph;     /* pattern and file name from the command line */
    pthread_mutex_t mutex;    /* created by init_file_handler(), locked by func() */
    int stop;                 /* zero-initialised flag; presumably tells workers to stop - TODO confirm */
} File_Handler;
/*
 * Pairs a numeric thread id with the shared File_Handler.
 * NOTE(review): no use of this type is visible in this file - confirm whether
 * it is still needed.
 */
typedef struct {
    int id;           /* identifier of the thread */
    File_Handler *fh; /* shared search state */
} Thread_manager;
/*
 * Validate the command line and record the search parameters.
 *
 * ph   - receives the pattern (args[2]) and the file name (args[3])
 * args - argument vector; args[1] must be the literal "grep"
 * n    - number of entries in args; at least 4 are required
 *
 * Prints a message and exits with status 1 when either check fails.
 */
void init_parameter_handler(Parameter_Handler *ph, char *args[], int n) {
    const int have_enough_args = (n >= 4);

    if (!have_enough_args) {
        printf("Parameters number is inconsistent");
        exit(1);
    }

    if (strcmp(args[1], "grep") != 0) {
        printf("Invalid command %s", args[1]);
        exit(1);
    }

    ph->filename = args[3];
    ph->pattern = args[2];
}
/*
 * Prepare *fh for a search: zero the counters and the stop flag, parse the
 * command line, create the mutex and open the input file for reading.
 *
 * Exits with status 1 if the arguments are rejected by
 * init_parameter_handler() or if the file cannot be opened.
 */
void init_file_handler(File_Handler *fh, int argc, char *argv[]) {
    fh->stop = 0;
    fh->char_count = 0;
    fh->num_line = 0;
    fh->current_offset = 0;

    /* Start from empty strings; the real values come from argv just below. */
    fh->ph = (Parameter_Handler){"", ""};
    init_parameter_handler(&fh->ph, argv, argc);

    pthread_mutex_init(&fh->mutex, NULL);

    fh->file = fopen(fh->ph.filename, "r");
    if (fh->file != NULL) {
        return;
    }

    printf("Could not open the file\n");
    exit(1);
}
/*
 * Scan the fragment held in fh->buffer line by line and print every line
 * that matches fh->ph.pattern as "[line number] text".
 *
 * fh->num_line is advanced once per line, blank lines included, so the
 * printed numbers agree with the positions in the file. The regular
 * expression is compiled at the start of each call and released before
 * returning; a pattern that fails to compile ends the program with status 1.
 * fh->buffer itself is left unmodified.
 */
void check_match(File_Handler *fh) {
    if (regcomp(&fh->regex, fh->ph.pattern, REG_EXTENDED) != 0) {
        printf("Failed to compile regex.\n");
        exit(1);
    }

    /* Work on a private, always-terminated copy: the shared buffer may be
     * completely full and then carries no terminating NUL of its own, so
     * scanning it directly could run into the neighbouring struct members. */
    char text[sizeof fh->buffer + 1];
    memcpy(text, fh->buffer, sizeof fh->buffer);
    text[sizeof fh->buffer] = '\0';

    /* Split by hand instead of using strtok(): strtok() treats a run of
     * newlines as a single delimiter, which drops blank lines from the
     * count and makes every later line number too small. */
    char *line = text;
    while (*line != '\0') {
        char *eol = strchr(line, '\n');
        if (eol != NULL) {
            *eol = '\0';
        }

        fh->num_line++;
        if (regexec(&fh->regex, line, 0, NULL, 0) == 0) {
            printf("[%d] %s\n", fh->num_line, line);
        }

        if (eol == NULL) {
            break; /* final line of the fragment had no trailing newline */
        }
        line = eol + 1;
    }

    regfree(&fh->regex);
}
/*
 * Called when a fragment of `offset` bytes does not end on a line boundary.
 *
 * Trims the fragment in fh->buffer back to its last complete line, advances
 * fh->current_offset by the number of bytes kept and repositions the stream
 * there, so the cut-off line is read again at the start of the next fragment.
 *
 * If the fragment holds no newline at all (a line longer than the buffer),
 * the line is split: as many bytes as fit next to a terminating NUL are kept
 * and the rest is re-read by the next call. This keeps the scan inside the
 * buffer and guarantees forward progress.
 *
 * Exits with status 1 if the stream cannot be repositioned.
 */
void restore_offset(File_Handler *fh, int offset) {
    const int capacity = (int)sizeof fh->buffer;

    if (offset <= 0) {
        return; /* nothing was read, nothing to trim */
    }
    if (offset > capacity) {
        offset = capacity;
    }

    /* Walk back to just past the last newline, never leaving the buffer. */
    int keep = offset;
    while (keep > 0 && fh->buffer[keep - 1] != '\n') {
        keep--;
    }

    if (keep == 0) {
        /* No newline found: keep what fits while leaving room for the NUL. */
        keep = (offset < capacity) ? offset : capacity - 1;
    }

    if (keep < capacity) {
        fh->buffer[keep] = '\0';
    }

    fh->current_offset += keep;
    if (fseek(fh->file, fh->current_offset, SEEK_SET) != 0) {
        printf("Could not reposition the file\n");
        exit(1);
    }
}
/*
 * Read the next fragment of the file into fh->buffer and scan it for matches.
 *
 * A full fragment that stops in the middle of a line is trimmed back to its
 * last complete line by restore_offset(), so that line is read again next
 * time. A short read is taken to be the end of the input.
 *
 * Returns 0 when more data may follow, 1 once the final fragment has been
 * processed.
 */
int read_fragment(File_Handler *fh) {
    /* Read one byte less than the buffer holds: together with the memset
     * this guarantees a terminating NUL, so the fragment is always a valid
     * C string for the code that scans it. */
    const size_t capacity = sizeof fh->buffer - 1;

    memset(fh->buffer, 0, sizeof fh->buffer);
    const size_t got = fread(fh->buffer, sizeof(char), capacity, fh->file);

    if (got < capacity) {
        /* Short read: last fragment of the file. */
        fh->current_offset += (int)got;
        check_match(fh);
        return 1;
    }

    if (fh->buffer[got - 1] != '\n') {
        restore_offset(fh, (int)got);
    } else {
        fh->current_offset += (int)got;
    }
    check_match(fh);
    return 0;
}
void *func(void *arg) {
File_Handler *fh = (File_Handler *)arg;
int status;
while (1) {
pthread_mutex_lock(&fh->mutex);
status = read_fragment(fh);
pthread_mutex_unlock(&fh->mutex);
if (status) {
break;
}
}
}
int main(int argc, char *argv[]) {
File_Handler fh;
init_file_handler(&fh, argc, argv);
pthread_t thread[NUM_THREADS];
int t_id[NUM_THREADS];
for (int i = 0; i < NUM_THREADS; i++) {
pthread_create(&thread[i], NULL, func, (void *)&fh);
}
for (int i = 0; i < NUM_THREADS; i++)
pthread_join(thread[i], NULL);
pthread_mutex_destroy(&fh.mutex);
return 0;
}
I want the threads to share the File_Handler, with each one processing a part of the file, and, once the whole file has been read, to produce and display the results together.