I'm very new to C, am struggling with proper memory management, and am running into numerous segfaults in my program.
My ultimate goal here is to take a text file and number of threads entered as user arguments, get the file size, and then split the file based on number of threads entered by the user.
Each thread will then read a portion of the file and extract tokens from the chunk it's reading. If a token is longer than 5 characters, it is added to an array along with the number of times that token has occurred in the entire text. Eventually I am hoping to get a list of the top n words longer than 5 characters used in the text.
However this is maybe my third time ever using C and I am struggling quite a bit and am getting quite a few errors. I've been trying to work through this myself using valgrind and other debugging tools, but I'm pretty lost.
From valgrind, I get the following messages:
==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B3875: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634== at 0x50B386B: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
I am also seeing the message "Address xxx is 0 bytes after a block of size 60 alloc'd".
I believe my issue lies within these lines:
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
Is this because I am not properly allocating memory for the wordsStruct array words? I'm not sure how to fix it; any help would be appreciated.
Thanks
Full code below
#include <ctype.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*
 * One entry of the word-frequency table filled in by processFile.
 * The table is an array of these entries.
 */
typedef struct {
/* NUL-terminated word text; at most 49 characters fit before the terminator. */
char word[50];
/* Number of occurrences recorded for this word (set to 1 on insertion, then incremented). */
int count;
/* Number of entries in the table; processFile writes it on the first element of the array. */
int totalWords;
} wordsStruct ;
/*
 * Arguments handed to one processFile worker. main fills one copy per thread
 * and passes a pointer to it through pthread_create.
 */
struct argStruct {
/* Input stream the worker reads its portion from (the same stream is given to every worker). */
FILE *file;
/* Byte offset at which this worker's portion of the file begins. */
int start;
/* Byte offset at which this worker's portion of the file ends. */
int end;
/* Index of the worker; main stores its loop counter here. */
int count;
/* Word table that receives the words and their counts (the same table is given to every worker). */
wordsStruct *words;
};
/*
 * Returns the number of characters in s that precede the terminating '\0'.
 * s must point to a NUL-terminated string.
 */
int stringLength(char s[]) {
    const char *cursor = s;

    /* Walk forward until the terminator; the distance travelled is the length. */
    for (; *cursor != '\0'; ++cursor) {
    }
    return (int) (cursor - s);
}
/*
 * Normalises a token in place: letters are converted to lower case and every
 * character that is not a letter 'a'..'z' after conversion is removed.
 *
 * line must be a writable, NUL-terminated string. The result is never longer
 * than the input, so no extra space is needed.
 *
 * The previous implementation wrote the terminator inside the shift loop,
 * which cut the string off at the first non-letter ("he,llo" became "he")
 * instead of dropping just that character.
 */
void groomString(char *line) {
    char *out = line;

    for (const char *in = line; *in != '\0'; ++in) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        int lowered = tolower((unsigned char) *in);

        if (lowered >= 'a' && lowered <= 'z') {
            *out++ = (char) lowered;
        }
    }
    *out = '\0';
}
/*
 * Returns 0 when n is 0, otherwise n + 1.
 *
 * The previous condition was `if (n = 0)`, an assignment that is always
 * false and also reset n, so the function returned 1 for every input.
 * The static local it kept was overwritten on every call and never read
 * across calls, so it has been dropped.
 */
int counter(int n) {
    if (n == 0) {
        return 0;
    }
    return n + 1;
}
}
void processFile(void *input) {
struct argStruct params = *(struct argStruct *) input;
wordsStruct *words = params.words;
FILE *textFile = params.file;
int start = params.start;
int end = params.end;
int count = params.count;
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
char *readFile = (malloc(sizeof(char) * size +10));
fread(readFile, 1, size, textFile);
char *copy = (malloc(sizeof(char) * size +10));
strcpy(copy, readFile);
char *saveptr;
int inArray;
int length;
static int added;
char *token = strtok_r(copy, delim, &saveptr);
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
inArray = 0;
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
}
if (inArray == 0) {
added++;
strcpy(words[added].word, token);
words[added].count = 1;
} else {
words[inArray].count++;
}
}
token = strtok_r(NULL, delim, &saveptr);
}
words->totalWords = added;
free(token);
free(readFile);
}
int main(int argc, char *argv[])
{
FILE *pfile;
int threadCount = 0, fileSize = 0, divide = 0;
wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));
if (argc > 2)
{
pfile = fopen( argv[1], "r");
if (pfile == NULL){
perror("FILE OPEN FAILURE");
}
threadCount = atoi(argv[2]);
pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);
fseek(pfile, 0, SEEK_END);
fileSize= ftell(pfile);
fseek(pfile, 0, SEEK_SET);
divide = (fileSize/threadCount);
struct argStruct arguments;
arguments.file = pfile;
arguments.words = allWords;
int j = 0;
for(int i = 0; i < threadCount; i++) {
arguments.start = j;
arguments.end = j+divide;
arguments.count = i;
struct argStruct *passArgs = malloc(sizeof *passArgs);
*passArgs = arguments;
pthread_create(&thread[i], NULL, (void *) processFile, passArgs);
j+=divide;
}
for (int i = 0; i < threadCount +1; i++){
pthread_join(thread[i], NULL);
}
fclose(pfile);
} else {
printf("Please enter text file name and number of threads");
}
return 0;
}