0

I have a program called unscramble that takes two files, Jumbled.txt and dictionary.txt, and, for each word in Jumbled.txt, finds the words in dictionary.txt that consist of exactly the same characters. For instance, here is a sample input:

Jumbled.txt:                                  
Hello
Wassup
Rigga
Boyka
Popeye

dictionary.txt:
olleH
Yello
elloH
lloeH
aggiR
ggiRa
giRag
yokaB
Bakoy
kaBoy
eyePop
poePye

and the expected output for that input is:

Hello: olleH elloH lloeH
Wassup: NO MATCHES
Rigga: aggiR ggiRa giRag
Boyka: yokaB Bakoy kaBoy
Popeye: eyePop poePye

Here is my code that attempts to solve it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_WORD_LENGTH 50
#define MAX_NUM_WORDS 500000

// qsort() comparator that orders two characters by their char value.
//
// a, b: pointers to the two `char` elements being compared.
// Returns a negative value, zero, or a positive value when *a is less than,
// equal to, or greater than *b respectively (the plain difference of the two).
//
// The arguments arrive as `const void *`, so they are read through
// `const char *` instead of casting the const qualifier away.
int compare_char(const void *a, const void *b) {
    const char *lhs = a;
    const char *rhs = b;

    // Both operands are promoted to int before subtracting, so the
    // difference cannot overflow.
    return *lhs - *rhs;
}

// Sorts the characters of the NUL-terminated string `word` in place,
// using compare_char() to order them. The terminator itself is not moved.
void sort_word(char *word) {
    const size_t length = strlen(word);

    qsort(word, length, sizeof word[0], compare_char);
}

// Reports whether `word` is an anagram of `jumbled_word`, i.e. whether both
// strings contain exactly the same characters the same number of times
// (comparison is case-sensitive).
//
// jumbled_word, word: NUL-terminated strings; neither is modified.
// Returns 1 when the strings are anagrams of each other, 0 otherwise.
//
// The strings are compared by counting bytes rather than by copying them
// into fixed-size buffers and sorting, so there is no limit on their length.
int is_valid_word(const char *jumbled_word, const char *word) {
    // Number of distinct values an unsigned char can hold (256 where a byte
    // has 8 bits), written without relying on <limits.h>.
    enum { BYTE_VALUES = (unsigned char)-1 + 1 };
    int balance[BYTE_VALUES] = {0};

    // Read through unsigned char so that every byte is a valid array index.
    for (const unsigned char *p = (const unsigned char *)jumbled_word; *p != '\0'; p++) {
        balance[*p]++;
    }
    for (const unsigned char *p = (const unsigned char *)word; *p != '\0'; p++) {
        balance[*p]--;
    }

    // Any non-zero entry means one string has a byte the other lacks.
    for (int i = 0; i < BYTE_VALUES; i++) {
        if (balance[i] != 0) {
            return 0;
        }
    }
    return 1;
}

// Entry point: unscramble <dictionary> <jumbles>
//
// Loads every line of the dictionary file into memory, then, for each line of
// the jumbles file, prints "<word>:" followed by every dictionary entry that
// is_valid_word() accepts for it, or "<word>: NO MATCHES" when there is none.
// Returns 0 on success; exits with status 1 on a usage error or when either
// file cannot be opened.
int main(int argc, char *argv[]) {
    if (argc != 3) {
        printf("Usage: unscramble <dictionary> <jumbles>\n");
        exit(1);
    }
    char *dict_filename = argv[1];
    char *jumbles_filename = argv[2];

    // NOTE(review): this is an automatic (stack) array of
    // MAX_NUM_WORDS * MAX_WORD_LENGTH = 500000 * 50 = 25,000,000 bytes.
    // That is larger than a typical default stack limit (commonly 8 MB on
    // Linux), so the program can crash on entry to main before any file is
    // read.
    char dictionary[MAX_NUM_WORDS][MAX_WORD_LENGTH];
    int num_words = 0;
    FILE *dict_file = fopen(dict_filename, "r");
    if (dict_file == NULL) {
        printf("Error: Could not open dictionary file %s\n", dict_filename);
        exit(1);
    }
    // Shared line buffer for both files. fgets() stores at most
    // sizeof(line) - 1 characters per call, so a longer line arrives in
    // several pieces, each treated as a separate word.
    char line[MAX_WORD_LENGTH];
    while (fgets(line, sizeof(line), dict_file) != NULL) {
        // Remove trailing newline character
        line[strcspn(line, "\n")] = '\0';
        // Copy word into dictionary
        // NOTE(review): num_words is never checked against MAX_NUM_WORDS, so
        // a dictionary with more lines than that writes past the array.
        strcpy(dictionary[num_words], line);
        num_words++;
    }
    fclose(dict_file);

    // Loop over jumbled words file
    FILE *jumbles_file = fopen(jumbles_filename, "r");
    if (jumbles_file == NULL) {
        printf("Error: Could not open jumbled words file %s\n", jumbles_filename);
        exit(1);
    }
    while (fgets(line, sizeof(line), jumbles_file) != NULL) {
        // Remove trailing newline character
        line[strcspn(line, "\n")] = '\0';
        // Work on a sorted copy so that `line` keeps the original spelling
        // for printing. is_valid_word() sorts its own copies of both
        // arguments again, so this pre-sort does not change the result.
        char sorted_word[MAX_WORD_LENGTH];
        strcpy(sorted_word, line);
        sort_word(sorted_word);
        // Tracks whether the "<word>:" prefix has already been printed.
        int found_match = 0;
        for (int i = 0; i < num_words; i++) {
            if (is_valid_word(sorted_word, dictionary[i])) {
                // Print the prefix once, just before the first match.
                if (!found_match) {
                    printf("%s:", line);
                    found_match = 1;
                }
                printf(" %s", dictionary[i]);
            }
        }
        if (!found_match) {
            printf("%s: NO MATCHES", line);
        }
        // Terminate the output line for this jumbled word.
        printf("\n");
    }
    fclose(jumbles_file);

    return 0;
}

However, after compiling it and checking that Jumbled.txt and dictionary.txt are both available in the same directory, I get this error message:

xxxxxxxxx@LAPTOP-xxxxxxxx:~$ gcc -Wall -W -pedantic -o unscramble unscramble.c  
xxxxxxxxx@LAPTOP-xxxxxxxx:~$ vim Jumbled.txt
xxxxxxxxx@LAPTOP-xxxxxxxx:~$ vim dictionary.txt
xxxxxxxxx@LAPTOP-xxxxxxxx:~$ vim unscramble.c
xxxxxxxxx@LAPTOP-xxxxxxxx:~$ ./unscramble dictionary.txt Jumbled.txt
Segmentation fault
xxxxxxxxx@LAPTOP-xxxxxxxx:~$ 

What is the problem, and what should I change?

Edit: What I have so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_WORD_LENGTH 50
#define MAX_NUM_WORDS 500000

// Comparator for qsort(): orders two characters by their char value.
// Returns the difference *a - *b, which is negative, zero, or positive when
// the first character is less than, equal to, or greater than the second.
int compare_char(const void *a, const void *b) {
    const char *first = a;
    const char *second = b;
    const int difference = *first - *second;

    return difference;
}

// Rearranges the characters of the NUL-terminated string `word` in place,
// in the order defined by compare_char(). The terminating NUL stays put
// because only the strlen(word) leading characters are handed to qsort().
void sort_word(char *word) {
    size_t char_count = strlen(word);
    size_t char_size = sizeof *word;

    qsort(word, char_count, char_size, compare_char);
}

// Reports whether `word` uses exactly the same characters as `jumbled_word`,
// each the same number of times (case-sensitive anagram test).
//
// jumbled_word, word: NUL-terminated strings; neither is modified.
// Returns 1 when the two strings are anagrams of each other, 0 otherwise.
//
// Works by counting bytes instead of copying both strings into fixed-size
// buffers, so arguments of any length are handled safely.
int is_valid_word(const char *jumbled_word, const char *word) {
    // Number of distinct values an unsigned char can hold (256 where a byte
    // has 8 bits), written without relying on <limits.h>.
    enum { NUM_BYTE_VALUES = (unsigned char)-1 + 1 };
    size_t remaining[NUM_BYTE_VALUES] = {0};

    // Strings of different length can never be anagrams.
    if (strlen(jumbled_word) != strlen(word)) {
        return 0;
    }

    // Record how many of each byte the jumbled word offers.
    for (const unsigned char *p = (const unsigned char *)jumbled_word; *p != '\0'; p++) {
        remaining[*p]++;
    }

    // Consume one occurrence per byte of `word`. Because the lengths are
    // equal, never running out of a byte means every count ends at zero.
    for (const unsigned char *p = (const unsigned char *)word; *p != '\0'; p++) {
        if (remaining[*p] == 0) {
            return 0;
        }
        remaining[*p]--;
    }
    return 1;
}

// Reads the next line of `fp` into `buf` (capacity `size` bytes) and strips
// its line terminator ("\n" or "\r\n").
//
// When the line does not fit, the part that fits is kept and the rest of the
// line is discarded, so one over-long line is never split across several
// reads. `*truncated` is set to 1 in that case and to 0 otherwise.
//
// Returns 1 when a line was read, 0 at end of file or on a read error.
static int read_word(FILE *fp, char *buf, size_t size, int *truncated) {
    *truncated = 0;
    if (fgets(buf, (int)size, fp) == NULL) {
        return 0;
    }

    // No '\n' in the buffer: either the file ended or the line was longer
    // than the buffer. Drain what is left of the line; a pending "\r\n" or
    // "\n" alone does not count as lost content.
    if (strchr(buf, '\n') == NULL) {
        int c;
        while ((c = fgetc(fp)) != EOF && c != '\n') {
            if (c != '\r') {
                *truncated = 1;
            }
        }
    }

    buf[strcspn(buf, "\r\n")] = '\0';
    return 1;
}

// Entry point: unscramble <dictionary> <jumbles>
//
// Loads up to MAX_NUM_WORDS words (one per line) from the dictionary file,
// then, for each word in the jumbles file, prints "<word>:" followed by every
// dictionary word that is_valid_word() accepts for it, or
// "<word>: NO MATCHES" when there is none.
//
// Blank lines are ignored in both files. Dictionary lines that do not fit in
// MAX_WORD_LENGTH - 1 characters are skipped; jumbled words that do not fit
// are reported on stderr and skipped. Diagnostics go to stderr so that stdout
// carries only results.
//
// Returns 0 on success and 1 on a usage error, an allocation failure, or when
// either file cannot be opened.
int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: unscramble <dictionary> <jumbles>\n");
        return 1;
    }
    const char *dict_filename = argv[1];
    const char *jumbles_filename = argv[2];

    // Everything released at `cleanup` is declared before the first goto.
    int status = 1;
    size_t num_words = 0;
    int truncated = 0;
    char line[MAX_WORD_LENGTH];
    FILE *dict_file = NULL;
    FILE *jumbles_file = NULL;

    // The table is about 25 MB with the current limits, so it is allocated
    // on the heap rather than placed on the stack.
    char (*dictionary)[MAX_WORD_LENGTH] = malloc(MAX_NUM_WORDS * sizeof *dictionary);
    if (dictionary == NULL) {
        fprintf(stderr, "Error: malloc failed\n");
        goto cleanup;
    }

    dict_file = fopen(dict_filename, "r");
    if (dict_file == NULL) {
        fprintf(stderr, "Error: Could not open dictionary file %s\n", dict_filename);
        goto cleanup;
    }
    while (num_words < MAX_NUM_WORDS &&
           read_word(dict_file, line, sizeof line, &truncated)) {
        if (truncated || line[0] == '\0') {
            continue;
        }
        // `line` is NUL-terminated and shorter than MAX_WORD_LENGTH, so
        // copying strlen + 1 bytes always yields a terminated entry.
        memcpy(dictionary[num_words], line, strlen(line) + 1);
        num_words++;
    }
    fclose(dict_file);
    dict_file = NULL;

    // Loop over jumbled words file
    jumbles_file = fopen(jumbles_filename, "r");
    if (jumbles_file == NULL) {
        fprintf(stderr, "Error: Could not open jumbled words file %s\n", jumbles_filename);
        goto cleanup;
    }
    while (read_word(jumbles_file, line, sizeof line, &truncated)) {
        if (truncated) {
            fprintf(stderr, "Error: Jumbled word %s is too long\n", line);
            continue;
        }
        if (line[0] == '\0') {
            continue;
        }

        // is_valid_word() does not require pre-sorted input, so the word is
        // passed as read.
        int found_match = 0;
        printf("%s:", line);
        for (size_t i = 0; i < num_words; i++) {
            if (is_valid_word(line, dictionary[i])) {
                printf(" %s", dictionary[i]);
                found_match = 1;
            }
        }
        if (!found_match) {
            printf(" NO MATCHES");
        }
        printf("\n");
    }
    status = 0;

cleanup:
    // fclose(NULL) is undefined, unlike free(NULL), so the streams are guarded.
    if (jumbles_file != NULL) {
        fclose(jumbles_file);
    }
    if (dict_file != NULL) {
        fclose(dict_file);
    }
    free(dictionary);
    return status;
}
John Kugelman
  • 349,597
  • 67
  • 533
  • 578
Deezel
  • 111
  • 6
  • I have already seen a question with this exact set of words, but the attempted solution was totally different. Have you tried this before, or was it your classmate? Just curious. – n. m. could be an AI Feb 19 '23 at 07:17
  • @n.m. it was me lol, I asked the same question previously but I wrote up my own code, now I'm dealing with a seg error – Deezel Feb 19 '23 at 07:20

2 Answers

4

The first step is to run your program in a debugger to figure out where it segfaults:

$ gcc -g3 -Wall -W -pedantic -o unscramble unscramble.c
$ gdb ./unscramble
(gdb) set args dictionary.txt Jumbled.txt
(gdb) r
(gdb) bt
Program received signal SIGSEGV, Segmentation fault.
0x00005555555552c6 in main (argc=<error reading variable: Cannot access memory at address 0x7ffffe82657c>, argv=<error reading variable: Cannot access memory at address 0x7ffffe826570>) at 1.c:26
26      int main(int argc, char *argv[]) {

This is strange, i.e. memory corruption, before even starting, so you look at allocations and you see:

    char dictionary[MAX_NUM_WORDS][MAX_WORD_LENGTH];

which is 500k * 50 bytes or 25 MB. The default stack on my system is 8 MB. You could up your stack size with, say, ulimit -s 30000, and your program would run as is. It would be better to reduce memory usage, or use malloc() to allocate space on the heap instead:

    char (*dictionary)[MAX_WORD_LENGTH] = malloc(MAX_NUM_WORDS * sizeof(*dictionary));
    if(!dictionary) {
        printf("malloc failed\n");
        return 1;
    }
    // ...
    free(dictionary);

and it now returns:

Hello: olleH elloH lloeH
Wassup: NO MATCHES
Rigga: aggiR ggiRa giRag
Boyka: yokaB Bakoy kaBoy
Popeye: eyePop poePye
Allan Wind
  • 23,068
  • 5
  • 28
  • 38
  • I've done as you said but there is still a segmentation fault, I've tried to debug my code as well and did some error handling but to no avail. I've edited my original question to show what I have now after the error handling and after putting in what you've said. It's still segmentation error when I run it on Ubuntu – Deezel Feb 19 '23 at 06:22
  • I cannot reproduce the segfault with revised code. Are you sure you rebuild your program? If so, then I suggest you submit a new question with a stacktrace as it's a different problem with revised code. I added details above on how to get a stacktrace. – Allan Wind Feb 19 '23 at 07:13
  • did you create two text files, Jumbled.txt and dictionary.txt and then do ./unscramble dictionary.txt Jumbled.txt ? – Deezel Feb 19 '23 at 07:23
  • 1
    also I did create a new question as you said – Deezel Feb 19 '23 at 07:24
  • Yes, both directly and via gdb as noted above. – Allan Wind Feb 19 '23 at 07:25
  • Good job, from my perspective, you can accept the question but it also makes sense that you wait for the outcome of the new question. I suggest you remove the "Edit What I have so far:" and revised code from the question so as not to confuse others. – Allan Wind Feb 19 '23 at 07:31
1

You put the dictionary array in main, so this array is going to live on main's stack frame; that's approximately 23.8 MiB on the stack, over the usual stack size limit of almost all operating systems (8 MiB on Linux).

If you still want to use a bi-dimensional array, put this array outside main as a global variable, so it won't live on main's stack frame anymore. Alternatively use malloc to allocate this array, so it lives in the program's heap. See What and where are the stack and heap?

Harlan Wei
  • 374
  • 7