1

I'm trying to use child processes in my quicksort, such that the left half sorts in one child and the right half in another child. I had it working prior to the shmget implementation but now I believe I'm corrupting the array somewhere because all my values become zero after printing the array. Sorry if I'm making some dumb mistake somewhere, I'm trying to learn how to use fork and shmget but have been running into some trouble. I'm trying to take in a text file as a command line argument and given a delimiter such as ';' I have to remove the delimiter and identify the numbers in between, place them in an array and sort them using child processes. I have the parsing working, and the quicksort worked but now that I'm trying to implement the shared memory I've ran into some problems.

Thanks

I've looked at a few different examples, but the one that this is mostly based off is the geeksforgeeks example with mergesorting with fork. https://www.geeksforgeeks.org/concurrent-merge-sort-in-shared-memory/

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fileParser.h"
#include "dataManagement.h"

int main(int argc, char *argv[]){
    char *file = argv[1];
    char delimiter = argv[2][0];
    MyArray *theArray = getArray(file, delimiter);

    size_t SHM_SIZE = theArray->length;

    theArray->key = IPC_PRIVATE;


    if((theArray->shmid = shmget(theArray->key, SHM_SIZE, IPC_CREAT | 0666)) < 0){
        perror("shmget");
        _exit(-1);
    }

    if ((theArray->shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1)
    {
        perror("shmat");
        _exit(1);
    }

    printArray(theArray, theArray->length);
    quickSortFork(theArray, 0, theArray->length-1);
    printArray(theArray, theArray->length);

    if (shmdt(theArray->shm_array) == -1)
    {
        perror("shmdt");
        _exit(1);
    }

    if (shmctl(theArray->shmid, IPC_RMID, NULL) == -1)
    {
        perror("shmctl");
        _exit(1);
    }

    return 0;
}

dataManagement.h


#include <unistd.h>
#include <sys/wait.h>
#include "fileParser.h"

int partition(MyArray *arr, int low, int high);
void quickSortFork(MyArray *arr, int low, int high);
void swap(MyArray *arr, int a, int b);


void printArray(MyArray *arr, int length) {
    for(int i = 0; i < length; i++){
        printf("%d  ", arr->shm_array[i]);
    }
    printf("\n");
}


void quickSortFork(MyArray *arr, int low, int high){
    pid_t lpid, rpid;
    rpid = fork();
    if(low < high){
        int partitionIndex = partition(arr,low, high);
        if(rpid < 0){
            perror("Right child not created.\n");
            exit(-1);
        } else if(rpid == 0 ){
            printf("I am the right child!\tMy process id: %d\n",getpid());
            quickSortFork(arr, partitionIndex + 1, high);
            exit(EXIT_SUCCESS);
        } else {
            lpid = fork();
            if(lpid < 0){
                perror("Left child not created.\n");
            } else if (lpid == 0) {
                quickSortFork(arr, low, partitionIndex - 1);
                printf("I am the left child!\tMy process id: %d\n", getpid());
                exit(EXIT_SUCCESS);
            }
        }

        int status;

        waitpid(rpid, &status, 0);
        waitpid(lpid, &status, 0);
    }
}


int partition(MyArray *arr, int low, int high){
    int i = low, j = high;
    int pivot = arr->shm_array[(low+high)/2];
    while(i < j){
        while(arr->shm_array[i] < pivot)
            i++;
        while(arr->shm_array[j] > pivot)
            j--;
        if(i < j){
            swap(arr,i,j);
        }
    }
    return i;
}


void swap(MyArray *arr, int a, int b){
    int temp = arr->shm_array[a];
    arr->shm_array[a] = arr->shm_array[b];
    arr->shm_array[b] = temp;
}

fileParser.h


int findFileLength(FILE *inFile, char delimiter);
int *parseFileToArray(FILE *inFile, char delimiter, int length);

typedef struct {
    int shmid;
    key_t key;
    int length;
    int *shm_array;
} MyArray;


MyArray *getArray(char *fileName, char delimiter){
    FILE *numberFile = fopen(fileName, "r"); // open for reading

    if (numberFile == NULL) // unable to open file
        return NULL;
    MyArray *array = malloc(sizeof(MyArray));
    array->length = findFileLength(numberFile, delimiter);
    array->shm_array = parseFileToArray(numberFile, delimiter,array->length);

    return array;

}


int findFileLength(FILE *inFile, char delimiter){
    char c;
    int length = 0;
    c = fgetc(inFile);
    while(c != EOF){
        if(c != delimiter && c != '\n'){
            length++;
            while((c = fgetc(inFile)) != EOF && c != '\n' && c != delimiter);

        } else {
            c = fgetc(inFile);
        }
    }
    rewind(inFile); // resets the file pointer to the start
    return length;
}


int *parseFileToArray(FILE *inFile, char delimiter, int length){
    int *parsedFile = malloc(sizeof(int) * length);
    char c;
    char *stringInt = malloc(sizeof(char) * 100); // string that is used to combine multiple characters and convert to an integer
    int stringIntP = 0, parsedArrayP = 0; // pointers for our arrays, the first is for the string that determines the integer, the second is for our resulting array
    c = fgetc(inFile);
    while(c != EOF){
        if(c != delimiter && c != '\n'){

            for(;c != '\n' && c != delimiter; (c = fgetc(inFile)) != EOF){
                stringInt[stringIntP++] = c;
            }
            stringIntP = 0;
            parsedFile[parsedArrayP++] = atoi(stringInt); // convert string number to integer value
            memset(stringInt, 0, 100);  // clear the string that builds the integer value from chars
        } else {
            c = fgetc(inFile);
        }
    }
    for(int i = 0; i < length; i++){
        printf("%d ", parsedFile[i]);
    }
    printf("\n");
    fclose(inFile); // close the file after using
    free(stringInt);
    return parsedFile;
}

Expected output: the array passed in unsorted first. Then the array sorted.

Actual output: array filled with 0's and program doesn't finish executing

Camilo
  • 85
  • 8
  • Well, to start, in your fork routine, you're doing `rpid = fork();` _above_ `if(low < high){` so if that expression is false, you're creating a zombie process that you never wait for. I think you should move that below the `if` – Craig Estey Jan 29 '19 at 21:26
  • okay I'll correct that, thank you for the advice. – Camilo Jan 29 '19 at 21:30
  • 1
    Why are you using `shm*()` for this? Since you `fork()`, `mmap()` is the obvious (sane) choice. – EOF Jan 29 '19 at 21:37
  • Note, you are reading the file twice. Once to determine the number of delimiters/element and then again to read the values. That's very inefficient since file I/O is orders of magnitude slower than memory operations. You should dynamically allocate with `realloc` as needed and make only a single pass through the file. (using `strtol, strtod, etc..` to convert from the text to numeric values) – David C. Rankin Jan 30 '19 at 03:20

2 Answers2

5

There are several bugs. I was able to [finally] find them all and a working version is below.

Here's a summary:

  1. In the fork/sort function, the rpid = fork(); is done above the main if statement. If that if is false, a zombie process is created.
  2. The size of the shared area is too small. It is only the number of elements and not sizeof(int) * number_of_elements
  3. The data is read into a non-shared area. The shared area is then created and the pointer to the actual [non-shared] data is lost. There is no copy of the data into the shared area
  4. In the fork/sort function, the call to the partition function is done after the first fork call, so it is called by both parent and child, so they conflict/race.
  5. There are way too many processes being created and some of the fork calls fail because there are no more free process slots.

(1) As I mentioned above rpid = fork(); needs to go after if (low < high) to prevent the creation of a zombie process if the if statement is false. More about this below in section (4).


(2) Your shared memory area is too small. It causes a segfault during final printing.

This is incorrect:

size_t SHM_SIZE = theArray->length;

It needs to be:

size_t SHM_SIZE = sizeof(int) * theArray->length;

(3) You are creating theArray in non-shared memory from the call to getArray.

It sets shm_array from a call to parseFileToArray. This is still in non-shared memory.

Later, to get the shared area, you do:

theArray->shm_array = shmat(theArray->shmid, NULL, 0)

This returned value of shm_array is now in shared memory, but the data is still in the old value of shm_array [again, in non-shared memory]. The pointer to the actual data is lost forever.

To fix this, you'll need something like:

int *shm_array;
if ((shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1) {
    perror("shmat");
    _exit(1);
}

int *oldptr = theArray->shm_array;
for (int idx = 0;  idx < theArray->length;  ++idx)
    shm_array[idx] = oldptr[idx];
free(oldptr);

theArray->shm_array = shm_array;

Of course, when you get the program working, it would be better to move the shm* calls into the low level function that does the [non-shared] malloc for shm_array, so you can eliminate the extra copy operation.


(4) In your fork routine, you are calling:

int partitionIndex = partition(arr, low, high);

You do this after the fork, so both parent and rpid child are trying to do the partition operation so they are conflicting.

So, quickSortFork needs to start out with:

if (low < high) {
    int partitionIndex = partition(arr, low, high);

    rpid = fork();

(5) You are creating way too many processes and the fork calls start to fail due to unavailable process slots.

This is probably why the program appears to hang.

This would probably not be observable with a small number of array elements, but would occur if the array is large enough (e.g. 100,000 elements)


Here is a working version [with some extra debug code].

To solve the final fork problem, I created a quickSortStd that does not use fork and called that instead.

One way to handle the issue of too many fork calls is to have quickSortFork keep track of the recursion depth and call the non-fork version after the depth gets high enough.

As a general rule, adding more processes/threads after a certain number becomes counter productive because the overhead of switching between processes overshadows the benefits of parallelism. This is a tuning option.

I added a simple version of that idea to quickSortFork and it seems to work, so adjust the depth limit to suit your needs.

#include <unistd.h>
#include <sys/wait.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>

typedef struct {
    int shmid;
    key_t key;
    int length;
    int *shm_array;
} MyArray;

int findFileLength(FILE * inFile, char delimiter);
int *parseFileToArray(FILE * inFile, char delimiter, int length);
int partition(MyArray * arr, int low, int high);
void quickSortFork(MyArray * arr, int low, int high);
void quickSortStd(MyArray * arr, int low, int high);
void swap(MyArray * arr, int a, int b);

void
prtnice(const char *who,int *arr,int length)
{
    int hang = 0;

    printf("%s: LENGTH %d\n",who,length);

    for (int i = 0; i < length; i++) {
        if (hang == 0)
            printf("%s/%8.8d:",who,i);

        printf(" %d", arr[i]);

        ++hang;
        if (hang == 10) {
            printf("\n");
            hang = 0;
        }
    }
    printf("\n");
}

MyArray *
getArray(char *fileName, char delimiter)
{
    FILE *numberFile = fopen(fileName, "r");    // open for reading

    if (numberFile == NULL)             // unable to open file
        return NULL;
    MyArray *array = malloc(sizeof(MyArray));

    array->length = findFileLength(numberFile, delimiter);
    array->shm_array = parseFileToArray(numberFile, delimiter, array->length);

    return array;
}

int
findFileLength(FILE * inFile, char delimiter)
{
    char c;
    int length = 0;

    c = fgetc(inFile);
    while (c != EOF) {
        if (c != delimiter && c != '\n') {
            length++;
            while ((c = fgetc(inFile)) != EOF && c != '\n' && c != delimiter);

        }
        else {
            c = fgetc(inFile);
        }
    }
    rewind(inFile);                     // resets the file pointer to the start

    return length;
}

int *
parseFileToArray(FILE * inFile, char delimiter, int length)
{
    int *parsedFile = malloc(sizeof(int) * length);
    char c;
    char *stringInt = malloc(sizeof(char) * 100);   // string that is used to combine multiple characters and convert to an integer
    int stringIntP = 0,
        parsedArrayP = 0;               // pointers for our arrays, the first is for the string that determines the integer, the second is for our resulting array

    c = fgetc(inFile);
    while (c != EOF) {
        if (c != delimiter && c != '\n') {

            for (; c != '\n' && c != delimiter; (c = fgetc(inFile)) != EOF) {
                stringInt[stringIntP++] = c;
            }
            stringIntP = 0;
            parsedFile[parsedArrayP++] = atoi(stringInt);   // convert string number to integer value
            memset(stringInt, 0, 100);  // clear the string that builds the integer value from chars
        }
        else {
            c = fgetc(inFile);
        }
    }

    prtnice("INP",parsedFile,length);

    fclose(inFile);                     // close the file after using
    free(stringInt);

    return parsedFile;
}

void
printArray(const char *who,MyArray * arr, int length)
{
    prtnice(who,arr->shm_array,length);
}

void
quickSortFork(MyArray * arr, int low, int high)
{
    pid_t lpid,
     rpid;

    static int depth = 0;
    if (depth++ > 5) {
        quickSortStd(arr,low,high);
        --depth;
        return;
    }

    printf("Fork: ENTER low=%d high=%d\n",low,high);

    if (low < high) {
        int partitionIndex = partition(arr, low, high);

        rpid = fork();
        if (rpid < 0) {
            perror("Right child not created.\n");
            exit(-1);
        }

        if (rpid == 0) {
            printf("I am the right child!\tMy process id: %d\n", getpid());
            quickSortFork(arr, partitionIndex + 1, high);
            exit(EXIT_SUCCESS);
        }

        lpid = fork();
        if (lpid < 0) {
            perror("Left child not created.\n");
            exit(-1);
        }

        if (lpid == 0) {
            quickSortFork(arr, low, partitionIndex - 1);
            printf("I am the left child!\tMy process id: %d\n", getpid());
            exit(EXIT_SUCCESS);
        }

        int status;
        printf("Fork: WAIT rpid=%d\n",rpid);
        waitpid(rpid, &status, 0);
        printf("Fork: WAIT lpid=%d\n",lpid);
        waitpid(lpid, &status, 0);
    }

    --depth;

    printf("Fork: EXIT low=%d high=%d\n",low,high);
}

void
quickSortStd(MyArray * arr, int low, int high)
{
    pid_t lpid,
     rpid;

    printf("Std: ENTER low=%d high=%d\n",low,high);

    if (low < high) {
        int partitionIndex = partition(arr, low, high);
        quickSortStd(arr, partitionIndex + 1, high);
        quickSortStd(arr, low, partitionIndex - 1);
    }

    printf("Std: EXIT low=%d high=%d\n",low,high);
}

int
partition(MyArray * arr, int low, int high)
{
    int i = low,
        j = high;
    int pivot = arr->shm_array[(low + high) / 2];

    while (i < j) {
        while (arr->shm_array[i] < pivot)
            i++;
        while (arr->shm_array[j] > pivot)
            j--;
        if (i < j) {
            swap(arr, i, j);
        }
    }
    return i;
}

void
swap(MyArray * arr, int a, int b)
{
    int temp = arr->shm_array[a];

    arr->shm_array[a] = arr->shm_array[b];
    arr->shm_array[b] = temp;
}

int
main(int argc, char *argv[])
{
    char *file = argv[1];
    char delimiter = argv[2][0];
    MyArray *theArray = getArray(file, delimiter);

#if 0
    size_t SHM_SIZE = theArray->length;
#else
    size_t SHM_SIZE = sizeof(int) * theArray->length;
#endif

    setlinebuf(stdout);

    theArray->key = IPC_PRIVATE;

    if ((theArray->shmid = shmget(theArray->key, SHM_SIZE, IPC_CREAT | 0666)) < 0) {
        perror("shmget");
        _exit(-1);
    }

    printArray("BEF",theArray, theArray->length);

    int *shm_array;
    if ((shm_array = shmat(theArray->shmid, NULL, 0)) == (int *) -1) {
        perror("shmat");
        _exit(1);
    }

    int *oldptr = theArray->shm_array;
    for (int idx = 0;  idx < theArray->length;  ++idx)
        shm_array[idx] = oldptr[idx];
    free(oldptr);

    theArray->shm_array = shm_array;

    printArray("SHM",theArray, theArray->length);
#if 1
    quickSortFork(theArray, 0, theArray->length - 1);
#else
    quickSortStd(theArray, 0, theArray->length - 1);
#endif
    printArray("AFT",theArray, theArray->length);

    if (shmdt(theArray->shm_array) == -1) {
        perror("shmdt");
        _exit(1);
    }

    if (shmctl(theArray->shmid, IPC_RMID, NULL) == -1) {
        perror("shmctl");
        _exit(1);
    }

    return 0;
}
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
  • Is there any way to send messages on stack overflow? I'd love to learn more from you if you have time. – Camilo Jan 30 '19 at 18:55
  • Yes, in a way. It is possible to set up a chat. If we posted _enough_ comments, SO would automatically admonish about too many comments and ask if you wanted to move the discussion to chat. See: https://stackoverflow.com/a/54356751/5382650 and follow the link for the chat for an example. Chat can be set up for a variety of purposes. – Craig Estey Jan 30 '19 at 19:28
  • A question about your program here. Do you have your heart set on using multiple _processes_ and `shm*` [shared memory]. In particular, `fork` and `waitpid` are somewhat expensive (i.e. creating and destroying processes). A better model is to have a fixed number of "worker" processes that pull request blocks for the sub-arrays off a common queue. So, the processes are created at program start and only destroyed at program end. A much more efficient way is to use multiple _threads_ as well [and you can eliminate `shm*` because the normal `malloc` area is shared]. So, you'd do `pthread_create` – Craig Estey Jan 30 '19 at 19:35
  • Okay, thank you. I really appreciate the help, I've wanted to learn more about operating system concepts and kernel development but have found it difficult to jump into it. I'm fairly new to C as well so I appreciate you helping me. Regarding the shm*, I'm not set on using shm* I'll try rewriting it with threads instead, I need to look into threads in C a bit more. I was also wondering when I should use shm* vs. mmap. I tried using mmap and was able to get it to work somewhat but I wen't back to shm* because your answer explained it well. – Camilo Jan 30 '19 at 20:16
  • If you use threads, you don't need _either_ `shm*` _or_ `mmap`. Simple `malloc` will work, because when you use threads, _all_ memory is shared by default. Only memory that has the `__thread` attribute is non-shared (aka "thread local storage"). So, if a thread needed some private memory (e.g.) `__thread int my_private_array[100];` Then, if thread A changes `my_private_array` only its copy is changed. `my_private_array` in (e.g.) thread B is _not_ changed. They are _both_ in the _same_ address space but at different memory locations within that space. – Craig Estey Jan 30 '19 at 20:39
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/187605/discussion-between-craig-estey-and-camilo). – Craig Estey Jan 30 '19 at 20:39
0

An immediate problem is that in

void quickSortFork(MyArray *arr, int low, int high){
    pid_t lpid, rpid;
    rpid = fork();
    if(low < high){
        int partitionIndex = partition(arr,low, high);

both parent and child start partitioning the same range, obviously stepping on each other toes.

Let the parent do partitioning, and only then fork.

user58697
  • 7,808
  • 1
  • 14
  • 28