0

I have a programm which creates 1000 child processes. Each process should access to a int variable, which is stored in a shared memory segment. To protect the int variable I have created a semaphore:

#define _XOPEN_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/sem.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#define KEY 1000
#define LOCK -1
#define UNLOCK 1

int *ptr;
int pid;
int shm_id;
int sem_id;

struct sembuf sema;

int main()
{
    if( ( sem_id = semget( KEY, 1, IPC_CREAT | 0666 ) ) < 0 )
    {
        printf( "semid error\n" );
        exit( EXIT_SUCCESS );
    }
    sema.sem_num = 1;
    sema.sem_op = 0;
    sema.sem_flg = SEM_UNDO;
    if( ( shm_id = shmget( KEY, 1, IPC_CREAT | 0666 ) ) < 0 )
    {
        printf( "ERROR\n" );
        exit( EXIT_SUCCESS );
    }
    ptr = shmat( shm_id, NULL, 0 );
    *ptr = 0;
    for( int i = 0; i < 10; ++i )
    {

        pid = fork();
        if( pid == 0 )
        {
            // critical part
            sema.sem_op = LOCK;
            if( semop( sem_id, &sema, 1 ) < 0 )
            {
                printf( "ERROR\n" );
            }
            ++( *ptr );
            sema.sem_op = UNLOCK;
            if( semop( sem_id, &sema, 1 ) < 0 )
            {
                printf( "ERROR\n" );
            }
            // end of the critical part
            exit( EXIT_SUCCESS );
        }
    }
    int return_stat;
    enum { debug = 1 };
    int corpse;

    while ( ( corpse = waitpid( ( pid_t )-1, &return_stat, 0 ) ) > 0 )
        if ( debug )
            printf( "PID %d died 0x%.4X\n", corpse, return_stat ); 
    //while( waitpid( pid, &return_stat, 0 ) == 0 );
    printf( "value   %d\n", *ptr );
    shmdt( NULL );
    semctl( sem_id, 1, IPC_RMID, 0 );
}

Here is an example output:

PID 7288 died 0x0000
PID 7289 died 0x0000
PID 7290 died 0x0000
PID 7291 died 0x0000
PID 7292 died 0x0000
PID 7293 died 0x0000
PID 7294 died 0x0000
PID 7295 died 0x0000
PID 7296 died 0x0000
PID 7297 died 0x0000
value   9

PID 7276 died 0x0000
PID 7277 died 0x0000
PID 7278 died 0x0000
PID 7279 died 0x0000
PID 7280 died 0x0000
PID 7281 died 0x0000
PID 7282 died 0x0000
PID 7283 died 0x0000
PID 7284 died 0x0000
PID 7285 died 0x0000
value   10

The output should be 1000 every time, but the output vary. I do not know why this piece of code does not work properly. Can somebody help me with my problem? Thank you

Zagatho
  • 523
  • 1
  • 6
  • 22

2 Answers2

1

Your process cleanup loop is wrong:

while( waitpid( pid, &return_stat, 0 ) == 0 );

Since waitpid() returns the PID it is reporting on, that's not the loop you want -- it only waits for one PID to die. This might be what you need:

enum { debug = 1 };
int corpse;

while ((corpse = waitpid((pid_t)-1. &return_stat, 0)) > 0)
{
    if (debug)
        printf("PID %d died 0x%.4X\n", corpse, return_stat);
}

You can set debug = 0 when you're satisfied it is working correctly.


Reviewing example output

Some more of your problem is in the child code:

    if( pid == 0 )
    {
        // critical part
        sema.sem_op = LOCK;
        if( semop( sem_id, &sema, 1 ) < 0 )
        ++( *ptr );
        sema.sem_op = UNLOCK;
        if( semop( sem_id, &sema, 1 ) < 0 )
        // end of the critical part
        exit( EXIT_SUCCESS );
    }

You increment the pointer only if the first semop() fails; you exit (successfully) only if the second semop() fails.

You must exit unconditionally. You should only do the increment if the first semop() succeeds, and you should only do the second semop() if the first succeeds. You probably want some error reporting code after the if statements.


Another version

The residual problem I'm seeing is that you have the LOCK and UNLOCK values inverted.

#define _XOPEN_SOURCE

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <sys/wait.h>
#include <unistd.h>

#define KEY    1000
#define LOCK   +1
#define UNLOCK -1

static const char *arg0 = 0;
static void err_setarg0(char *argv0)
{
    arg0 = argv0;
}

static void err_syserr(const char *msg)
{
    int errnum = errno;
    fprintf(stderr, "%s: %s", arg0, msg);
    if (errnum != 0)
        fprintf(stderr, " (%d: %s)", errnum, strerror(errnum));
    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}

int main(int argc, char **argv)
{
    int *ptr;
    int pid;
    int shm_id;
    int sem_id;
    struct sembuf sema;

    err_setarg0(argv[argc-argc]);

    if ((sem_id = semget(KEY, 1, IPC_CREAT | 0666)) < 0)
        err_syserr("semget()");
    sema.sem_num = 0;
    sema.sem_op = 0;
    sema.sem_flg = SEM_UNDO;
    if ((shm_id = shmget(KEY, 1, IPC_CREAT | 0666)) < 0)
        err_syserr("shmget()");
    ptr = shmat(shm_id, NULL, 0);
    if (ptr == (int *)-1)
        err_syserr("shmat()");
    *ptr = 0;

    printf("Looping\n");
    for (int i = 0; i < 10; ++i)
    {
        pid = fork();
        if (pid < 0)
            err_syserr("fork()");
        else if (pid == 0)
        {
            // critical part
            sema.sem_op = LOCK;
            if (semop(sem_id, &sema, 1) < 0)
                err_syserr("semop() lock");
            ++(*ptr);
            sema.sem_op = UNLOCK;
            if (semop(sem_id, &sema, 1) < 0)
                err_syserr("semop() unlock");
            // end of the critical part
            exit(EXIT_SUCCESS);
        }
    }
    printf("Looped\n");

    int return_stat;
    enum { debug = 1 };
    int corpse;

    while ((corpse = waitpid((pid_t)-1, &return_stat, 0)) > 0)
    {
        if (debug)
            printf("PID %d died 0x%.4X\n", corpse, return_stat);
    }
    printf("value   %d\n", *ptr);
    if (shmdt(ptr) == -1)
        err_syserr("shmdt()");
    if (semctl(sem_id, 1, IPC_RMID, 0) == -1)
        err_syserr("semctl()");
    if (shmctl(shm_id, IPC_RMID, 0) == -1)
        err_syserr("shmctl()");
    return 0;
}

Example run:

$ ./semop
Looping
Looped
PID 17976 died 0x0000
PID 17977 died 0x0000
PID 17978 died 0x0000
PID 17979 died 0x0000
PID 17980 died 0x0000
PID 17981 died 0x0000
PID 17982 died 0x0000
PID 17983 died 0x0000
PID 17984 died 0x0000
PID 17985 died 0x0000
value   10
$

Note the use of err_syserr() to simplify error reporting. Along with err_setarg0(), it is a part of a larger package of error reporting functions that I use routinely. In fact, my normal version is a printf-like function with a format string and a variable argument list, but this simple version is adequate for this program and is simpler.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Thank you Jonathan, but it did not solve my problem. The output of the program is the same as before. I also checked the return values of the functions, but i do not get any errors. – Zagatho Jun 17 '14 at 21:05
  • Did you see 1000 PID died messages? (Have you considered testing your program with 10 or 20 in place of 1000?) Also note that there is no semicolon after my while loop, unlike yours. – Jonathan Leffler Jun 17 '14 at 21:06
  • yes I tried it with 10 processes. I edit my original post with all changes – Zagatho Jun 17 '14 at 21:30
  • Yes, you are right. I edited it in the post. Now i see, that all semop fails, but i do not know why. – Zagatho Jun 17 '14 at 21:56
  • My best guess at this point would be that `semop()` sets `errno` to `EFBIG`. However, much more to the point, you can find out why it fails by printing `errno` (and/or `strerror(errno)`) after the `semget()` or `semop()` fails. This will tell you what the o/s thinks is the trouble. Most probably, the semaphore numbers run from 0 to `nsems-1` (zero-based indexing, rather than one-based indexing). – Jonathan Leffler Jun 17 '14 at 22:33
  • Yes, the semaphore numbers run from 0 to n-1. The error from errno is **file too large** If i change `sema.sem_num = 0`, the programm don't terminate and it stops in front of the first `semop()`. – Zagatho Jun 17 '14 at 22:50
  • Are you saying LOCK =1 and UNLOCK=-1? or has the original been changed. If it hasn't been changed then is the opposite of what the man page says: -->> "If sem_op is a positive integer, the operation adds this value to the semaphore value (semval). .... This operation can always proceed—it never forces a thread to wait." – Jamie Pate May 17 '17 at 01:10
0

I tried your code (after solving the join of all children), and all children wait forever on the semop -1. This is because semaphore is created with an initial value set to 0 but it need to be 1 in order to let one child to run.

From the Linux semget manpage :

   The values of the semaphores in a newly created set are indeterminate.  (POSIX.1-2001  is  explicit  on  this  point.)   Although
   Linux,  like  many  other  implementations, initializes the semaphore values to 0, a portable application cannot rely on this: it
   should explicitly initialize the semaphores to the desired values.

In order to initialize you can use :

if(semctl(sem_id, 0, SETVAL, 1) == -1)
{
    perror("semctl");
    exit(0);
}   

This could also be achieve using semop +1 if the initial value is 0.

Note that you can avoid interaction with other programs or previous run using IPC_PRIVATE as sem/shm key.

mpromonet
  • 11,326
  • 43
  • 62
  • 91