0

Parent has 'm' messages to send to 'n' child. In each iteration it sends one message to all its child. If all replies "Received" it sends the next message. Below is my code

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>

#define MAX_LEN 50

struct details
{
    int no;
    int id;
    char msg_text[MAX_LEN];
};

typedef struct msg_buf
{
    long int msg_type;
    struct details d;
} msg;

char message[20][50] =
{
    " ",
    "Message1",
    "Message2",
    "Message3",
    "Message4",
    "Message5",
    "Message6",
    "Message7",
    "Message8"
};

int main(int argc, char *argv[])
{
    int id1, id2, n, m, i, j, status, p_id;
    pid_t pid;
    msg snd, rcvd;

    if (argc < 2)
    {
        perror("Too few arguments");
        return -1;
    }
    n = atoi(argv[1]);
    m = atoi(argv[2]);

    id1 = msgget((key_t)78, IPC_CREAT | 0666);
    id2 = msgget((key_t)56, IPC_CREAT | 0666);

    if (id1 == -1 || id2 == -1)
    {
        printf("Error in creating message queue\n");
        return -1;
    }
    p_id = getpid();

    printf("\nQueues are created.\n\n");
    for (i = 1; i <= n; i++)
    {
        pid = fork();
        if (pid == 0)
            break;
    }

    if (pid == 0)
    {
        for (j = 1; j <= m; j++)
        {
            printf("Process %d waiting for message %d\n", getpid(), j);
            while (msgrcv(id1, &rcvd, sizeof(struct details), j + 1, IPC_NOWAIT) < 0)
            {
            }

            srand(time(0));
            strcpy(snd.d.msg_text, "Received");
            snd.msg_type = rcvd.msg_type;
            snd.d.no = rcvd.d.no;
            snd.d.id = getpid();

            if (msgsnd(id2, &snd, sizeof(struct details), IPC_NOWAIT) < 0)
            {
                perror("msgsnd");
            }
            else
            {
                printf("Reply sending successful for message %d for process %d\n", snd.d.no, snd.d.id);
            }
        }
    }

    if (p_id == getpid())
    {
        for (j = 1; j <= m; j++)
        {
            strcpy(snd.d.msg_text, message[j]);
            snd.msg_type = j + 1;
            snd.d.no = j;

            if (msgsnd(id1, &snd, sizeof(struct details), 0) < 0)
            {
                perror("msgsnd");
            }
            else
            {
                printf("Message %d sent- \t\t\t\t%s\n", j, snd.d.msg_text);
            }

            for (i = 1; i <= n; i++)
            {
                sleep(i);
                if (msgrcv(id2, &rcvd, sizeof(struct details), j + 1, 0) >= 0)
                {
                    printf("Reply received from child %d for message %d - \t%s\n", rcvd.d.id, j, rcvd.d.msg_text);
                }
            }
        }
        wait(NULL);
        printf("Communication End.\n\n");
    }
}

The output I get is for n=2, m=2 is:

Queues are created.

Message 1 sent- Message1
Process 14804 waiting for message 1
Process 14805 waiting for message 1
Reply sending successful for message 1 for process 14804
Process 14804 waiting for message 2
Reply received from child 14804 for message 1 - Received

Nothing gets printed after this. There is no progress. What is the issue? How to solve this?

Hamsa
  • 476
  • 5
  • 23
  • Please learn how to indent your code for readability. – Jonathan Leffler Apr 03 '23 at 18:19
  • Where is `MAX_LEN` is defined?? – Darth-CodeX Apr 03 '23 at 18:23
  • 1
    Tangential: Using `perror()` is vaguely appropriate when a system function that sets `errno` fails (it's better than a kick in the teeth, but I don't like what it does), but not for a general message like "too few arguments". You either get a message like "Too few arguments: No error" or you get a random error reported such as "Too few arguments: not a typewriter". Neither is very helpful. Use `fprintf(stderr, "Usage: %s n m\n", argv[0]);` or expand on the meanings of `n` and `m` (such as `"Usage: %s num_kids num_msgs\n"`). – Jonathan Leffler Apr 03 '23 at 18:27
  • 1
    You child code has the loop `while (msgrcv(id1, &rcvd, sizeof(struct details), j + 1, IPC_NOWAIT) < 0) { /* Nothing! */ }` — surely the loop body needs to be the code after that? – Jonathan Leffler Apr 03 '23 at 18:29
  • @JonathanLeffler My argument is the child should wait until there is a message of type j+1 – Hamsa Apr 04 '23 at 02:49
  • 1
    @Darth-CodeX Sorry the line #define MAX_LEN 50 got omitted. I have edited the code – Hamsa Apr 04 '23 at 02:52
  • Another tangential comment: see [Why call `srand()` just once?](https://stackoverflow.com/q/7343833/15168). It affects the 'randomness' (or lack thereof) of the random numbers — but is otherwise not material to your problems. – Jonathan Leffler Apr 04 '23 at 02:53
  • For debugging purposes, at the very least you should print any messages that the child receives in the loop. You need to know what's going on. With multiple processes at work, each message should probably be prefixed with the PID of the process generating it. You should report on the PID of each process that's created. This stuff makes it easier to track what's going on. – Jonathan Leffler Apr 04 '23 at 02:55
  • @JonathanLeffler There is a id field in the message. The child populates it with its own PID before sending it to parent. And, yes I printed the message received from the parent, removed it here to be less clumsy. – Hamsa Apr 04 '23 at 03:00
  • You have the code `if (argc < 2)\ { perror("Too few arguments");\ return -1; } n = atoi(argv[1]); m = atoi(argv[2]);` — you should probably test `if (argc != 3)` and print the appropriate message (not using `perror()` as stated before). Be precise in specifying your expectations and checking. – Jonathan Leffler Apr 04 '23 at 03:06
  • AFAICS, you send one message per iteration, so at most one child can read it. You do not send one message per child. I'm still working through your code. – Jonathan Leffler Apr 04 '23 at 03:08
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/252958/discussion-between-hamsa-and-jonathan-leffler). – Hamsa Apr 04 '23 at 03:28

1 Answers1

1

The biggest single problem is one alluded to in a comment:

AFAICS, you send one message per iteration, so at most one child can read it. You do not send one message per child.

Any given message can be received by (at most) one process. For n processes to receive a message, you have to send n messages.

The other issues are mostly cosmetic. I've used logging code available in my SOQ (Stack Overflow Questions) repository on GitHub as files stderr.c and stderr.h in the src/libsoq sub-directory. Some of the messages are written to stdout — there isn't a function that takes just a file stream, so I used err_logmsg() which takes a file stream, control options and an exit status (but the function shouldn't exit, so that's unused) as well as the format and arguments.

The result is:

/* SO 7592-2454 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <unistd.h>

#include "stderr.h"

#define MAX_LEN 50

struct details
{
    int no;
    int id;
    char msg_text[MAX_LEN];
};

typedef struct msg_buf
{
    long int msg_type;
    struct details d;
} msg;

static char message[20][50] =
{
    " ",
    "Message1",
    "Message2",
    "Message3",
    "Message4",
    "Message5",
    "Message6",
    "Message7",
    "Message8"
};

static const char usestr[] = "num_kids num_msgs";

static void dump_message(const char *tag, const msg *info)
{
    err_logmsg(stdout, err_getlogopts(), 0, "%s: type %ld (details: no %d, id %d, text [%s])\n",
               tag, info->msg_type, info->d.no, info->d.id, info->d.msg_text);
}

int main(int argc, char *argv[])
{
    err_setarg0(argv[0]);

    if (argc != 3)
        err_usage(usestr);
    int n = atoi(argv[1]);
    int m = atoi(argv[2]);

    int id1 = msgget((key_t)78, IPC_CREAT | 0666);
    int id2 = msgget((key_t)56, IPC_CREAT | 0666);

    if (id1 == -1 || id2 == -1)
        err_syserr("failed to create a message queue: ");

    pid_t p_id = getpid();

    err_setlogopts(ERR_PID|ERR_MILLI);
    err_logmsg(stdout, err_getlogopts(), 0, "Queues are created.\n");

    pid_t pid;
    for (int i = 1; i <= n; i++)
    {
        pid = fork();
        if (pid == 0)
            break;
    }

    msg snd;
    msg rcvd;

    if (pid == 0)
    {
        for (int j = 1; j <= m; j++)
        {
            err_logmsg(stdout, err_getlogopts(), 0, "Waiting for message %d\n", j);
            while (msgrcv(id1, &rcvd, sizeof(struct details), j + 1, IPC_NOWAIT) < 0)
                err_syserr("msgrcv() returned with error status: ");

            dump_message("Message received", &rcvd);

            strcpy(snd.d.msg_text, "Received");
            snd.msg_type = rcvd.msg_type;
            snd.d.no = rcvd.d.no;
            snd.d.id = getpid();

            if (msgsnd(id2, &snd, sizeof(struct details), IPC_NOWAIT) < 0)
                err_sysrem("msgsnd() failed: ");
            else
                dump_message("Message sent", &snd);
        }
        err_logmsg(stdout, err_getlogopts(), 0, "Child process complete\n");
        exit(EXIT_SUCCESS);
    }

    if (p_id == getpid())
    {
        for (int j = 1; j <= m; j++)
        {
            strcpy(snd.d.msg_text, message[j]);
            snd.msg_type = j + 1;
            snd.d.no = j;

            for (int i = 0; i < n; i++)
            {
                if (msgsnd(id1, &snd, sizeof(struct details), 0) < 0)
                    err_sysrem("msgsnd() failed: ");
                else
                    dump_message("Message sent", &snd);
            }

            for (int i = 1; i <= n; i++)
            {
                err_logmsg(stdout, err_getlogopts(), 0, "Dozing for %d seconds\n", i);
                sleep(i);
                if (msgrcv(id2, &rcvd, sizeof(struct details), j + 1, 0) >= 0)
                    dump_message("Reply received", &rcvd);
                else
                    err_syserr("msgrcv() failed: ");
            }
        }

        int corpse;
        int status;
        while ((corpse = wait(&status)) > 0)
            err_remark("Child %d exited with status 0x%.4X\n", corpse, status);
        err_logmsg(stdout, err_getlogopts(), 0, "Communication End.\n\n");
    }

    return 0;
}

There is copious logging, because I need to see what's happening. The code should (but doesn't) validate the values for n and m. They should be at least 1 and not more than some faintly sane number (e.g. 10), but there'd be one or two values — enumerations or defined constants — to limit the acceptable range.

Here's a sample output (source code msg79.c, program msg79):

$ ./msg79 2 3
msg79: 2023-04-03 21:36:38.215 - pid=67247: Queues are created.
msg79: 2023-04-03 21:36:38.217 - pid=67247: Message sent: type 2 (details: no 1, id 1, text [Message1])
msg79: 2023-04-03 21:36:38.217 - pid=67247: Message sent: type 2 (details: no 1, id 1, text [Message1])
msg79: 2023-04-03 21:36:38.217 - pid=67247: Dozing for 1 seconds
msg79: 2023-04-03 21:36:38.217 - pid=67248: Waiting for message 1
msg79: 2023-04-03 21:36:38.217 - pid=67249: Waiting for message 1
msg79: 2023-04-03 21:36:38.217 - pid=67248: Message received: type 2 (details: no 1, id 1, text [Message1])
msg79: 2023-04-03 21:36:38.217 - pid=67249: Message received: type 2 (details: no 1, id 1, text [Message1])
msg79: 2023-04-03 21:36:38.217 - pid=67248: Message sent: type 2 (details: no 1, id 67248, text [Received])
msg79: 2023-04-03 21:36:38.217 - pid=67248: Waiting for message 2
msg79: 2023-04-03 21:36:38.217 - pid=67249: Message sent: type 2 (details: no 1, id 67249, text [Received])
msg79: 2023-04-03 21:36:38.218 - pid=67248: Message received: type 3 (details: no 2, id 1, text [Message2])
msg79: 2023-04-03 21:36:38.218 - pid=67249: Waiting for message 2
msg79: 2023-04-03 21:36:38.218 - pid=67249: Message received: type 3 (details: no 2, id 1, text [Message2])
msg79: 2023-04-03 21:36:38.218 - pid=67248: Message sent: type 3 (details: no 2, id 67248, text [Received])
msg79: 2023-04-03 21:36:38.218 - pid=67249: Message sent: type 3 (details: no 2, id 67249, text [Received])
msg79: 2023-04-03 21:36:38.218 - pid=67248: Waiting for message 3
msg79: 2023-04-03 21:36:38.219 - pid=67249: Waiting for message 3
msg79: 2023-04-03 21:36:38.219 - pid=67248: Message received: type 4 (details: no 3, id 1, text [Message3])
msg79: 2023-04-03 21:36:38.219 - pid=67249: Message received: type 4 (details: no 3, id 1, text [Message3])
msg79: 2023-04-03 21:36:38.219 - pid=67248: Message sent: type 4 (details: no 3, id 67248, text [Received])
msg79: 2023-04-03 21:36:38.219 - pid=67249: Message sent: type 4 (details: no 3, id 67249, text [Received])
msg79: 2023-04-03 21:36:38.219 - pid=67248: Child process complete
msg79: 2023-04-03 21:36:38.219 - pid=67249: Child process complete
msg79: 2023-04-03 21:36:39.219 - pid=67247: Reply received: type 2 (details: no 1, id 67248, text [Received])
msg79: 2023-04-03 21:36:39.219 - pid=67247: Dozing for 2 seconds
msg79: 2023-04-03 21:36:41.220 - pid=67247: Reply received: type 2 (details: no 1, id 67249, text [Received])
msg79: 2023-04-03 21:36:41.220 - pid=67247: Message sent: type 3 (details: no 2, id 1, text [Message2])
msg79: 2023-04-03 21:36:41.220 - pid=67247: Message sent: type 3 (details: no 2, id 1, text [Message2])
msg79: 2023-04-03 21:36:41.220 - pid=67247: Dozing for 1 seconds
msg79: 2023-04-03 21:36:42.221 - pid=67247: Reply received: type 3 (details: no 2, id 67248, text [Received])
msg79: 2023-04-03 21:36:42.221 - pid=67247: Dozing for 2 seconds
msg79: 2023-04-03 21:36:44.223 - pid=67247: Reply received: type 3 (details: no 2, id 67249, text [Received])
msg79: 2023-04-03 21:36:44.223 - pid=67247: Message sent: type 4 (details: no 3, id 1, text [Message3])
msg79: 2023-04-03 21:36:44.223 - pid=67247: Message sent: type 4 (details: no 3, id 1, text [Message3])
msg79: 2023-04-03 21:36:44.223 - pid=67247: Dozing for 1 seconds
msg79: 2023-04-03 21:36:45.227 - pid=67247: Reply received: type 4 (details: no 3, id 67248, text [Received])
msg79: 2023-04-03 21:36:45.227 - pid=67247: Dozing for 2 seconds
msg79: 2023-04-03 21:36:47.227 - pid=67247: Reply received: type 4 (details: no 3, id 67249, text [Received])
msg79: 2023-04-03 21:36:47.227 - pid=67247: Child 67249 exited with status 0x0000
msg79: 2023-04-03 21:36:47.227 - pid=67247: Child 67248 exited with status 0x0000
msg79: 2023-04-03 21:36:47.228 - pid=67247: Communication End.
$
Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Thanks for the help. The issue was silly. I wish I had figured it out. Thanks for the suggestions. I will try to improve. – Hamsa Apr 04 '23 at 06:16