4

I was trying to write a simple program communicating between kernel and user space using Netlink. Basically here's what I wanted to achieve:

  1. User space program starts binding to a user defined multicast group.
  2. Insert kernel module
  3. Kernel module sends a message to this multicast group
  4. User space program receives the message

Here's my code:

======User space program======

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<sys/socket.h>
#include<linux/netlink.h>
#include<sys/types.h>
#include<unistd.h>

#define MYPROTO NETLINK_USERSOCK
#define MYMGRP 0x21 //User defined group, consistent in both kernel prog and user prog

/*
 * Create a raw netlink socket for MYPROTO, bind it to this process's PID
 * and request multicast membership through the nl_groups field.
 *
 * Returns the socket descriptor on success, the negative socket() result
 * if the socket cannot be created, or -1 if bind() fails.
 */
int open_netlink()
{
        /*
         * NOTE(review): MYPROTO is NETLINK_USERSOCK here, while the kernel
         * module in this listing creates its socket with NETLINK_USER (31);
         * the two sides must use the same protocol to reach each other.
         */
        int sock = socket(AF_NETLINK,SOCK_RAW,MYPROTO);
        struct sockaddr_nl addr;

        memset((void *)&addr, 0, sizeof(addr));

        if (sock<0)
                return sock;
        addr.nl_family = AF_NETLINK;
        addr.nl_pid = getpid();
        /*
         * NOTE(review): nl_groups is presumably a bitmask of groups rather
         * than a single group number, so assigning 0x21 would select groups
         * by bit position instead of one group -- confirm against netlink(7).
         */
        addr.nl_groups = MYMGRP;
        /* NOTE(review): sock is not closed when bind() fails below. */
        if (bind(sock,(struct sockaddr *)&addr,sizeof(addr))<0)
                return -1;
        return sock;
}

/*
 * Receive one message from the netlink socket and print its payload.
 *
 * The netlink header and the payload are received into two separate
 * buffers through a two-element iovec. Returns the negative recvmsg()
 * result on failure.
 *
 * NOTE(review): the success path reaches the end of the function without a
 * return statement although the declared return type is int.
 */
int read_event(int sock)
{
        struct sockaddr_nl nladdr;
        /*
         * NOTE(review): msg is never zeroed; only the four fields assigned
         * below are initialised before it is handed to recvmsg().
         */
        struct msghdr msg;
        struct iovec iov[2];
        struct nlmsghdr nlh;
        char buffer[65536];
        int ret;
        /* Segment 0 receives the netlink header, segment 1 the payload. */
        iov[0].iov_base = (void *)&nlh;
        iov[0].iov_len = sizeof(nlh);
        iov[1].iov_base = (void *)buffer;
        iov[1].iov_len = sizeof(buffer);
        msg.msg_name = (void *)&(nladdr);
        msg.msg_namelen = sizeof(nladdr);
        msg.msg_iov = iov;
        msg.msg_iovlen = sizeof(iov)/sizeof(iov[0]);
        ret=recvmsg(sock, &msg, 0);
        if (ret<0) {
                return ret;
        }
        /*
         * NOTE(review): NLMSG_DATA() yields the address just past the header
         * it is given, but the payload was received into `buffer`, which is a
         * separate object from `nlh`; this reads memory after `nlh`, not the
         * received payload.
         */
        printf("Received message payload: %s\n", NLMSG_DATA(&nlh));
}

/*
 * Entry point: open the netlink socket, then read and print events forever.
 * argc and argv are not used.
 */
int main(int argc, char *argv[])
{
        int nls = open_netlink();
        if (nls<0) {
                /*
                 * NOTE(review): err() is presumably the <err.h> function;
                 * that header is not among this program's includes -- confirm.
                 */
                err(1,"netlink");
        }

        /* The loop never terminates, so the return below is not reached. */
        while (1)
                read_event(nls);
        return 0;
}

======Kernel module======

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <net/sock.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/delay.h>

#define NETLINK_USER 31
#define MYGRP 0x21 //User defined group, consistent in both kernel prog and user prog

struct sock *nl_sk = NULL;

/*
 * Build a netlink message carrying a fixed greeting and multicast it to
 * group MYGRP through the module-level socket nl_sk.
 *
 * Returns nothing; allocation and send failures are only logged.
 */
static void send_to_user() {
    struct sk_buff *skb_out;
    struct nlmsghdr *nlh;
    int msg_size;
    char *msg = "Hello from kernel";
    int res;

    printk(KERN_INFO "Entering: %s\n", __FUNCTION__);
    /* strlen() does not count the terminating NUL byte. */
    msg_size = strlen(msg);
    /* NOTE(review): the allocation flags argument is passed as a literal 0. */
    skb_out = nlmsg_new(msg_size, 0);

    if (!skb_out) {
        printk(KERN_ERR "Failed to allocate new skb\n");
        return;
    }
    /* NOTE(review): the nlmsg_put() result is used below without a NULL check. */
    nlh = nlmsg_put(skb_out, 0, 1, NLMSG_DONE, msg_size, 0);
    //NETLINK_CB(skb_out).dst_group = 1; /* Multicast to group 1, 1<<0 */
    /*
     * Copies exactly msg_size bytes, so the copy itself does not write a
     * terminating NUL into the payload.
     */
    strncpy(nlmsg_data(nlh), msg, msg_size);

    /* A negative result is logged but not propagated to the caller. */
    res = nlmsg_multicast(nl_sk, skb_out, 0, MYGRP, 0);
    if (res < 0) {
        printk(KERN_INFO "Error while sending bak to user, err id: %d\n", res);
    }
}

/*
 * Module init: create the kernel-side netlink socket for NETLINK_USER and
 * immediately send one multicast message.
 *
 * Returns 0 on success, or -10 if the socket cannot be created.
 */
static int __init
hello_init(void) {

    struct netlink_kernel_cfg cfg = {
            .groups = MYGRP,
    };
    printk("Entering: %s\n", __FUNCTION__);
    /*
     * NOTE(review): NETLINK_USER is 31 here, whereas the user-space program
     * in this listing opens its socket with NETLINK_USERSOCK.
     */
    nl_sk = netlink_kernel_create(&init_net, NETLINK_USER, &cfg);
    if (!nl_sk) {
        printk(KERN_ALERT "Error creating socket.\n");
        /* NOTE(review): raw number rather than a named errno constant. */
        return -10;
    }

    /* The message is sent exactly once, at module load time. */
    send_to_user();

    return 0;
}

/*
 * Module exit: log the unload and release the netlink socket stored in
 * nl_sk by hello_init().
 */
static void __exit
hello_exit(void) {

    printk(KERN_INFO "exiting hello module\n");
    netlink_kernel_release(nl_sk);
}

module_init(hello_init);
module_exit(hello_exit);

Since the kernel module only sends the message once, during initialization, I run the listening program first and then insert the module. However, I always get this error:

Error while sending bak to user, err id: -3

When I tracked down this error id, I found that it comes from this piece of code in net/netlink/af_netlink.c:

if (info.delivery_failure) {
    kfree_skb(info.skb2);
    return -ENOBUFS;
}
consume_skb(info.skb2);

if (info.delivered) {
    if (info.congested && (allocation & __GFP_WAIT))
    yield();
    return 0;
}
return -ESRCH;

I presume the cause is not delivery_failure, but that the message was still not delivered for some reason.

I was following this example, in which the author's program keeps listening for route changes, although I would like to use a user-defined multicast group instead.

Any ideas? Thanks in advance!

guoger
  • 197
  • 1
  • 2
  • 9
  • I also had a similar issue a long time ago; you can check the following things: 1: Make sure that the value 31 of NETLINK_USER is not already used by some other component; if it is, choose an unused value. I guess 31 is assigned to the LTT component. 2: While creating the socket in user space, use the same value of NETLINK_USER as defined in the kernel. `int sock = socket(AF_NETLINK,SOCK_RAW, NETLINK_USER);` – Gyan Gupta Mar 28 '14 at 09:15
  • Thanks for your reply! Although as far as I understood, netlink_kernel_create() is the one to create a socket in kernel space rather than socket(), or am I missing something? – guoger Mar 28 '14 at 09:49
  • I meant in your user space program; when you open the socket with `socket(AF_NETLINK,SOCK_RAW,MYPROTO);`, do it with the same value of NETLINK_USER as you defined in the kernel. So it could be ` #define NETLINK_USER 30`
    `socket(AF_NETLINK,SOCK_RAW, NETLINK_USER);`
    – Gyan Gupta Mar 28 '14 at 10:04
  • I remember how I tried to do this. There are various examples and tutorials on the internet, and most of them did not work for me. So I ended up using a generic netlink family and libnl. You can see my working example at https://github.com/dzeban/keymon – Alexander Dzyoba Mar 31 '14 at 08:50

2 Answers

10

These are the two key problems I found in your code:

  1. Both protocol family and multicast group need to be consistent in both kernel prog and user prog. Your protocol family is NETLINK_USERSOCK (2) in userspace and NETLINK_USER (31) in kernelspace.
  2. addr.nl_groups = MYMGRP; doesn't work for some reason. This does, though: setsockopt(sock, 270, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group)).

Not fatal:

  1. In this case, the module is not listening to the group messages, therefore you don't need to include the multicast group in the netlink_kernel_create() params.

Also, Not really netlink-related but useful anyway:

  1. strlen() does not count the terminating null character. When allocating the message, you should add one byte to make up for this.
  2. In this case, NLMSG_DATA(&nlh) is undefined behaviour. This is because your header and data are in separate memory chunks, not guaranteed to be glued, and all the macro does is access the chunk of memory after nlh:

#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))

This is my version of your code:

Userspace program:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <unistd.h>

/* Protocol family, consistent in both kernel prog and user prog. */
#define MYPROTO NETLINK_USERSOCK
/* Multicast group, consistent in both kernel prog and user prog. */
#define MYMGRP 21

/*
 * SOL_NETLINK is 270 (see
 * http://lxr.free-electrons.com/source/include/linux/socket.h?v=4.1#L314
 * and http://stackoverflow.com/questions/17732044/). Some older libc
 * headers do not define it, so provide a fallback.
 */
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

/*
 * Open a raw netlink socket on MYPROTO, bind it to this process's PID and
 * subscribe it to multicast group MYMGRP.
 *
 * Returns the socket descriptor on success. On failure a diagnostic that
 * includes the errno text is printed and a negative value is returned:
 * the socket() result if creation failed, -1 otherwise. The socket is
 * closed on every failure path so no descriptor is leaked.
 */
int open_netlink(void)
{
    struct sockaddr_nl addr;
    int group = MYMGRP;
    int sock;

    sock = socket(AF_NETLINK, SOCK_RAW, MYPROTO);
    if (sock < 0) {
        perror("sock < 0");
        return sock;
    }

    memset(&addr, 0, sizeof(addr));
    addr.nl_family = AF_NETLINK;
    addr.nl_pid = getpid();
    /*
     * addr.nl_groups is deliberately left at 0. It is a bitmask in which
     * bit (N - 1) selects group N, so storing the group *number* MYMGRP in
     * it would not subscribe to group MYMGRP. The group is joined by number
     * with NETLINK_ADD_MEMBERSHIP below instead.
     */

    if (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        /* Report first: close() may overwrite errno. */
        perror("bind < 0");
        close(sock);
        return -1;
    }

    /*
     * NETLINK_ADD_MEMBERSHIP takes a single group number. Joining a group
     * may require elevated privileges (e.g. running with sudo); the errno
     * text printed by perror() tells which failure occurred.
     */
    if (setsockopt(sock, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                   &group, sizeof(group)) < 0) {
        perror("setsockopt < 0");
        close(sock);
        return -1;
    }

    return sock;
}

/*
 * Block until one datagram arrives on the netlink socket `sock`, then print
 * the payload of every well-formed netlink message it contains.
 *
 * On recvmsg() failure a diagnostic including the errno text is printed and
 * the function returns without printing a payload.
 */
void read_event(int sock)
{
    /*
     * The union forces the receive buffer to be suitably aligned for
     * struct nlmsghdr; a plain char array carries no such guarantee.
     */
    union {
        struct nlmsghdr hdr;
        char bytes[65536];
    } buffer;
    struct sockaddr_nl nladdr;
    struct iovec iov;
    struct msghdr msg;
    struct nlmsghdr *nlh;
    ssize_t ret;
    int remaining;

    iov.iov_base = buffer.bytes;
    iov.iov_len = sizeof(buffer.bytes);

    /*
     * Zero the whole header first: recvmsg() also reads msg_control and
     * msg_controllen, which must not be left as stack garbage.
     */
    memset(&msg, 0, sizeof(msg));
    msg.msg_name = &nladdr;
    msg.msg_namelen = sizeof(nladdr);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    printf("Ok, listening.\n");
    ret = recvmsg(sock, &msg, 0);
    if (ret < 0) {
        perror("ret < 0");
        return;
    }

    /*
     * NLMSG_OK() checks that a complete header and the length it announces
     * fit in the bytes actually received, so a short or malformed datagram
     * is skipped rather than read out of bounds.
     */
    remaining = (int) ret;
    for (nlh = &buffer.hdr; NLMSG_OK(nlh, remaining);
         nlh = NLMSG_NEXT(nlh, remaining)) {
        /* Bound the print by the payload length in case the sender did not
         * NUL-terminate the string. */
        printf("Received message payload: %.*s\n",
               (int) NLMSG_PAYLOAD(nlh, 0), (const char *) NLMSG_DATA(nlh));
    }
}

/*
 * Entry point: open the multicast netlink socket and print every event
 * received on it, forever. If the socket cannot be opened, the negative
 * value from open_netlink() is returned as the exit status.
 * argc and argv are unused.
 */
int main(int argc, char *argv[])
{
    int nls = open_netlink();

    if (nls < 0) {
        return nls;
    }

    /* Never terminates; the final return only satisfies the signature. */
    for (;;) {
        read_event(nls);
    }

    return 0;
}

And this is the kernel module:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netlink.h>
#include <net/netlink.h>
#include <net/net_namespace.h>

/* Protocol family, consistent in both kernel prog and user prog. */
#define MYPROTO NETLINK_USERSOCK
/* Multicast group, consistent in both kernel prog and user prog. */
#define MYGRP 21

static struct sock *nl_sk = NULL;

/*
 * Multicast a fixed, NUL-terminated greeting to group MYGRP through the
 * module-level socket nl_sk.
 *
 * Returns nothing; every failure is logged. The skb is freed here if the
 * message cannot be built, and is otherwise handed to nlmsg_multicast().
 */
static void send_to_user(void)
{
    static const char msg[] = "Hello from kernel";
    /* sizeof counts the terminating NUL, so the receiver gets a C string. */
    const int msg_size = sizeof(msg);
    struct sk_buff *skb;
    struct nlmsghdr *nlh;
    int res;

    pr_info("Creating skb.\n");
    /* nlmsg_new() takes the payload size; it accounts for the header and
     * alignment itself. */
    skb = nlmsg_new(msg_size, GFP_KERNEL);
    if (!skb) {
        pr_err("Allocation failure.\n");
        return;
    }

    /* nlmsg_put() returns NULL when the skb has no room for the message. */
    nlh = nlmsg_put(skb, 0, 1, NLMSG_DONE, msg_size, 0);
    if (!nlh) {
        pr_err("nlmsg_put() failure.\n");
        nlmsg_free(skb);
        return;
    }
    memcpy(nlmsg_data(nlh), msg, msg_size);

    pr_info("Sending skb.\n");
    /* Ownership of skb passes to nlmsg_multicast(); do not free it after. */
    res = nlmsg_multicast(nl_sk, skb, 0, MYGRP, GFP_KERNEL);
    if (res < 0)
        pr_info("nlmsg_multicast() error: %d\n", res);
    else
        pr_info("Success.\n");
}

/*
 * Module init: create the kernel-side netlink socket on MYPROTO, send one
 * multicast message, and release the socket again.
 *
 * The module never receives messages, so no netlink_kernel_cfg is passed
 * and the socket does not need to outlive this function.
 *
 * Returns 0 on success or -ENOMEM if the socket cannot be created.
 */
static int __init hello_init(void)
{
    pr_info("Inserting hello module.\n");

    nl_sk = netlink_kernel_create(&init_net, MYPROTO, NULL);
    if (!nl_sk) {
        pr_err("Error creating socket.\n");
        /* Named errno instead of the raw number -10. */
        return -ENOMEM;
    }

    send_to_user();

    netlink_kernel_release(nl_sk);
    /* Do not leave a dangling pointer to the released socket. */
    nl_sk = NULL;
    return 0;
}

/*
 * Module exit: only logs the unload. There is no socket to release here
 * because hello_init() already releases it before returning.
 */
static void __exit hello_exit(void)
{
    pr_info("Exiting hello module.\n");
}

module_init(hello_init);
module_exit(hello_exit);

MODULE_LICENSE("GPL");

Tested in kernel 3.13.

(May I suggest that people use libnl-3 instead of raw sockets for the userspace program. Its multicast Netlink documentation is actually decent.)

Yd Ahhrk
  • 1,088
  • 12
  • 24
  • what should be `netlink_kernel_create()` in case of older kernel versions? – Nitinkumar Ambekar Jan 29 '16 at 11:45
  • @NTN As far as I've seen, it's always the same arguments, just rearranged. See [this example](https://github.com/NICMx/Jool/blob/f18310cf1e5e2750f1975ee83dbddd0c0308274c/mod/common/nl_handler.c#L1081). You can also [see `netlink_kernel_create()`'s definition](http://lxr.free-electrons.com/source/include/linux/netlink.h?v=3.13#L56). (Hack the kernel version in the URL to see different versions of the file; for example, change "v=3.13" to "v=3.10".) – Yd Ahhrk Jan 29 '16 at 23:15
  • @NTN to answer your question directly, that would be `netlink_kernel_create(&init_net, NETLINK_USERSOCK, 0, NULL, NULL, THIS_MODULE)` (assuming your kernel is 3.5 or lower). – Yd Ahhrk Jan 29 '16 at 23:20
  • would you like to answer my [this question](http://stackoverflow.com/q/35098943/2706918)? – Nitinkumar Ambekar Jan 30 '16 at 06:51
  • Do you maybe know an answer to the question over [here](https://stackoverflow.com/questions/35876323/netlink-multicast-in-kernels-above-4)? – user1252280 Mar 10 '16 at 18:38
  • 2
    Specifying nl_groups isn't working because nl_groups is a bitmask of allowed groups while NETLINK_ADD_MEMBERSHIP takes an individual group number. I think in this case you'd need to set bit 20 (for value 0x100000). – Matt Jun 09 '16 at 16:48
  • I get error:`setsockopt < 0`. My kernel: 4.4.0-127-generic x64 @YdAhhrk – river Jun 29 '18 at 08:12
  • @river Try printing `errno`: https://www.systutorials.com/docs/linux/man/2-setsockopt/ – Yd Ahhrk Jun 29 '18 at 23:43
  • 1
    @river Here's a guess: Try running the program with `sudo`. – Yd Ahhrk Jun 29 '18 at 23:49
  • `sudo` makes it work. It seems the code is OK. @Yd Ahhrk – river Jul 05 '18 at 06:47
0

According to the connector driver:

If there are no listeners for given group -ESRCH can be returned.

So your userspace program was not correctly listening on that group when your kernel module sent the message.

seamlik
  • 173
  • 1
  • 6