0

While playing around with the code below I noticed that a network namespace can be kept alive with neither a process attached to it nor any "direct" reference to its nsfs inode through an open file descriptor.

How can I discover the network namespace an open (RT)NETLINK socket refers to, given only the NETLINK socket?

Running the following code with go run -exec sudo main.go will first create a new network namespace in a separate OS-locked goroutine. The same thread will then open a NETLINK socket for RTNETLINK in this new network namespace. The file descriptor is passed back to the initial (and also locked) thread/goroutine and the separate goroutine/thread terminates. This leaves the process with a set of threads (tasks) that are all attached to the original network namespace, that is, the one the program was started in. There is no longer any thread directly attached to the new network namespace, nor is there any direct namespace fd reference.

In the end, only the NETLINK/RTNETLINK socket is left, and it references the new network namespace only indirectly. Yet the network namespace is kept alive, as can be seen from the fact that on each query (every 30s) the code can still successfully list the network interfaces in the new network namespace.

package main

import (
    "runtime"
    "strings"
    "syscall"
    "time"

    "github.com/vishvananda/netlink"
    "golang.org/x/sys/unix"
)

// creator creates a new network namespace, enters it, and passes back an
// RTNETLINK handle that was opened inside it. It then waits for done to be
// closed before returning, so that the OS thread attached to the new network
// namespace goes away while the handle lives on.
//
// This function must be called on its own goroutine.
func creator(done <-chan struct{}, handle chan<- *netlink.Handle) {
    // Lock this goroutine to its current OS-level thread before doing anything
    // else: the unshare syscall affects only the thread it is called from, and
    // the thread ID reported below must be the ID of the thread that actually
    // gets unshared. Without the lock the goroutine could be rescheduled onto
    // a different thread between reporting the ID and unsharing. We never
    // unlock, so when this goroutine returns the OS thread is killed and never
    // reused.
    runtime.LockOSThread()
    println("creator thread:", syscall.Gettid())
    println("creator: locked OS-level thread")

    err := syscall.Unshare(syscall.CLONE_NEWNET)
    if err != nil {
        panic("cannot create and enter new network namespace: " + err.Error())
    }
    println("creator: in new network namespace")

    // The handle's socket is opened by this thread, that is, inside the new
    // network namespace.
    nlHandle, err := netlink.NewHandle(unix.NETLINK_ROUTE)
    if err != nil {
        panic("cannot open RTNETLINK connection: " + err.Error())
    }
    println("creator: sending RTNETLINK handle")
    handle <- nlHandle
    println("creator: handle sent")

    <-done // wait for channel to be closed.
    println("creator: falling off")
    // ...simply fall off the edge; the still-locked thread dies with us.
}

// main starts creator on its own goroutine, takes over the RTNETLINK handle
// that creator opened inside the new network namespace, tells creator to
// finish, and then keeps listing the network interfaces visible through the
// handle every 30 seconds. It never returns; any RTNETLINK error panics.
func main() {
    runtime.LockOSThread()
    println("main thread:", syscall.Gettid())

    stop := make(chan struct{})
    handles := make(chan *netlink.Handle)
    go creator(stop, handles)
    rtnl := <-handles
    println("received RTNETLINK handle")

    println("telling creator to stop")
    close(stop)

    // Poll forever; the pause between two queries lives in the post statement.
    for ; ; time.Sleep(30 * time.Second) {
        nifs, err := rtnl.LinkList()
        if err != nil {
            panic("RTNETLINK failed: " + err.Error())
        }
        names := make([]string, len(nifs))
        for i := range nifs {
            names[i] = nifs[i].Attrs().Name
        }
        println("nifs found:", strings.Join(names, ", "))
    }
}
TheDiveO
  • 2,183
  • 2
  • 19
  • 38

2 Answers2

1

We can use bpf to trace a kprobe netlink_sendmsg which is the entry function of all netlink syscalls.

Here's the function signature, extracted from the kernel source for my system (Linux DESKTOP-GUGSID8 5.10.16.3-microsoft-standard-WSL2):

static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)

A simple investigation of struct socket will reveal the fact that sock->sk->__sk_common.skc_net.net->ns.inum denotes the netns id, and sock->file->f_inode->i_ino denotes the inode number.

For instance, my

ls -l /proc/self/ns/net

outputs

lrwxrwxrwx 1 gray gray 0 Oct 31 19:40 /proc/self/ns/net -> 'net:[4026531992]'

and the 4026531992 above is the netns id.

Therefore we can compose an easy bpftrace one-liner to catch every netlink_sendmsg call along with its comm, pid, nsid and inode_no:

sudo bpftrace -e 'k:netlink_sendmsg {$sk=*(int64*)(arg0+24); $net=*(int64*)($sk+48); $nsid=*(int64*)($net+136); $file=*(int64*)(arg0+16); $inode=*(int64*)($file+32); $ino=*(int64*)($inode+64); printf("pid %d, comm %s, inode %llu, nsid %u\n", pid, comm, $ino, $nsid)}'

And this is the output on my laptop:

Attaching 1 probe...
pid 3091, comm sudo, inode 21283, nsid 4026531992
pid 3091, comm sudo, inode 21283, nsid 4026531992
pid 1285, comm netlink_ns, inode 26694, nsid 4026532211
pid 1285, comm netlink_ns, inode 26694, nsid 4026532211
^C

We can confirm the inode 26694 is the netlink socket created in a separate netns using lsof(8):

$ sudo lsof -p $(pidof netlink_ns) | grep NETLINK
netlink_n 1049 root    3u     sock    0,7      0t0  26694 protocol: NETLINK

This information is enough to tell which netns a given netlink socket belongs to.

  • I see the value of this answer for exploratory and forensic system analysis. Unfortunately, attaching a kprobe to netlink_sendmsg looks very heavy-handed to me and comes with a severe performance penalty, so it is not acceptable on many production systems. – TheDiveO Apr 14 '23 at 20:55
0

The clue to a less heavy-handed solution (than kprobes) is somewhat hidden inside user313992's answer to How to get the Linux network namespace for a tap/tun device referenced in /proc/[PID]/fdinfo/[FD]?: the (at this time new) ioctl SIOCGSKNS. This can be applied to (not only) NETLINK sockets, where the socket fds can be duplicated across processes and then successfully queried using the SIOCGSKNS ioctl.

The proof-of-concept code (that relies on some ugly unsafe reflection "foodoo", thanks to How to access unexported struct fields?):

package main

import (
    "os"
    "reflect"
    "runtime"
    "strconv"
    "strings"
    "syscall"
    "time"
    "unsafe"

    "github.com/vishvananda/netlink"
    "github.com/vishvananda/netlink/nl"
    "golang.org/x/sys/unix"
)

// creator creates a new network namespace, enters it, reports the namespace's
// inode number, and passes back an RTNETLINK handle that was opened inside
// it. It then waits for done to be closed before returning, so that the OS
// thread attached to the new network namespace goes away while the handle
// lives on.
//
// This function must be called on its own goroutine.
func creator(done <-chan struct{}, handle chan<- *netlink.Handle) {
    // Lock this goroutine to its current OS-level thread before doing anything
    // else: the unshare syscall affects only the thread it is called from, and
    // the thread ID reported below must be the ID of the thread that actually
    // gets unshared. Without the lock the goroutine could be rescheduled onto
    // a different thread between reporting the ID and unsharing. We never
    // unlock, so when this goroutine returns the OS thread is killed and never
    // reused.
    runtime.LockOSThread()
    println("creator thread:", syscall.Gettid())
    println("creator: locked OS-level thread")

    err := syscall.Unshare(syscall.CLONE_NEWNET)
    if err != nil {
        panic("cannot create and enter new network namespace: " + err.Error())
    }
    // thread-self (not self) is required here: only this thread has switched
    // network namespaces, the rest of the process has not.
    netnslnk, err := os.Stat("/proc/thread-self/ns/net")
    if err != nil {
        panic("cannot determine task's new network namespace: " + err.Error())
    }
    println("creator: in new network namespace net:[" +
        strconv.FormatUint(netnslnk.Sys().(*syscall.Stat_t).Ino, 10) + "]")

    // The handle's socket is opened by this thread, that is, inside the new
    // network namespace.
    nlHandle, err := netlink.NewHandle(unix.NETLINK_ROUTE)
    if err != nil {
        panic("cannot open RTNETLINK connection: " + err.Error())
    }
    println("creator: sending RTNETLINK handle")
    handle <- nlHandle
    println("creator: handle sent")

    <-done // wait for channel to be closed.
    println("creator: falling off")
    // ...simply fall off the edge; the still-locked thread dies with us.
}

// main starts creator on its own goroutine and takes over the RTNETLINK handle
// that creator opened inside the new network namespace. It then reports its
// own network namespace and, using the SIOCGSKNS ioctl, the network namespace
// of every socket inside the handle. Finally it tells creator to finish and
// keeps listing the network interfaces visible through the handle every 30
// seconds. It never returns; any error panics.
func main() {
    runtime.LockOSThread()
    println("main thread:", syscall.Gettid())

    done := make(chan struct{})
    handle := make(chan *netlink.Handle)
    go creator(done, handle)
    nlHandle := <-handle
    println("received RTNETLINK handle")

    netnslnk, err := os.Stat("/proc/thread-self/ns/net")
    if err != nil {
        panic("cannot determine main task's network namespace: " + err.Error())
    }
    println("main is still in network namespace net:[" +
        strconv.FormatUint(netnslnk.Sys().(*syscall.Stat_t).Ino, 10) + "]")

    // The socket fds are not exported by the netlink package, so dig them out
    // of the unexported "sockets" and "fd" fields via reflect plus unsafe.
    // This depends on the package's internal layout as used below.
    rNlhandle := reflect.ValueOf(nlHandle).Elem().FieldByName("sockets")
    rNlhandle = reflect.NewAt(rNlhandle.Type(), unsafe.Pointer(rNlhandle.UnsafeAddr())).Elem()
    for _, sock := range rNlhandle.Interface().(map[int]*nl.SocketHandle) {
        rSocket := reflect.ValueOf(sock.Socket).Elem().FieldByName("fd")
        rSocket = reflect.NewAt(rSocket.Type(), unsafe.Pointer(rSocket.UnsafeAddr())).Elem()
        fd := rSocket.Interface().(int32)
        // SIOCGSKNS hands back a new fd referring to the socket's network
        // namespace.
        netnsfd, err := unix.IoctlRetInt(int(fd), unix.SIOCGSKNS)
        if err != nil {
            panic("cannot query netns fd of RTNETLINK fd: " + err.Error())
        }
        var stat unix.Stat_t
        err = unix.Fstat(netnsfd, &stat)
        // Release the namespace fd right away: we only need its inode number,
        // and keeping it open would be a direct reference that keeps the
        // network namespace alive on its own. A failing close is not
        // actionable here, so its error is deliberately ignored.
        _ = unix.Close(netnsfd)
        if err != nil {
            panic("cannot stat netns fd: " + err.Error())
        }
        println("received netlink socket is connected to net:[" +
            strconv.FormatUint(stat.Ino, 10) + "]")
    }

    println("telling creator to stop")
    close(done)

    for {
        links, err := nlHandle.LinkList()
        if err != nil {
            panic("RTNETLINK failed: " + err.Error())
        }
        var names []string
        for _, link := range links {
            names = append(names, link.Attrs().Name)
        }
        println("nifs found:", strings.Join(names, ", "))
        time.Sleep(30 * time.Second)
    }
}

The output should be similar to:

go run -exec sudo .
main thread: 60063
creator thread: 60067
creator: locked OS-level thread
creator: in new network namespace net:[4026532257]
creator: sending RTNETLINK handle
creator: handle sent
received RTNETLINK handle
main is still in network namespace net:[4026531840]
received netlink socket is connected to net:[4026532257]
telling creator to stop
creator: falling off
nifs found: lo
TheDiveO
  • 2,183
  • 2
  • 19
  • 38