15

As far as I know, one way to notify user space from kernel space is to use poll. That means the kernel driver must provide a poll method first. The code below was found on the internet, and it really works!

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
 
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Fortune Cookie Kernel Module");
MODULE_AUTHOR("M. Tim Jones");
 
/* Capacity, in bytes, of the fortune buffer (one page). */
#define MAX_COOKIE_LENGTH       PAGE_SIZE
 
/* Handle of the proc entry created at module load; removed again at unload. */
static struct proc_dir_entry *proc_entry;
static char *cookie_buf;  // Buffer of MAX_COOKIE_LENGTH bytes holding NUL-terminated fortunes back to back
static int write_index;   // Offset in cookie_buf where the next written fortune is stored
static int read_index;    // Offset in cookie_buf of the next fortune to hand out on read
 
/*
 * fortune_write() - append one fortune to the cookie buffer.
 * @filp: open file (unused).
 * @buff: user-space buffer holding the fortune text.
 * @len:  number of bytes in @buff.
 * @off:  file position (unused).
 *
 * The signature matches the file_operations prototype:
 *   ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 *
 * The last byte written is replaced by a NUL terminator, so the trailing
 * newline produced by `echo "text" > /proc/fortune` becomes the separator
 * between fortunes.
 *
 * Return: @len on success, 0 for an empty write, -ENOSPC when the fortune
 * does not fit in the remaining space, -EFAULT when @buff is not readable.
 */
ssize_t fortune_write( struct file *filp, const char __user *buff,
                        size_t len, loff_t *off )
{
  size_t space_available = MAX_COOKIE_LENGTH - write_index;

  /*
   * Nothing to store. Without this guard an empty write on an empty buffer
   * would write the terminator to cookie_buf[-1].
   */
  if (len == 0) {
    return 0;
  }

  if (len > space_available) {
    printk(KERN_INFO "fortune: cookie buffer is full!\n");
    return -ENOSPC;
  }

  if (copy_from_user( &cookie_buf[write_index], buff, len )) {
    return -EFAULT;
  }

  write_index += len;
  /* Overwrite the final byte of this fortune with the string terminator. */
  cookie_buf[write_index-1] = 0;

  return len;
}
 
ssize_t fortune_read(struct file *file, char *buf, size_t count, loff_t *f_pos){
// Refer to: ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
    int len;
 
    //there's no fortune or a fortune has already been read
    //the *f_pos > 0 hack is needed because `cat /proc/fortune` would otherwise
    //display every thing in the cookie_buf
    if(write_index == 0 || *f_pos > 0){
        return 0;
    }
 
    // cicle through fortunes
    if(read_index >= write_index){
        read_index = 0;
    }
 
    len = sprintf(buf, "%s\n", &cookie_buf[read_index]);
 
    read_index += len;
    *f_pos += len;
 
    return len;
}
 
/*
 * File operations behind the "fortune" proc entry: writes store a fortune,
 * reads return one. No .poll (or .open/.llseek/.release) handler is set.
 */
static const struct file_operations proc_test_fops = {
    .owner = THIS_MODULE,
    .write = fortune_write,
    .read  = fortune_read,
};
 
/*
 * init_fortune_module() - module load hook.
 *
 * Allocates the zero-filled fortune buffer, resets the read/write indices and
 * then publishes the "fortune" proc entry. The entry is created last so that
 * its handlers can never run against uninitialised state.
 *
 * Return: 0 on success, -ENOMEM if the buffer or the proc entry could not be
 * created.
 */
int __init init_fortune_module( void )
{
    /* vzalloc() returns zeroed memory; no cast or separate memset needed. */
    cookie_buf = vzalloc( MAX_COOKIE_LENGTH );
    if (!cookie_buf) {
        return -ENOMEM;
    }

    write_index = 0;
    read_index = 0;

    proc_entry = proc_create( "fortune", 0644, NULL, &proc_test_fops );
    if (proc_entry == NULL) {
        vfree(cookie_buf);
        cookie_buf = NULL;  /* do not leave a dangling pointer behind */
        printk(KERN_INFO "fortune: Couldn't create proc entry\n");
        return -ENOMEM;
    }

    printk(KERN_INFO "fortune: Module loaded.\n");
    return 0;
}
 
/*
 * exit_fortune_module() - module unload hook.
 *
 * Removes the proc entry and then frees the fortune buffer allocated at load.
 */
void __exit exit_fortune_module( void )
{
    /* Remove the entry first: its read/write handlers use cookie_buf. */
    proc_remove(proc_entry);
    vfree(cookie_buf);
    printk(KERN_INFO "fortune: Module unloaded.\n");
}
 
/* Register the load and unload hooks of the fortune module. */
module_init( init_fortune_module );
module_exit( exit_fortune_module );

I can use it like this:

echo "hello" > /proc/fortune

And then

cat /proc/fortune

to see the result.

But how do I add a poll method to it? I have tried several times, but it still fails.

halfer
  • 19,824
  • 17
  • 99
  • 186
Tom Xue
  • 3,169
  • 7
  • 40
  • 77

2 Answers2

16

You can find some good examples in the kernel itself. Take a look at the following files:

To add a poll() function to your code, follow these steps.

  1. Include needed headers:

     #include <linux/wait.h>
     #include <linux/poll.h>
    
  2. Declare waitqueue variable:

     static DECLARE_WAIT_QUEUE_HEAD(fortune_wait);
    
  3. Add fortune_poll() function and add it (as .poll callback) to your file operations structure:

     /*
      * Poll handler sketch: reports whether the file is readable.
      * Returns POLLIN | POLLRDNORM when new data can be read, 0 otherwise.
      */
     static unsigned int fortune_poll(struct file *file, poll_table *wait)
     {
         /* Hook this file up to fortune_wait so a wake-up on it is noticed. */
         poll_wait(file, &fortune_wait, wait);
         /* Placeholder condition: replace with the driver's own "data ready" check. */
         if (new-data-is-ready)
             return POLLIN | POLLRDNORM;
         return 0;
     }
    
     static const struct file_operations proc_test_fops = {
         ....
         .poll = fortune_poll,
     };
    

    Note that you should return POLLIN | POLLRDNORM if you have some new data to read, and 0 in case there is no new data to read (poll() call timed-out). See man 2 poll for details.

  4. Notify your waitqueue once you have new data:

     wake_up_interruptible(&fortune_wait);
    

That's the basic stuff about implementing the poll() operation. Depending on your task, you may also need to use some of the waitqueue API in your .read function (like wait_event_interruptible()).


See also related question: Implementing poll in a Linux kernel module.

Sam Protsenko
  • 14,045
  • 4
  • 59
  • 75
  • This question was very helpful to me, but I'm still confused. I have a related question here: https://stackoverflow.com/questions/34027366/implementing-poll-in-a-linux-kernel-module – zmb Dec 01 '15 at 18:22
  • This is a great overview! There is a LOT more info in this "Linux Device Drivers" chapter: https://www.oreilly.com/library/view/linux-device-drivers/0596005903/ch06.html – Nick Crews Feb 01 '19 at 17:43
  • Is there a reason why `new-data-is-ready` was deliberately left as pseudo-code rather than a real variable/function? – Ken Lin Jul 10 '20 at 08:42
  • @KenLin The way to check for a new data might be different, depending on each particular case. Usually the variable is used for that, which can be set to `true` in interrupt handler (which signals the new data has arrived), and it can be set to `false` in `read()` function, when all data is consumed. But in theory there might be the cases when new data should be checked in other way (e.g. by checking circular buffer). The code in my answer is not complete, of course, as there is no single use case for the `poll()` mechanism. – Sam Protsenko Jul 10 '20 at 09:22
  • Is it safe to say that if we were to set the variable as `true` in the interrupt handler, we would also notify the appropriate waitqueue using `wake_up_interruptible` (as per step 4. in your answer) in the interrupt handler? These two things seem have similar goals: let the system know there is new data. – Ken Lin Jul 10 '20 at 19:56
  • By the way, I know this answer is really old but the links at the top are 404 not found now! – Ken Lin Jul 10 '20 at 19:57
  • 1
    @KenLin Updated the links, thanks for letting me know. Answering your question: yes, I forgot to mention it. Please take a look at this [example](https://github.com/joe-skb7/kernel-lectures/blob/master/lecture17-hw2/materials/module3/hw3.c) I wrote some time ago for my students. In `hw3_poll()` the `poll_wait()` sleeps and waits to be awaken, then `data_ready` is checked to be sure we were awaken because there is actually new data available to read. And in `hw3_btn_isr()` (interrupt handler) the `data_ready` is set to `true`, and then `wake_up_interruptible()` called to wake up the wait queue. – Sam Protsenko Jul 12 '20 at 11:06
9

Minimal runnable example

GitHub upstream with QEMU + Buildroot boilerplate:

In this simplified example, we generate poll events from a separate thread. In real life, poll events will likely be triggered by interrupts, when the hardware has finished some job, and new data became available for userland to read.

The main point to remember is that if poll returns zero, the kernel calls it again: Why do we need to call poll_wait in poll?

poll.ko

#include <linux/debugfs.h>
#include <linux/delay.h> /* usleep_range */
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/kernel.h> /* min */
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/printk.h> /* printk */
#include <linux/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/wait.h> /* wait_queue_head_t, wait_event_interruptible, wake_up_interruptible  */
#include <uapi/linux/stat.h> /* S_IRUSR */

/*
 * Debug switch: when non-zero, the poll handler always reports "no data" so
 * the repeated poll calls can be observed in the kernel log.
 */
static int ret0 = 0;
module_param(ret0, int, S_IRUSR | S_IWUSR);
/* The description must name the parameter it documents (ret0). */
MODULE_PARM_DESC(ret0, "if 1, always return 0 from poll");

/* Latest message produced by the kernel thread, waiting to be read. */
static char readbuf[1024];
/* Number of valid bytes in readbuf; 0 means there is nothing to read. */
static size_t readbuflen;
/* debugfs file through which userland reads and polls. */
static struct dentry *debugfs_file;
/* Kernel thread that periodically refills readbuf and wakes pollers. */
static struct task_struct *kthread;
/* Wait queue that poll callers are registered on. */
static wait_queue_head_t waitqueue;

/*
 * read() - copy the pending message to user space and drain it.
 * @filp: open file (unused).
 * @buf:  user-space destination buffer.
 * @len:  capacity of @buf in bytes.
 * @off:  file position (unused).
 *
 * At most @len bytes are copied, so a caller with a small buffer is never
 * overrun. The pending data is discarded after every call, including a
 * partial or failed one.
 *
 * Return: number of bytes copied (0 when nothing is pending), or -EFAULT if
 * @buf is not writable.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
    ssize_t ret;
    /* Never copy more than the caller asked for. */
    size_t n = min(len, readbuflen);

    if (copy_to_user(buf, readbuf, n)) {
        ret = -EFAULT;
    } else {
        ret = n;
    }
    /* This is normal pipe behaviour: data gets drained once a reader reads from it. */
    /* https://stackoverflow.com/questions/1634580/named-pipes-fifos-on-unix-with-multiple-readers */
    readbuflen = 0;
    return ret;
}

/*
 * poll() - report whether a message is ready to be read.
 *
 * Returning 0 tells the kernel nothing is ready; because the caller has been
 * registered on the wait queue via poll_wait(), this handler is invoked again
 * after the next wake-up on that queue.
 *
 * Return: POLLIN when data is pending and ret0 is not set, 0 otherwise.
 */
unsigned int poll(struct file *filp, struct poll_table_struct *wait)
{
    pr_info("poll\n");

    /* Registration only: poll_wait() itself does not sleep. */
    poll_wait(filp, &waitqueue, wait);

    /* Nothing pending, or the module was loaded with ret0 to force "not ready". */
    if (ret0 || !readbuflen) {
        pr_info("return 0\n");
        return 0;
    }

    pr_info("return POLLIN\n");
    return POLLIN;
}

/*
 * kthread_func() - body of the producer thread.
 * @data: unused.
 *
 * Until asked to stop, formats the current jiffies value into readbuf,
 * sleeps for about one second and then wakes everything on the wait queue.
 *
 * Return: always 0.
 */
static int kthread_func(void *data)
{
    for (;;) {
        if (kthread_should_stop())
            break;

        /* Publish a fresh message; its length marks data as pending. */
        readbuflen = snprintf(readbuf, sizeof(readbuf), "%llu",
                              (unsigned long long)jiffies);

        usleep_range(1000000, 1000001);

        pr_info("wake_up\n");
        wake_up(&waitqueue);
    }

    return 0;
}

/* File operations of the debugfs file: only reading and polling are supported. */
static const struct file_operations fops = {
    .owner = THIS_MODULE,
    .poll  = poll,
    .read  = read,
};

/*
 * myinit() - module load hook.
 *
 * Sets up the wait queue, creates the producer thread, exposes the debugfs
 * file and finally starts the thread. The wait queue is initialised before
 * the file becomes visible so that poll() can never see it uninitialised.
 *
 * Return: 0 on success, or the negative errno from kthread_create().
 */
static int myinit(void)
{
    init_waitqueue_head(&waitqueue);

    /* kthread_create() returns an ERR_PTR on failure; waking that would crash. */
    kthread = kthread_create(kthread_func, NULL, "mykthread");
    if (IS_ERR(kthread))
        return PTR_ERR(kthread);

    debugfs_file = debugfs_create_file(
        "lkmc_poll", S_IRUSR | S_IWUSR, NULL, NULL, &fops);

    wake_up_process(kthread);
    return 0;
}

/* myexit() - module unload hook: stops the producer thread, then removes the debugfs file. */
static void myexit(void)
{
    kthread_stop(kthread);
    debugfs_remove(debugfs_file);
}

/* Register the load and unload hooks and declare the module licence. */
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");

poll.out userland:

#define _XOPEN_SOURCE 700
#include <assert.h>
#include <fcntl.h> /* creat, O_CREAT */
#include <poll.h> /* poll */
#include <stdio.h> /* printf, puts, snprintf */
#include <stdlib.h> /* EXIT_FAILURE, EXIT_SUCCESS */
#include <unistd.h> /* read */

/*
 * Userland test driver: opens the file named by argv[1], then loops forever
 * blocking in poll() and printing whatever becomes readable.
 *
 * Exits with EXIT_FAILURE on a usage error or when open(), poll() or read()
 * fails.
 */
int main(int argc, char **argv) {
    char buf[1024];
    int fd, i, n;
    short revents;
    struct pollfd pfd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <poll-device>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    fd = open(argv[1], O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }
    pfd.fd = fd;
    pfd.events = POLLIN;
    while (1) {
        puts("poll");
        /* Timeout of -1: block until the driver reports an event. */
        i = poll(&pfd, 1, -1);
        if (i == -1) {
            perror("poll");
            /* exit() rather than assert(0): assert vanishes under NDEBUG. */
            exit(EXIT_FAILURE);
        }
        revents = pfd.revents;
        printf("revents = %d\n", revents);
        if (revents & POLLIN) {
            n = (int)read(pfd.fd, buf, sizeof(buf));
            if (n == -1) {
                /* A negative n must not reach "%.*s": it would print buf unbounded. */
                perror("read");
                exit(EXIT_FAILURE);
            }
            printf("POLLIN n=%d buf=%.*s\n", n, n, buf);
        }
    }
}

Usage:

insmod poll.ko
mount -t debugfs none /sys/kernel/debug
./kernel_modules/poll.out /sys/kernel/debug/lkmc_poll

Outcome: jiffies gets printed to stdout every second from userland, e.g.:

poll
<6>[    4.275305] poll
<6>[    4.275580] return POLLIN
revents = 1
POLLIN n=10 buf=4294893337
poll
<6>[    4.276627] poll
<6>[    4.276911] return 0
<6>[    5.271193] wake_up
<6>[    5.272326] poll
<6>[    5.273207] return POLLIN
revents = 1
POLLIN n=10 buf=4294893588
poll
<6>[    5.276367] poll
<6>[    5.276618] return 0
<6>[    6.275178] wake_up
<6>[    6.276370] poll
<6>[    6.277269] return POLLIN
revents = 1
POLLIN n=10 buf=4294893839

Force the poll file_operation to return 0 to see what happens more clearly:

insmod poll.ko ret0=1

Sample output:

poll
<6>[   85.674801] poll
<6>[   85.675788] return 0
<6>[   86.675182] wake_up
<6>[   86.676431] poll
<6>[   86.677373] return 0
<6>[   87.679198] wake_up
<6>[   87.680515] poll
<6>[   87.681564] return 0
<6>[   88.683198] wake_up

From this we see that control is not returned to userland: the kernel just keeps calling the poll file_operation again and again.

Tested on Linux 5.4.3.

Ciro Santilli OurBigBook.com
  • 347,512
  • 102
  • 1,199
  • 985
  • "This gets called again every time an event happens in the wait queue." Wouldn't this mean the same `poll_wait(filp, &waitqueue, wait);` gets called multiple times? Isn't that bad because the same `flip` and `waitqueue` will be added to the poll table? – Ken Lin Jul 10 '20 at 08:06
  • Or maybe the idea is that when `poll` "gets called again when an event happens in the wait queue", it should return a nonzero value to inform the kernel that the device is ready for I/O operation. Thus, the kernel will deallocate the poll table and return control to the userland?? – Ken Lin Jul 10 '20 at 08:08
  • @KenLin I'm not sure how it works internally, I'm mostly copying examples from other sources most likely :-) But I think a quick look into poll syscall definition will quickly clarify that, but I'm lazy now. Just from this, it does seem that the kernel allocates/deallocates something automatically. Let me know if you manage understand it better later on or find a problem with the current setup. I added a gazillion prints as well to the example to make things clearer. – Ciro Santilli OurBigBook.com Jul 11 '20 at 16:08
  • 1
    Thank you so much! I actually also tried more-or-less the same of what you mentioned in the updated answer and came to the same conclusions. Nothing beats trying things out yourself I guess! – Ken Lin Jul 11 '20 at 18:59