setjmp longjmp with Stack

Question

I have to implement a user-level thread library as a homework using setjmp/longjmp. This is the code I wrote:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/select.h>
#include <time.h>
#include <errno.h>
#include <signal.h>
#include <setjmp.h>



#define STACKSIZE   128*1024    //approriate stack size

typedef void (*ult_func)();
struct tcb_str; //forward declaration

typedef struct tcb_str {
    //fill this struct with statusinformations
    stack_t stack;  //stack for local vars
    jmp_buf buf;
    int id;
    void (*threadfunc)();
    int waitingfor; //ID of Thread/ Filediscriptor the Thread is currently waiting for
    int waitingtype; //0 = Thread, 1 = fd
    int exitnumber; //Is set on exit
} tcb;

typedef struct queue queue;

struct queue *runqueue;
struct queue *blockedqueue;
struct queue *zombiequeue;

jmp_buf backtonormal;

int counter = 0;

struct queue {
    struct queue_node *start;
    struct queue_node *end;
};
struct queue_node {
    struct tcb_str *element;
    struct queue_node *next;
};

struct queue_node* pop(struct queue *qu) {

  if (qu == NULL || qu->start == NULL) {
    return NULL;
  }

  struct queue_node *node = qu->start;

  qu->start = node->next;
  if (qu->start == NULL) {
    qu->end = NULL;

  }
  node->next = NULL;
  return node;

}


int push(struct queue *qu, struct queue_node *node) {

  if (qu == NULL) {
    return -1;
  }
  node->next = NULL;
  if (qu->end == NULL) {
    qu->start = qu->end = node;
  } else {
    qu->end->next = node;
    qu->end = node;
  }
  return 1;
}


struct queue_node* removeByTid(struct queue *qu, int tid) {
    struct queue_node* tmp = qu->start;
    struct queue_node* previous = qu->start;
    if(tmp->element->id == tid) {
        pop(qu);
        return tmp;
    }

    do {
        if(tmp->element->id == tid) {
            //What if first and only
            previous->next = tmp->next;
            //What if only one left after
            tmp->next = NULL;
            if(qu->start->next == NULL) {
                qu->end = qu->start;
            }
            return tmp;
        }
        previous = tmp;
    }
    while((tmp = tmp->next));
    return NULL;
}

struct queue *initqueue() {
      queue *qu = malloc(sizeof(*qu));

      if (qu == NULL) {
        return NULL;
      }
      qu->start =  qu->end = NULL;
      return qu;
}
int checkfordata(int fd) {
    int data; //returns != 0 if data is available
    struct timeval tv_str;
    fd_set fds;
    FD_ZERO(&fds);


    if (!FD_ISSET(fd, &fds)) {
        FD_SET(fd, &fds);       //Creating fd_set for select()
    }
    tv_str.tv_sec = 0;
    tv_str.tv_usec = 0;
    //is data available?
    data = select(fd + 1, &fds, NULL, NULL, &tv_str);
    FD_CLR(fd, &fds);
    return data;
}

void schedulerThread() {
    while(1) {
    //Check blocked Threads
    struct queue_node* tmp = blockedqueue->start;
    fd_set fds;
    FD_ZERO(&fds);

    if(tmp != NULL) {

        //Go through blocked Threads
        do {
                int data = checkfordata(tmp->element->waitingfor);
                if(data > 0) {
                    removeByTid(blockedqueue, tmp->element->id);
                    //Add to running queue (at start)
                    tmp->next = runqueue->start;
                    runqueue->start = tmp;
                    return;
                }
                else {

                    FD_SET(tmp->element->waitingfor, &fds);
                }
        }
        while((tmp = tmp->next));
    }
    if(runqueue->start == NULL) {
        if(blockedqueue->start == NULL) {
            free(runqueue);
            free(blockedqueue);
            struct queue_node* qu;
            while((qu = pop(zombiequeue)) != NULL) {
                free(qu->element->stack.ss_sp);
                free(qu);
            }
            free(zombiequeue);
            return;
        }
        else {  
                    struct timeval tv_str;
                    tv_str.tv_sec = 0;
                    tv_str.tv_usec = 800 * 1000;
                    //We have to copy fds, as select will mess it up
                    fd_set fdset = fds;
                    select(FD_SETSIZE, &fdset, NULL, NULL, &tv_str);
        }
    }
    else {
        return;
    }
    }
}
/*
 This function only exists to tell the process to use an empty stack for the thread
 */
void signalHandlerSpawn( int arg ) {

    if ( setjmp( runqueue->start->element->buf ) ) {
        runqueue->start->element->threadfunc();
        longjmp(backtonormal, 1);
    }
    return;
}

int ult_spawn(ult_func f) {
    struct tcb_str* tcb = malloc(sizeof(struct tcb_str));
    tcb->threadfunc = f;
    tcb->waitingfor = -1;
    tcb->waitingtype = -1;
    tcb->id = ++counter;
    tcb->stack.ss_flags = 0;
    tcb->stack.ss_size = STACKSIZE;
    tcb->stack.ss_sp = malloc(STACKSIZE);
    if ( tcb->stack.ss_sp == 0 ) {
        perror( "Could not allocate stack." );
        exit( 1 );
    }
    stack_t oldStack;
    sigaltstack( &(tcb->stack), 0 );
    struct sigaction sa;
    struct sigaction oldHandler;
    sa.sa_handler = &signalHandlerSpawn;
    sa.sa_flags = SA_ONSTACK;
    sigemptyset( &sa.sa_mask );
    sigaction( SIGUSR1, &sa, &oldHandler );

    struct queue_node* node = malloc(sizeof(struct queue_node));
    node->element = tcb;
    push(runqueue, node);
    struct queue_node* q = runqueue->start;
    runqueue->start = runqueue->end;
    raise( SIGUSR1 );

    /* Restore the original stack and handler */
    sigaltstack( &oldStack, 0 );
    sigaction( SIGUSR1, &oldHandler, 0 );

    runqueue->start = q;

    return tcb->id;
}

void ult_yield() {
    if(runqueue->start == NULL) {
        exit(1);
        //TODO clean up
    }
    //We're the only one, so no need to schedule
    if(runqueue->start == runqueue->end && blockedqueue->start == NULL && runqueue->start != NULL) {
        return;
    }
    else {
        if (setjmp(runqueue->start->element->buf))
                 return;
        else {
            struct queue_node* tmp = pop(runqueue);
            push(runqueue, tmp);
            longjmp(backtonormal, 1);
        }
    }
}

int ult_read(int fd, void *buf, int count) {
        if (setjmp(runqueue->start->element->buf)) {
            return read(fd, buf, count);
        }
        else {
            struct queue_node* tmp = pop(runqueue);
            tmp->element->waitingfor = fd;
            tmp->element->waitingtype = 1;
            push(blockedqueue, tmp);
            longjmp(backtonormal, 1);
        }
    return -1;
}
void ult_init(ult_func f) {
    runqueue = initqueue();
    blockedqueue = initqueue();
    zombiequeue = initqueue();
    ult_spawn(f);
    while(1) {
        if(setjmp(backtonormal))
            continue;
        else {
            schedulerThread();
            if(runqueue->start == NULL)
                return; //TODO clean up
            longjmp(runqueue->start->element->buf, 1);
        }
    }
}


void threadA()
{
    int fd;
    char *inpt = "/dev/random";
    char buf[8];
    fd = open(inpt, O_RDONLY, O_NONBLOCK);

    if (fd == -1)
    {
        perror("open()");
    }
    while(1)
    {
        memset(buf, 0, 8);
        ult_read(fd, &buf, sizeof(buf));
    }
}

void threadB()
{
    char input[512] = {0};
    while(1)
    {
        memset(input, 0, 512);
        ult_read(STDIN_FILENO, &input, 512);
        if(strcmp(input, "stats\n") == 0)
        {
           //print stats
            continue;
        }
    }
}

void myInit()
{
    int status;
    ult_spawn(&threadA);
    ult_spawn(&threadB);
    while(1) {
        ult_yield();
    }
}
int  main() {
    ult_init(&myInit);
    return 0;
}

The problem occurs in ult_read. Once this function is called, it will jump back to the scheduler first. The scheduler checks whether data is available (to prevent the whole process from blocking) and jumps back to the function once there is data to read. Now when the function returns I get a segmentation fault. Valgrind is telling me:

Jump to the invalid address stated on the next line
==20408==    at 0x0: ???
==20408==  Address 0x0 is not stack'd, malloc'd or (recently) free'd

Ult_yield works just fine although it is using the same technique. I checked this question SetJmp/LongJmp: Why is this throwing a segfault?, but I think that's a different problem, as I'm creating a separate stack for each "thread". Can anyone explain to me, what the problem is and how to fix it?

Note: there is also sigsetjmp(). And I don't think you should jump in/out of signal handlers (unless you know what you are doing, in which case it is implementation-dependent) — wildplasser, Jun 26 '16 at 12:46
My code is based on this article: http://www.evanjones.ca/software/threading.html The author uses the same approach in the chapter "Implementing Fibers Using longjmp" — Simon, Jun 26 '16 at 13:58

score 1 · Accepted Answer · edited May 23 '17 at 12:32

1

I can't see anything obviously wrong with your code, but it is not an MVCE, so it is tough to tell -- there may be something off in your scheduler or your push and pop functions.

One thing that seems questionable is the tests in ult_yield and ult_read:

if(runqueue->start == NULL && blockedqueue->start == NULL) ...

These should both be:

if (runqueue->start == NULL) {
    printf("Scheduler queue corrupted");
    abort(); }

since when these functions are called, runqueue->start MUST point at the current thread's tcb/queue node.

Your valgrind error looks like it is trying to longjmp through an invalid jmp_buf, so try to backtrack to see where it came from and how it got into that state.

You should also probably unset the signal handler at the end of ult_spawn (or use SA_RESETHAND), lest a spurious SIGUSR1 from somewhere cause corruption of things.

edited May 23 '17 at 12:32

Community

1
1

answered Jun 26 '16 at 20:00

Chris Dodd

119,907
13
134
226

I edited the code section in my question to show the fullI code and also fixed the problem with the signal handler. I will check whether jmp_buf is set correctly, although I couldn't find anything wrong with it so far. But I'm kind of a newbie to C so the problem might be somewhere else. Thanks for your answer. – Simon Jun 26 '16 at 20:40
There's no `main` function, so its still not an [MVCE](http://stackoverflow.com/help/mcve) – Chris Dodd Jun 26 '16 at 20:53
I've added a sample main function. – Simon Jun 26 '16 at 21:14
I'm not sure exactly what is going wrong, but if you put in a check on the return value of `sigaltstack`, you will see that it is failing... – Chris Dodd Jun 27 '16 at 03:16
You're right. When the function ult_spawn(&myInit) is called, an alternative signal handler stack is set. Within the myInit function ult_spawn does not work properly, as sigaltstack can't change the stack when there is already an alternative stack active(EPERM error). So to fix it, I had to jump out of the signal handler context before calling sigaltstack again. – Simon Jun 27 '16 at 13:02

stark · Answer 2 · 2016-06-26T18:41:03.137

0

setjmp gets called once but returns twice. The problem is putting it in a signal handler. The second return (after longjmp) has no place to return to.

Modify your code so that setjmp is in the proper environment for the long jump.

Note that the signal handler environment being saved by setjmp is not the normal runtime. You can't call many common functions. I am not sure what the resulting environment will be after the longjmp.

One thing to try would be calling setjmp in your main routine and only setting a flag in your signal handler. Call longjmp when you check the flag and it is set.

edited Jun 26 '16 at 18:41

answered Jun 26 '16 at 12:53

stark

12,615
3
33
50

So the `longjmp(backtonormal, 1)` already returns ult_read? But is it not almost the same as in ult_yield where it works fine? Sorry I think I didn't fully get you yet. – Simon Jun 26 '16 at 13:54
In ult_yield longjmp is being called in the same context as setjmp. – stark Jun 26 '16 at 14:12
Thank you very much. I'll do some more research about this and will post the correct code once it's working. – Simon Jun 26 '16 at 14:33

setjmp longjmp with Stack

2 Answers2