I have a multi-threaded application where multiple threads access the same variable, a pointer. volatile would not be used to ensure atomic access (which it does not provide), but rather to ensure the compiler does not apply aggressive optimizations under the assumption that one thread cannot affect another and that the value can therefore never change.
I've seen several "volatile is useless in multi-threaded applications" posts. I have a linked list that is accessed by 2 threads, which effectively do the following:
Thread 1
- Allocates memory for the structure
- Puts the structure into the linked list
- Processes the linked list and checks when some value is set to xyz
Thread 2 (sketched after the reference code below)
- Reads the linked list
- Does some processing
- Sets the value to xyz
All operations on the linked list are protected by a system mutex (FreeRTOS), but this does not ensure that thread 2 sees the same value as thread 1 for the linked list start pointer.
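For reference, the commented-out mutex_lock()/mutex_unlock() calls in the code below would map to FreeRTOS roughly like this; this is just a sketch with my own wrapper names, not the production code.
#include "FreeRTOS.h"
#include "semphr.h"
static SemaphoreHandle_t list_mutex;
void
mutex_init(void) {
//Create the mutex once, before starting the threads
list_mutex = xSemaphoreCreateMutex();
}
void
mutex_lock(void) {
xSemaphoreTake(list_mutex, portMAX_DELAY);
}
void
mutex_unlock(void) {
xSemaphoreGive(list_mutex);
}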
There are, in my opinion, 2 critical parts in the code:
- What the threads see through the list_starts pointer
- What the threads see in the e->data value inside the pointed-to entry
This is the code that is done wrongly; it is reference code, not used in production.
#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
typedef struct mytype {
struct mytype* next;
int data;
} mytype_t;
typedef _Atomic mytype_t mytype_atomic_t;
mytype_t* list_starts;
//mytype_atomic_t* list_starts;
void
thread1() {
//Start thread2 here...
//start_thread(thread2);
//Run the thread here...
while (1) {
//mutex_lock();
/* Create the entry */
mytype_t* entry = malloc(sizeof(*entry));
if (entry != NULL) {
entry->next = list_starts;
list_starts = entry;
}
//mutex_unlock();
//mutex_lock();
start_over:
for (mytype_t* e = list_starts, *prev = NULL; e != NULL; prev = e, e = e->next) {
//mutex_unlock();
//Simply wait to become 3
while (e->data != 3) {}
//mutex_lock();
//Remove from the list
if (prev == NULL) {
list_starts = e->next;
} else {
prev->next = e->next;
}
free(e);
goto start_over;
}
//mutex_unlock();
}
}
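Thread 2 is not part of the reference code above; a rough sketch of what it does, following the description earlier (simplified, not the production code):
void
thread2() {
while (1) {
//mutex_lock();
//Read the linked list and do some processing
for (mytype_t* e = list_starts; e != NULL; e = e->next) {
//Do some processing...
//Then set the value to 3 so thread 1 can remove the entry
e->data = 3;
}
//mutex_unlock();
}
}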
For thread1, Godbolt with -mcpu=cortex-m4 -O2 produces: https://godbolt.org/z/T84K3x3G4
thread1:
push {r4, lr}
ldr r4, .L12
.L4:
movs r0, #8
bl malloc
cbz r0, .L2
ldr r3, [r4]
str r3, [r0]
.L6:
//Critical part - loaded once
ldr r3, [r0, #4]
.L5:
//Compare and branch back to itself -> the value is never reloaded
cmp r3, #3
bne .L5
ldr r3, [r0]
str r3, [r4]
bl free
ldr r0, [r4]
cmp r0, #0
bne .L6
b .L4
.L2:
ldr r0, [r4]
cmp r0, #0
beq .L4
b .L6
.L12:
.word .LANCHOR0
list_starts:
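For comparison, the volatile usage I meant at the top would only force e->data to be reloaded on every poll iteration; it adds no atomicity and no barriers. A minimal sketch of that variant (my own, not taken from the listings):
/* Sketch: the volatile cast forces a fresh load of e->data on every
 * iteration, but it provides no atomicity and no memory barriers. */
while (*(volatile int*)&e->data != 3) {}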
If we change the original code to use the mytype_atomic_t* type in the for loop, like so:
for (mytype_atomic_t* e = list_starts, *prev = NULL; e != NULL; prev = e, e = e->next) {
while (e->data != 3) {}
}
Then Godbolt produces: https://godbolt.org/z/94an17x1G
thread1:
push {r3, r4, r5, lr}
ldr r4, .L13
ldr r5, [r4]
.L4:
movs r0, #8
bl malloc
cbz r0, .L2
str r5, [r0]
str r0, [r4]
.L6:
adds r2, r0, #4
.L5:
//Barrier
dmb ish
//Load first...
ldr r3, [r2]
dmb ish
cmp r3, #3
//Jump back and reload
bne .L5
dmb ish
ldr r3, [r0]
dmb ish
str r3, [r4]
bl free
ldr r0, [r4]
cmp r0, #0
bne .L6
.L7:
movs r5, #0
b .L4
.L2:
cmp r5, #0
beq .L7
mov r0, r5
b .L6
.L13:
.word .LANCHOR0
list_starts:
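For reference, a narrower variant I can also imagine would make only the two critical objects atomic, i.e. the head pointer object itself and the data member, instead of the whole struct. This is just a sketch with my own names, not code from the listings above:
#include <stdatomic.h>
typedef struct mytype_n {
struct mytype_n* next;
//Only the polled member is atomic
_Atomic int data;
} mytype_n_t;
//The head pointer object itself is atomic (the pointed-to struct is not)
mytype_n_t* _Atomic list_starts_n;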
The ultimate question: is it enough to simply:
- declare the custom type with the _Atomic keyword?
- not use volatile at all in such a case?