Just to provide some empirical evidence for these theoretical arguments:
Here is a test case where several threads use `xadd` to increment a shared counter.
On an i7-8565U with 4 cores, it outputs:
unlocked: counter = 1633267, expected 4000000
locked: counter = 4000000, expected 4000000
which clearly shows that `xadd` without the `lock` prefix is NOT atomic.
The code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
/* Number of worker threads started for each run. */
#define THREADS 4
/*
 * Number of increments each worker performs. The UL suffix makes the
 * expected total (COUNTS_PER_THREAD * THREADS) an unsigned long, which is
 * the type the %lu conversion in run_threads() requires.
 */
#define COUNTS_PER_THREAD 1000000UL
/*
 * Counter shared by every worker thread. It is only ever modified through
 * the xadd instructions in the workers and reset to 0 at the start of each
 * run.
 */
unsigned long counter = 0;
void *unlocked_worker(void *unused) {
(void)unused;
for (unsigned long i = 0; i < COUNTS_PER_THREAD; i++) {
unsigned long inc = 1;
asm volatile("xaddq %0, %1" : "+r" (inc), "+m" (counter));
}
return NULL;
}
void *locked_worker(void *unused) {
(void)unused;
for (unsigned long i = 0; i < COUNTS_PER_THREAD; i++) {
unsigned long inc = 1;
asm volatile("lock; xaddq %0, %1" : "+r" (inc), "+m" (counter));
}
return NULL;
}
/*
 * Runs one experiment: resets the shared counter, starts THREADS workers,
 * waits for all of them, then prints the final counter value next to the
 * expected total.
 *
 * lock: non-zero selects locked_worker, zero selects unlocked_worker.
 *
 * If a thread cannot be created or joined, the error is reported on stderr
 * and the process exits with status 1.
 */
void run_threads(int lock) {
    const char *label;
    void *(*entry)(void *);
    pthread_t workers[THREADS];
    int rc;
    int t;

    if (lock) {
        entry = locked_worker;
        label = "locked";
    } else {
        entry = unlocked_worker;
        label = "unlocked";
    }

    /* Start every run from a clean counter. */
    counter = 0;

    t = 0;
    while (t < THREADS) {
        rc = pthread_create(&workers[t], NULL, entry, NULL);
        if (rc) {
            /* pthread functions return the error code instead of setting errno. */
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            exit(1);
        }
        t++;
    }

    t = 0;
    while (t < THREADS) {
        rc = pthread_join(workers[t], NULL);
        if (rc) {
            fprintf(stderr, "pthread_join: %s\n", strerror(rc));
            exit(1);
        }
        t++;
    }

    /* All workers have been joined, so counter can be read directly. */
    printf("%s: counter = %lu, expected %lu\n",
           label,
           counter, COUNTS_PER_THREAD * THREADS);
}
/*
 * Runs the experiment twice: first with the unlocked worker (flag 0), then
 * with the locked worker (flag 1).
 */
int main(void) {
    for (int use_lock = 0; use_lock <= 1; use_lock++) {
        run_threads(use_lock);
    }
    return 0;
}