Hello,
Currently I am learning how to write multithreaded programs and I tried to implement spinlock in x86-64 assembly. It works by checking L0C_A1 local variable, if it is equal to 1 then thread can enter critical section, if it is equal to 0 then thread will wait in a loop. In critical section thread reads x variable, then squares it and then stores new value back. Execution is delayed by nanosleep system call. I face the following problem: only 1st thread executes critical section. However if I set breakpoint in gdb at line "movb $1,.L0C_A1(%rip) # leave critical section" and step through it and then continue execution then next thread will enter critical section. Why does it work this way? Is it related to cache memory?
Source code:
C code:
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX_THREADS 10
extern void* func(void*);
unsigned long int x = 3;
static pthread_t thr[MAX_THREADS];
int main(int argc, char *argv[])
{
unsigned int i;
for(i = 0; i < MAX_THREADS; ++i)
{
pthread_create(&thr[i], NULL, func, NULL);
}
for(i = 0; i < MAX_THREADS; ++i)
{
pthread_join(thr[i], NULL);
}
printf("%lu\n", x);
exit(0);
}
Assembly code:
.globl func
.extern x
.section .text,"ax",@progbits
func:
shrb $1,.L0C_A1(%rip) # check if critical section can be entered
jnc func
movq x(%rip),%rax # load x
xorl %esi,%esi
pushq $10000000
pushq %rsi
movq %rsp,%rdi
pushq %rax
leal 35(%rsi),%eax
syscall # sleep 10 ms
pop %rax
addq $16,%rsp
xorl %edx,%edx
mulq %rax # x = x * x
movq %rax,x(%rip) # store new x
movb $1,.L0C_A1(%rip) # leave critical section
xorl %eax,%eax
ret
.section .data,"aw",@progbits
.L0C_A1: .byte 1