0
int cas(int exchange_value,volatile int *dest,int compare_value){
    __asm__ volatile("lock cmpxchgl %1,(%3)"
            :"=a"(exchange_value)
            :"r"(exchange_value),"a"(compare_value),"r"(dest)
            :"memory");
    return exchange_value;
}
void* mysum(){
    int i=0;
    while(1){
        int res=cas(sum+1,&sum,sum)-sum;
        if(res){
            i++;
            if(i>10000000){
                break;
            }
        }
   }
}
int main(){
pthread_t thrd,thrd1;
pthread_create(&thrd,NULL,mysum,NULL);
pthread_create(&thrd1,NULL,mysum,NULL);
pthread_join(thrd,NULL);
pthread_join(thrd1,NULL);
printf("sum=%d\n",sum);
return 0;
}

when this program in single cpu, all will be fine. when this program run in multicore, the answer it output is not i expect. i think value of sum should be equal to 2i,but it always less than 2i. which step did i go wrong?

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
  • 1
    `cas(sum+1,&sum,sum)-sum` is undefined behaviour. It is undefined whether `cas` is called first (modifying `sum`) or whether the value of `sum` is evaluated first in the `-sum`. – kaylum Jun 14 '21 at 03:31
  • Separately from that bug, you could use `"+m"(*dest)` and avoid the `"memory"` clobber (if you want a "relaxed" compile time memory order). That would also let the compiler pick an addressing mode, instead of having to actually get the final address into a register. – Peter Cordes Jun 14 '21 at 07:58

0 Answers0