I recently implemented a fair reader-writer ticket spinlock in C++. The code is fairly simple and I thought it was working well. However, after integrating the spinlock into a larger application, I noticed that on rare occasions the code runs extremely slowly, while most of the time it runs really fast. I know the spinlock is responsible because as soon as I replace it with a simple reader-writer spinlock (not fair, no tickets), the code runs much faster again. This has happened a few times on different machines. I know that these kinds of locks can be slow when there are more threads than cores, but I ran it with 16 threads on a machine with 48 cores. I could not reproduce the issue on my laptop with 4 threads and 4 cores. Here is the code:
/// Rounds `v` up to the nearest power of two.
///
/// A value that is already a power of two is returned unchanged.
/// `v == 0` wraps around during the initial decrement and yields 0, as does
/// any `v` larger than the greatest power of two representable in `size_t`.
///
/// @param v value to round up.
/// @return the smallest power of two that is >= v (or 0 on wrap-around).
inline size_t rndup(size_t v) {
    v--;
    // Smear the highest set bit into every lower position. The shift amounts
    // are derived from the width of size_t (assuming 8-bit bytes) so that no
    // shift ever reaches the type width, which would be undefined behaviour
    // on targets where size_t is narrower than 64 bits.
    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) {
        v |= v >> shift;
    }
    v++;
    return v;
}
/// Fair reader-writer spinlock built on a circular pool of ticket slots.
///
/// Every acquire draws a ticket from `it_lock_pool` and spins on the slot
/// `ticket & mask_it` until its predecessor opens it, so requests are served
/// in ticket order.
///  - A reader registers itself in `load_lock_pool`, re-closes its own slot and
///    immediately opens the next one, so consecutive readers may overlap.
///  - A writer additionally waits until `load_lock_pool` drops to zero and
///    keeps the next slot closed until `release_writer()` (or
///    `release_writer_acquire_reader()`) opens it.
///
/// Waiters spin; once a waiter has spun more than 100 times it yields the CPU
/// on every further iteration.
///
/// NOTE(review): the pool holds rndup(2 * nb_readers + 1) slots; presumably the
/// number of threads using the lock concurrently must stay below that so a
/// slot is never reused while an earlier ticket for it is still pending —
/// confirm against callers.
class SpinLockRW_MCS {
public:
    /// Builds the slot pool and opens slot 0 so the first ticket can proceed.
    ///
    /// The pool is always allocated. Previously it was left null whenever
    /// `nb_readers` exceeded `std::thread::hardware_concurrency()` (which may
    /// also return 0 when the value is not computable), and every later
    /// acquire dereferenced that null pointer.
    ///
    /// @param nb_readers sizes the pool: rndup(2 * nb_readers + 1) slots.
    SpinLockRW_MCS(const size_t nb_readers)
        : writer(nullptr), padding1{0},
          lock_pool(nullptr), padding2{0},
          mask_it(rndup(2 * nb_readers + 1) - 1), padding3{0},
          it_lock_pool(0), padding4{0},
          load_lock_pool(0) {
        // Allocated in the body rather than the initializer list because
        // lock_pool is declared (hence initialized) before mask_it.
        lock_pool = new Lock[mask_it + 1];
        lock_pool[0].is_locked = false;
    }

    // The lock owns a raw array and contains atomics: it is not copyable.
    SpinLockRW_MCS(const SpinLockRW_MCS&) = delete;
    SpinLockRW_MCS& operator=(const SpinLockRW_MCS&) = delete;

    ~SpinLockRW_MCS() {
        clear();
    }

    /// Frees the slot pool and resets the counters.
    ///
    /// After this call `lock_pool` is null, so none of the acquire/release
    /// functions may be used on this object any more.
    void clear() {
        if (lock_pool != nullptr) {
            delete[] lock_pool;
            lock_pool = nullptr;
        }
        writer = nullptr;
        it_lock_pool = 0;
        load_lock_pool = 0;
    }

    /// Takes a ticket, waits for its turn, then enters as a reader and lets
    /// the next ticket holder proceed right away.
    void acquire_reader() {
        uint_fast32_t retry = 0;
        const size_t prev_reader_id = it_lock_pool.fetch_add(1) & mask_it;
        const size_t new_reader_id = (prev_reader_id + 1) & mask_it;
        while (lock_pool[prev_reader_id].is_locked) {
            if (++retry > 100) std::this_thread::yield();
        }
        // Register as an active reader before opening the next slot so that a
        // writer holding the next ticket observes a non-zero reader count.
        ++load_lock_pool;
        // Re-close our own slot for the next trip around the ring, then hand over.
        lock_pool[prev_reader_id].is_locked = true;
        lock_pool[new_reader_id].is_locked = false;
    }

    /// Leaves the read-side critical section.
    void release_reader() {
        --load_lock_pool;
    }

    /// Takes a ticket, waits for its turn and for all active readers to leave.
    /// The next slot stays closed until the writer releases.
    void acquire_writer() {
        uint_fast32_t retry = 0;
        const size_t prev_reader_id = it_lock_pool.fetch_add(1) & mask_it;
        const size_t new_reader_id = (prev_reader_id + 1) & mask_it;
        while (lock_pool[prev_reader_id].is_locked) {
            if (++retry > 100) std::this_thread::yield();
        }
        // The retry counter carries over from the loop above.
        while (load_lock_pool) {
            if (++retry > 100) std::this_thread::yield();
        }
        lock_pool[prev_reader_id].is_locked = true;
        // Remember which slot to open on release.
        writer = &lock_pool[new_reader_id];
    }

    /// Leaves the write-side critical section by opening the slot recorded in
    /// acquire_writer().
    void release_writer() {
        writer->is_locked = false;
    }

    /// Downgrades from writer to reader: registers as an active reader first,
    /// then opens the next slot. Must be paired with release_reader().
    void release_writer_acquire_reader() {
        ++load_lock_pool;
        writer->is_locked = false;
    }

private:
    /// One ticket slot. Slots are created closed (is_locked == true).
    /// The padding presumably spaces slots one cache line apart (assumes
    /// 64-byte lines and 4-byte int) — not enforced by alignas.
    struct Lock {
        std::atomic<bool> is_locked;
        const int padding[15];
        Lock() : is_locked(true), padding{0} {}
    };

    // The paddingN arrays presumably keep the fields below on separate cache
    // lines (assumes 64-byte lines, 8-byte pointers/size_t, 4-byte int).
    Lock* writer;                        // slot the current writer opens on release
    const int padding1[14];
    Lock* lock_pool;                     // ring of mask_it + 1 slots (owned)
    const int padding2[14];
    const size_t mask_it;                // pool size - 1; pool size is a power of two
    const int padding3[14];
    std::atomic<size_t> it_lock_pool;    // ticket dispenser
    const int padding4[14];
    std::atomic<size_t> load_lock_pool;  // number of readers currently inside
};
Any suggestions would be greatly appreciated! Thanks!