I am running this code on an Intel Xeon Gold in order to measure the latency caused by shared-memory access. I have created 5 threads, each running on a different core, and there is a shared memory region for inter-core communication.
#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <sched.h>
typedef uint64_t time_tst;
#define NUM_THREADS 5
/* Argument bundle passed to thread_func through pthread_create. */
struct thread_info {
    int core_id;    /* CPU index the thread pins itself to */
    int *addr;      /* destination buffer the thread copies its message into */
};
time_tst time_tcv(void)
{
unsigned long low, high;
__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
return (((uint64_t)high << 32) | low);
}
/*
 * Create (or open) the POSIX shared-memory object "/carmv2shm", resize it to
 * `size` pages of 0x1000 bytes and map it read/write into this process.
 *
 * size: number of 0x1000-byte pages to map.
 *
 * Returns the base address of the mapping, or NULL on any failure; the
 * failing call is reported on stderr through perror. The shared-memory
 * object itself is not unlinked here.
 */
void* create_shared_memory(size_t size)
{
    const size_t page_bytes = 0x1000;

    /* Reject page counts whose byte length would wrap around size_t. */
    if (size > SIZE_MAX / page_bytes) {
        fprintf(stderr, "shared memory size overflow \n");
        return NULL;
    }
    const size_t length = page_bytes * size;

    /*
     * POSIX only defines the behaviour of names that start with '/';
     * without the slash the effect is implementation-defined.
     */
    int fd = shm_open("/carmv2shm", O_CREAT | O_RDWR,
                      S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
    if (fd == -1) {
        /* shm_open signals failure with -1; descriptor 0 is a valid result. */
        perror("shm open error");
        return NULL;
    }

    if (ftruncate(fd, (off_t)length) == -1) {
        perror("ftruncate");
        close(fd);
        return NULL;
    }

    void *mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
                         MAP_LOCKED | MAP_SHARED_VALIDATE, fd, 0);
    if (mapping == MAP_FAILED) {
        /* Normalise to NULL so callers have a single failure value to test. */
        perror("mmap");
        mapping = NULL;
    }

    /* The mapping stays valid after the descriptor is closed. */
    close(fd);
    return mapping;
}
/*
 * Thread entry point: pin the calling thread to the requested core, copy a
 * short message into the shared buffer and print how many TSC ticks the copy
 * took.
 *
 * args: pointer to a struct thread_info; core_id selects the CPU and addr is
 *       the destination of the copy.
 *
 * Always returns NULL.
 *
 * NOTE(review): only a single, cold memcpy is timed, so the figure presumably
 * includes one-off costs (cache/TLB misses and the like) -- confirm by
 * repeating the copy and discarding the first sample.
 */
void* thread_func(void *args)
{
    struct thread_info *thread_info = args;
    pthread_t self = pthread_self();
    const unsigned int core_id = thread_info->core_id;
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(core_id, &set);

    /*
     * pthread_setaffinity_np returns 0 on success and a positive error
     * number on failure, so a "< 0" test would never fire.
     */
    const int rc = pthread_setaffinity_np(self, sizeof(set), &set);
    if (rc != 0) {
        fprintf(stderr, "Error setting affinity to core %u (error %d)\n",
                core_id, rc);
    }

    if (thread_info->addr == NULL) {
        fprintf(stderr, "core id %u: no shared buffer to write to\n", core_id);
        return NULL;
    }

    char message[] = "hello message";

    time_tst t1 = time_tcv();
    memcpy(thread_info->addr, message, sizeof(message));
    time_tst t2 = time_tcv();

    /*
     * The difference is printed in decimal, so it carries no "0x" prefix.
     * pthread_t is printed as unsigned long to avoid truncating it.
     */
    printf("thread id %lu core id %u time diff %" PRIu64 " TSC ticks\n",
           (unsigned long)self, core_id, (t2 - t1));
    return NULL;
}
/*
 * Map the shared buffer, start NUM_THREADS worker threads (requesting cores
 * 1..NUM_THREADS) that each time a write into it, and wait for all of them.
 *
 * Returns EXIT_SUCCESS when every thread was started, EXIT_FAILURE otherwise.
 */
int main(void)
{
    void *shmem = create_shared_memory(128);
    /* Treat both NULL and MAP_FAILED as "no mapping". */
    if (shmem == NULL || shmem == MAP_FAILED) {
        fprintf(stderr, "could not map shared memory\n");
        return EXIT_FAILURE;
    }

    struct thread_info *thread_info = calloc(NUM_THREADS, sizeof *thread_info);
    if (thread_info == NULL) {
        perror("calloc");
        return EXIT_FAILURE;
    }

    pthread_t tid[NUM_THREADS];
    int started = 0;
    int status = EXIT_SUCCESS;

    for (int i = 0; i < NUM_THREADS; i++) {
        /*
         * Every thread gets its own element. Sharing a single struct and
         * rewriting core_id between pthread_create calls is a data race,
         * which a short sleep can hide but not fix.
         */
        thread_info[i].core_id = i + 1;
        thread_info[i].addr = shmem;

        const int rc = pthread_create(&tid[i], NULL, thread_func, &thread_info[i]);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for thread %d (error %d)\n", i, rc);
            status = EXIT_FAILURE;
            break;
        }
        started++;
    }

    /* Join only the threads that were actually created. */
    for (int i = 0; i < started; i++) {
        pthread_join(tid[i], NULL);
    }

    free(thread_info);
    return status;
}
The output is:
thread id 2912491264 core id 1 time diff 0x6312
thread id 2904098560 core id 2 time diff 0x486
thread id 2895705856 core id 3 time diff 0x498
thread id 2753095424 core id 4 time diff 0x522
thread id 2818569984 core id 5 time diff 0x230
This time difference looks quite high to me. Could anyone suggest how to
reduce it?
Thanks