I am doing some time calculation tests in Linux. My kernel is Preempt-RT (however vanilla kernel gives similar results in my tests...)
I have two pthreads, running concurently in the same processor (affinitty given). They are real-time threads (prio 99).
I have a critical section protected by a spin lock, where two threads compete for the lock. Inside the critical section, I have a single increment operation and I try to calculate the elapsed time for this operation.
Code example with __rdtsc:
pthread_spin_lock(&lock);
start_time = __rdtsc();
++cnt; //shared ram variable, type is unsigned long long
stop_time = __rdtsc();
pthread_spin_unlock(&lock);
Code example with chrono timer:
pthread_spin_lock(&lock);
auto _start_time = std::chrono::high_resolution_clock::now();
++cnt; //shared ram variable, type is unsigned long long
auto _stop_time = std::chrono::high_resolution_clock::now();
pthread_spin_unlock(&lock);
Threads run in loop for a couple of million times and then terminate. After unlocking the spin lock, I log the the average elapsed time and the maximum elapsed time.
Now, here the things go interesting (at least for me):
Test 1: Threads have the scheduling policy as SCHED_RR:
Thread no: 0, Max Time: 34124, Avg Time: 28.114271, Run Cnt: 10000000
Thread no: 1, Max Time: 339256976, Avg Time: 74.781960, Run Cnt: 10000000
Test 2: Threads have the scheduling policy as SCHED_FIFO:
Thread no: 0, Max Time: 33114, Avg Time: 48.414173, Run Cnt: 10000000
Thread no: 1, Max Time: 38637, Avg Time: 24.327742, Run Cnt: 10000000
Test 3: Only single thread, the scheduling policy is SCHED_RR:
Thread no: 0, Max Time: 34584, Avg Time: 54.165470, Run Cnt: 10000000
Note: Main thread is a non-rt thread which has an affinity in a seperate processor. It has no important here.
Note1: All tests give approx. similar results everytime I run them.
Note2: The results given are output of the rdtsc. However, the chrono timer results are nearly similar to these.
So I think I may have a mis-understanding of the scheduler, so I need to ask these questions:
- How does the huge maximum time spikes occur in the test 1? Test 2&3 don't behave like it...
- Why is there a very large gap between the maximum and the average calculations? What causes this, an interrupt like timer?
My all test code is:
#include <stdio.h>
#include <stdlib.h>
#include "stdint.h"
#include <float.h>
#include <pthread.h>
#include <cxxabi.h>
#include <limits.h>
#include <sched.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <malloc.h>
#include <chrono>
/********* TEST CONFIG ************/
#define TEST_PTHREAD_RUN_CNT 10000000 //1000000000
#define NUM_OF_TEST_PTHREADS 2
#define MAIN_THREAD_CORE_INDEX 0
#define TEST_PTHREAD_PRIO 99
#define TEST_PTHREAD_POLICY SCHED_RR
#define TIME_RDTSC 1
#define TIME_CHRONO 0
/**********************************/
/**********************************/
struct param_list_s
{
unsigned int thread_no;
};
/**********************************/
/********* PROCESS RAM ************/
pthread_t threads[NUM_OF_TEST_PTHREADS];
struct param_list_s param_list[NUM_OF_TEST_PTHREADS];
unsigned long long max_time[NUM_OF_TEST_PTHREADS];
unsigned long long _max_time[NUM_OF_TEST_PTHREADS];
unsigned long long tot_time[NUM_OF_TEST_PTHREADS];
unsigned long long _tot_time[NUM_OF_TEST_PTHREADS];
unsigned long long run_cnt[NUM_OF_TEST_PTHREADS];
unsigned long long cnt;
pthread_spinlock_t lock;
/**********************************/
/*Proto*/
static void configureMemoryBehavior(void);
void create_rt_pthread(unsigned int thread_no);
/*
* Date............:
* Function........: main
* Description.....:
*/
int main(void)
{
cpu_set_t mask;
int i;
for (i = 0; i < NUM_OF_TEST_PTHREADS; ++i)
{
max_time[i] = 0;
tot_time[i] = 0;
run_cnt[i] = 0;
_max_time[i] = 0;
_tot_time[i] = 0;
}
cnt = 0;
printf("\nSetting scheduler affinity for the process...");
CPU_ZERO(&mask);
CPU_SET(MAIN_THREAD_CORE_INDEX, &mask);
sched_setaffinity(0, sizeof(mask), &mask);
printf("done.\n");
configureMemoryBehavior();
pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
for (i = 0; i < NUM_OF_TEST_PTHREADS; ++i)
{
create_rt_pthread(i);
}
printf("Waiting threads to join\n...\n");
for (i = 0; i < NUM_OF_TEST_PTHREADS; i++)
{
pthread_join(threads[i], NULL);
#if(TIME_RDTSC == 1)
printf("Thread no: %d, Max Time: %llu, Avg Time: %f, Run Cnt: %llu\n", i, max_time[i], (float)((float)tot_time[i] / run_cnt[i]), run_cnt[i]);
#endif
#if(TIME_CHRONO == 1)
printf("Thread no: %d, Max Time: %lu, Avg Time: %f, Run Cnt: %lu\n", i, _max_time[i], (float)((float)_tot_time[i] / run_cnt[i]), run_cnt[i]);
#endif
}
printf("All threads joined\n");
printf("Shared Cnt: %llu\n", cnt);
return 0;
}
/*
* Date............:
* Function........: thread_func
* Description.....:
*/
void *thread_func(void *argv)
{
unsigned long long i, start_time, stop_time, latency = 0;
unsigned int thread_no;
thread_no = ((struct param_list_s *)argv)->thread_no;
i = 0;
while (1)
{
#if(TIME_RDTSC == 1)
pthread_spin_lock(&lock);
start_time = __rdtsc();
++cnt;
stop_time = __rdtsc();
pthread_spin_unlock(&lock);
if (stop_time > start_time)
{
latency = stop_time - start_time;
++run_cnt[thread_no];
tot_time[thread_no] += latency;
if (latency > max_time[thread_no])
max_time[thread_no] = latency;
}
#endif
#if(TIME_CHRONO == 1)
pthread_spin_lock(&lock);
auto _start_time = std::chrono::high_resolution_clock::now();
++cnt;
auto _stop_time = std::chrono::high_resolution_clock::now();
pthread_spin_unlock(&lock);
auto __start_time = std::chrono::duration_cast<std::chrono::nanoseconds>(_start_time.time_since_epoch()).count();
auto __stop_time = std::chrono::duration_cast<std::chrono::nanoseconds>(_stop_time.time_since_epoch()).count();
auto __latency = __stop_time - __start_time;
if (__stop_time > __start_time)
{
_tot_time[thread_no] += __latency;
++run_cnt[thread_no];
if (__latency > _max_time[thread_no])
{
_max_time[thread_no] = __latency;
}
}
#endif
if (++i >= TEST_PTHREAD_RUN_CNT)
break;
}
return 0;
}
/*
* Date............:
* Function........: create_rt_pthread
* Description.....:
*/
void create_rt_pthread(unsigned int thread_no)
{
struct sched_param param;
pthread_attr_t attr;
printf("Creating a new real-time thread\n");
/* Initialize pthread attributes (default values) */
pthread_attr_init(&attr);
/* Set a specific stack size */
pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN);
/* Set scheduler policy and priority of pthread */
pthread_attr_setschedpolicy(&attr, TEST_PTHREAD_POLICY);
param.sched_priority = TEST_PTHREAD_PRIO;
pthread_attr_setschedparam(&attr, ¶m);
/* Set the processor affinity*/
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(1, &cpuset);
pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
/* Use scheduling parameters of attr */
pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
param_list[thread_no].thread_no = thread_no;
if(pthread_create(&threads[thread_no], &attr, thread_func, (void *)¶m_list[thread_no]) != 0)
{
printf("Thread could not be created.\n");
exit(-1);
}
}
/*
* Date............:
* Function........: configureMemoryBehavior
* Description.....:
*/
static void configureMemoryBehavior(void)
{
printf("\nLocking memory...");
/* Now lock all current and future pages
from preventing of being paged */
if (mlockall(MCL_CURRENT | MCL_FUTURE))
perror("mlockall failed:");
/* Turn off malloc trimming.*/
mallopt(M_TRIM_THRESHOLD, -1);
/* Turn off mmap usage. */
mallopt(M_MMAP_MAX, 0);
printf("done.\n");
}