My environment is Ubuntu 14.04.4 LTS and I compile with gcc.
My makefile is as follows:
# Makefile for the Mahdi_test DPDK external application.
# RTE_SDK must point at the DPDK source tree; the DPDK build system
# (rte.vars.mk / rte.extapp.mk) supplies the actual compile and link rules.
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, can be overridden by command line or environment
RTE_TARGET ?= x86_64-native-linuxapp-gcc

include $(RTE_SDK)/mk/rte.vars.mk

# binary name
APP = Mahdi_test

INC += $(wildcard include/*.h)

# all sources are stored in SRCS-y
SRCS-y := main.c

# Header search paths. The directory must follow -I directly: the previous
# "-I -S$(SRCDIR)/include" made gcc treat "-S$(SRCDIR)/include" as the
# directory name, so $(SRCDIR)/include was never searched.
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)/include -I/usr/local/include

# Most optimizations are only enabled if an -O level is set on the command line,
# otherwise they are disabled, even if individual optimization flags are specified.
# With -O, the compiler tries to reduce code size and execution time,
# without performing any optimizations that take a great deal of compilation time.
# -O3 Optimize yet more. -O3 turns on all optimizations specified by -O2
# EXTRA_CFLAGS += -O3 -S -Wno-error -std=c99
# After following line do make, go to ./build and run : objdump -d -M intel -S main.o >a.txt
EXTRA_CFLAGS += -O3 -g -Wno-error -std=c99

# rte.extapp.mk : External application
include $(RTE_SDK)/mk/rte.extapp.mk
CPU :
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0-7
Thread(s) per core: 2
Core(s) per socket: 4
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 42
Stepping: 7
CPU MHz: 1600.000
BogoMIPS: 6784.24
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 8192K
NUMA node0 CPU(s): 0-7
All of the code is in a single file (I am using DPDK in order to benefit from what this library provides):
/*
 * Feature-test macro selection: request X/Open level 600 when the compiler
 * reports C99 or newer, and level 500 otherwise. The macro is defined here,
 * ahead of the first #include of this file.
 */
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */
#include <math.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <rte_debug.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_ring.h>
// Output mode selection. With EXCEL_OUTPUT defined, every measurement is stored
// in the global outputTable and written once by saveToExcelFile(). Without it,
// DIRECT_FILE_WRITE gets defined and each core prints to its own text file
// from inside lcore_recv().
#define EXCEL_OUTPUT
#ifndef EXCEL_OUTPUT
#define DIRECT_FILE_WRITE
#endif
// Number of worker lcores launched by main() (also the first dimension of outputTable).
#define CORE_MAX 3
// Number of block sizes that main() steps through (j = 1..BLOCK_MAX).
// Original author's note: BKMG = 4, ~ 168.72 MB
#define BLOCK_MAX 20
// Passes over the whole buffer inside one timed measurement in lcore_recv().
#define COUNTERS_MAX 10000
// Timed measurements taken for every block size.
#define ITERATION_MAX 100
// Ratio between consecutive entries of sizes[]: entry k is Factor^k * 2^BKMG / 4.
#define Factor 1.5
// Exponent of the power-of-two scale used in sizes[] and in the spreadsheet header row.
#define BKMG 4
// Tag embedded in the output names ("output<testNumber>.xml", "./Resaults/<testNumber>/...").
char* testNumber = "23";
/*
 * Previous power-of-two table, kept for reference:
uint32_t sizes[BLOCK_MAX] = {
1*Factor*pow(2, 10)/4, 2*Factor*pow(2, 10)/4, 4*Factor*pow(2, 10)/4, 8*Factor*pow(2, 10)/4, 16*Factor*pow(2, 10)/4, 32*Factor*pow(2, 10)/4, 64*Factor*pow(2, 10)/4, 128*Factor*pow(2, 10)/4, 256*Factor*pow(2, 10)/4, 512*Factor*pow(2, 10)/4,
1*Factor*pow(2, 20)/4, 2*Factor*pow(2, 20)/4, 4*Factor*pow(2, 20)/4, 8*Factor*pow(2, 20)/4, 16*Factor*pow(2, 20)/4, 32*Factor*pow(2, 20)/4, 64*Factor*pow(2, 20)/4, 128*Factor*pow(2, 20)/4, 256*Factor*pow(2, 20)/4, 512*Factor*pow(2, 20)/4,
1*Factor*pow(2, 30)/4, 2*Factor*pow(2, 30)/4
};
*/

/*
 * Length of every test block, counted in 32-bit counters (the value is passed
 * to mem_alloc() as the element count): entry k-1 holds Factor^k * 2^BKMG / 4,
 * truncated to an integer.
 *
 * The table has 50 entries. It is sized by its initializer list rather than by
 * BLOCK_MAX, because providing more initializers than the declared bound is
 * invalid C and main() may index one element past BLOCK_MAX - 1.
 * BLOCK_MAX must stay strictly below the number of entries listed here.
 */
#define BLOCK_WORDS(k) (pow(Factor, (k)) * pow(2, BKMG) / 4)
uint32_t sizes[] = {
    BLOCK_WORDS(1),  BLOCK_WORDS(2),  BLOCK_WORDS(3),  BLOCK_WORDS(4),  BLOCK_WORDS(5),
    BLOCK_WORDS(6),  BLOCK_WORDS(7),  BLOCK_WORDS(8),  BLOCK_WORDS(9),  BLOCK_WORDS(10),
    BLOCK_WORDS(11), BLOCK_WORDS(12), BLOCK_WORDS(13), BLOCK_WORDS(14), BLOCK_WORDS(15),
    BLOCK_WORDS(16), BLOCK_WORDS(17), BLOCK_WORDS(18), BLOCK_WORDS(19), BLOCK_WORDS(20),
    BLOCK_WORDS(21), BLOCK_WORDS(22), BLOCK_WORDS(23), BLOCK_WORDS(24), BLOCK_WORDS(25),
    BLOCK_WORDS(26), BLOCK_WORDS(27), BLOCK_WORDS(28), BLOCK_WORDS(29), BLOCK_WORDS(30),
    BLOCK_WORDS(31), BLOCK_WORDS(32), BLOCK_WORDS(33), BLOCK_WORDS(34), BLOCK_WORDS(35),
    BLOCK_WORDS(36), BLOCK_WORDS(37), BLOCK_WORDS(38), BLOCK_WORDS(39), BLOCK_WORDS(40),
    BLOCK_WORDS(41), BLOCK_WORDS(42), BLOCK_WORDS(43), BLOCK_WORDS(44), BLOCK_WORDS(45),
    BLOCK_WORDS(46), BLOCK_WORDS(47), BLOCK_WORDS(48), BLOCK_WORDS(49), BLOCK_WORDS(50),
};
#undef BLOCK_WORDS
/*
 * Previous labels matching the power-of-two size table, kept for reference:
char* names[BLOCK_MAX] = {
"1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K",
"1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M",
"1G", "2G"
};
*/

/*
 * Two-digit label of every test block; names[k] is used in the per-block text
 * file name ("R<core>_<label>.txt").
 *
 * The table has 50 entries and is sized by its initializer list: declaring it
 * with a smaller explicit bound while listing 50 initializers is invalid C.
 */
char* names[] = {
    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
    "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
    "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
    "31", "32", "33", "34", "35", "36", "37", "38", "39", "40",
    "41", "42", "43", "44", "45", "46", "47", "48", "49", "50",
};
// Input parameters handed to one worker core; main() keeps one instance per core
// (CORE_MAX instances in total) and passes its address to lcore_recv().
struct lcore_params
{
// Descriptor of the buffer owned by this core: its address and its length in 32-bit words.
struct data* valueMem;
// Index of the block size currently being processed; written by main() before each
// new block and used as the block index into outputTable when EXCEL_OUTPUT is defined.
int iteration;
// Text file this core writes its results to. main() only assigns it when
// EXCEL_OUTPUT is not defined.
FILE* fp;
// Position of this core in main()'s params[] array; used as the core index into
// outputTable when EXCEL_OUTPUT is defined.
int index;
};
// Describes the counter buffer allocated for one core (filled in by mem_alloc()).
struct data
{
// Address of the buffer; every core gets its own, independently allocated buffer.
uint32_t* value;
// Number of 32-bit counters held in the buffer.
uint32_t count;
};
// Result of one timed measurement, as recorded by lcore_recv().
struct tableEntry
{
// Value every counter should hold after the measurement: (repetition + 1) * COUNTERS_MAX.
int expectedVal;
// Elapsed time of the measurement, in nanoseconds.
double processTime;
// true when every counter in the buffer matched expectedVal.
bool allOk;
};
// Mutex taken by main() and by every worker around their accesses to processedCount
// and around the waits/signals on the two condition variables below.
pthread_mutex_t mutexLock_;
// readWaitHandle: signalled by a worker after it has incremented processedCount; main()
// waits on it until all CORE_MAX workers have finished the current block.
// newIterWaitHandle: workers wait on it after finishing a block; main() signals it once
// the buffers for the next block are in place.
pthread_cond_t readWaitHandle, newIterWaitHandle;
// Loop condition of the workers; main() clears it after the last block.
bool canContinue_ = true;
// Number of workers that have finished the current block; reset to 0 by main().
int processedCount = 0;
#ifdef EXCEL_OUTPUT
// Holds all measurements, indexed [core][block][repetition]; written to disk at the
// end of the run by saveToExcelFile().
struct tableEntry outputTable[CORE_MAX][BLOCK_MAX][ITERATION_MAX];
#endif
// The Function which each core should do, now is counter (cnt = cnt + 1)
static int
lcore_recv(struct lcore_params *p)
{
unsigned lcore_id = rte_lcore_id();
printf("Starting core %u\n", lcore_id);
#ifndef EXCEL_OUTPUT
#ifndef DIRECT_FILE_WRITE
struct tableEntry outputTable[ITERATION_MAX];
#endif
#endif
while(canContinue_)
{
//printf("Starting core %u\n", lcore_id);
//int index=((lcore_id-p->baseIndex)-1+CORE_MAX)%CORE_MAX;
void * vp;
struct data * d = p->valueMem;
FILE* fp = p->fp;
//fprintf(fp, "Iteration %d ----------------------\n", p->iteration);
//int index = p->index;
struct timespec t1, t2;
for(int q = 0; q < ITERATION_MAX; q++)
{
double processTime = 0;
// TEST TEST ON
clock_gettime(1, &t1);
for(uint32_t p = 0; p <= COUNTERS_MAX - 1; p++)
{
for (int i = 0; i < d->count; i++)
{
d->value[i]++;
}
}
clock_gettime(1, &t2);
processTime = (t2.tv_sec*1e9 + t2.tv_nsec) - (t1.tv_sec*1e9 + t1.tv_nsec);/* nanoseconds */
// TEST TEST OFF
//Checks last value of each counter
int expectedVal = (q + 1) * COUNTERS_MAX;
#ifndef EXCEL_OUTPUT
#ifdef DIRECT_FILE_WRITE
fprintf(fp," Expected : %d\n", expectedVal);
#endif
#endif
bool allOk = true;
for (int i = 0; i < d->count; i++)
{
if(d->value[i]!=expectedVal)
{
if(allOk)
{
allOk = false;
#ifndef EXCEL_OUTPUT
#ifdef DIRECT_FILE_WRITE
fprintf(fp," Failed : ");
#endif
#endif
}
#ifndef EXCEL_OUTPUT
#ifdef DIRECT_FILE_WRITE
fprintf(fp,"%d ", i);
#endif
#endif
}
}
#ifdef EXCEL_OUTPUT
struct tableEntry* entry= &outputTable[p->index][p->iteration][q];
entry->allOk=allOk;
entry->expectedVal=expectedVal;
entry->processTime=processTime;
#else
#ifdef DIRECT_FILE_WRITE
if(allOk)
{
fprintf(fp,"All counters are ok \n");
}
else
{
fprintf(fp,"\n");
}
fprintf(fp, "*** Time = %f ns \n", processTime);
#else
struct tableEntry* entry= &outputTable[q];
entry->allOk=allOk;
entry->expectedVal=expectedVal;
entry->processTime=processTime;
#endif
#endif
}
#ifndef EXCEL_OUTPUT
#ifndef DIRECT_FILE_WRITE
for(int q = 0; q < ITERATION_MAX; q++)
{
struct tableEntry* entry= &outputTable[q];
fprintf(fp," Expected : %d\n", entry->expectedVal);
if(entry->allOk)
{
fprintf(fp,"All counters are ok \n");
}
else
{
fprintf(fp,"Failed \n");
}
fprintf(fp, "*** Time = %f ns \n", entry->processTime);
}
#endif
#endif
pthread_mutex_lock(&mutexLock_);
processedCount++;
pthread_cond_signal(&readWaitHandle);
pthread_cond_wait(&newIterWaitHandle, &mutexLock_);
pthread_mutex_unlock(&mutexLock_);
}
return 0;
}
/*
 * Release the buffer currently attached to valueMem (if any) and attach a
 * freshly zeroed buffer of newSize 32-bit counters. Called once per core for
 * every block size.
 *
 * valueMem:  descriptor to update; valueMem->value must be NULL or a pointer
 *            previously obtained from the rte_malloc family.
 * newSize:   requested number of 32-bit counters.
 * iteration: unused, kept so existing callers do not change.
 *
 * On failure "Memory Fail" is printed and the descriptor is left empty
 * (value == NULL, count == 0), so loops over the buffer do nothing rather
 * than dereference NULL.
 */
static void
mem_alloc(struct data* valueMem, uint32_t newSize, uint32_t iteration)
{
    (void)iteration;

    /* rte_free() is documented to do nothing when given NULL. */
    rte_free(valueMem->value);

    valueMem->value = (uint32_t *)rte_zmalloc(NULL, sizeof(uint32_t) * (size_t)newSize, 0);
    if (valueMem->value == NULL)
    {
        valueMem->count = 0;
        printf("Memory Fail\n");
        return;
    }
    valueMem->count = newSize;
}
#ifdef EXCEL_OUTPUT
/*
 * Write every measurement held in outputTable to "output<testNumber>.xml"
 * in the SpreadsheetML (Excel 2003 XML) format.
 *
 * One worksheet is produced per core. Each sheet has a header row with the
 * block size Factor^k * 2^BKMG, ITERATION_MAX data rows with the measured
 * times, and three formula rows: mean, standard deviation and add latency.
 *
 * If the file cannot be created, a message is printed to stderr and nothing
 * is written.
 */
void saveToExcelFile(void)
{
    char name[64];
    int len = snprintf(name, sizeof(name), "output%s.xml", testNumber);
    if (len < 0 || (size_t)len >= sizeof(name))
    {
        fprintf(stderr, "saveToExcelFile: output file name is too long\n");
        return;
    }

    FILE* fp = fopen(name, "w");
    if (fp == NULL)
    {
        perror(name);
        return;
    }

    // some setting of excel and xml file
    fputs(
        "<?xml version=\"1.0\"?>\n"
        "<?mso-application progid=\"Excel.Sheet\"?>\n"
        "<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"\n"
        "xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n"
        "xmlns:x=\"urn:schemas-microsoft-com:office:excel\"\n"
        "xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\"\n"
        "xmlns:html=\"http://www.w3.org/TR/REC-html40\">\n"
        "<DocumentProperties xmlns=\"urn:schemas-microsoft-com:office:office\">\n"
        "<Author>m</Author>\n"
        "<LastAuthor>m</LastAuthor>\n"
        "<Created>2016-06-11T13:00:49Z</Created>\n"
        "<LastSaved>2016-06-11T13:01:30Z</LastSaved>\n"
        "<Version>15.00</Version>\n"
        "</DocumentProperties>\n"
        "<OfficeDocumentSettings xmlns=\"urn:schemas-microsoft-com:office:office\">\n"
        "<AllowPNG/>\n"
        "</OfficeDocumentSettings>\n"
        "<ExcelWorkbook xmlns=\"urn:schemas-microsoft-com:office:excel\">\n"
        "<WindowHeight>7755</WindowHeight>\n"
        "<WindowWidth>20490</WindowWidth>\n"
        "<WindowTopX>0</WindowTopX>\n"
        "<WindowTopY>0</WindowTopY>\n"
        "<ActiveSheet>0</ActiveSheet>\n"
        "<ProtectStructure>False</ProtectStructure>\n"
        "<ProtectWindows>False</ProtectWindows>\n"
        "</ExcelWorkbook>\n"
        "<Styles>\n"
        "<Style ss:ID=\"Default\" ss:Name=\"Normal\">\n"
        "<Alignment ss:Vertical=\"Bottom\"/>\n"
        "<Borders/>\n"
        "<Font ss:FontName=\"Calibri\" x:Family=\"Swiss\" ss:Size=\"11\" ss:Color=\"#000000\"/>\n"
        "<Interior/>\n"
        "<NumberFormat/>\n"
        "<Protection/>\n"
        "</Style>\n"
        "<Style ss:ID=\"s62\">\n"
        "<Font ss:FontName=\"Calibri\" x:Family=\"Swiss\" ss:Size=\"11\" ss:Color=\"#FF0000\"\n"
        "ss:Bold=\"1\"/>\n"
        "</Style>\n"
        "</Styles>\n",
        fp);

    for (int core = 0; core < CORE_MAX; core++)
    {
        // starts a worksheet
        fprintf(fp,
                "<Worksheet ss:Name=\"Sheet%d\">\n"
                "<Table ss:ExpandedColumnCount=\"%d\" ss:ExpandedRowCount=\"%d\" x:FullColumns=\"1\"\n"
                "x:FullRows=\"1\" ss:DefaultRowHeight=\"15\">\n",
                core + 1, BLOCK_MAX + 1, ITERATION_MAX + 4);
        fprintf(fp, "<Column ss:Width=\"95.25\"/>\n");

        // header row: one column per block size; data starts in column 2
        fprintf(fp, "<Row ss:StyleID=\"s62\">\n");
        for (int q = 0; q < BLOCK_MAX; q++)
        {
            // Printed directly to the file: a fixed 10-byte scratch buffer
            // overflows once the value needs more than 9 characters.
            float blockSize = (float)(pow(Factor, q + 1) * pow(2.0, BKMG));
            fprintf(fp, "%s<Data ss:Type=\"Number\">%0.3f</Data></Cell>\n",
                    q == 0 ? "<Cell ss:Index=\"2\">" : "<Cell>", blockSize);
        }
        fprintf(fp, "</Row>\n");

        // data rows: one row per repetition
        for (int rep = 0; rep < ITERATION_MAX; rep++)
        {
            fprintf(fp, "<Row>\n");
            for (int q = 0; q < BLOCK_MAX; q++)
            {
                fprintf(fp, "%s<Data ss:Type=\"Number\">%f</Data></Cell>\n",
                        q == 0 ? "<Cell ss:Index=\"2\">" : "<Cell>",
                        outputTable[core][q][rep].processTime);
            }
            fprintf(fp, "</Row>\n");
        }

        // mean of the ITERATION_MAX data rows directly above
        fprintf(fp, "<Row>\n");
        fprintf(fp, "<Cell ss:StyleID=\"s62\"><Data ss:Type=\"String\">Mean</Data></Cell>\n");
        for (int q = 0; q < BLOCK_MAX; q++)
        {
            fprintf(fp, " <Cell ss:Formula=\"=AVERAGE(R[%d]C:R[-1]C)\"><Data ss:Type=\"Number\">0</Data></Cell>\n", -ITERATION_MAX);
        }
        fprintf(fp, "</Row>\n");

        // Standard deviation. This row sits one below the Mean row, so the data
        // rows span R[-(ITERATION_MAX+1)] .. R[-2]; ending at R[-1] would include
        // the Mean cell in the sample.
        fprintf(fp, "<Row>\n");
        fprintf(fp, "<Cell ss:StyleID=\"s62\"><Data ss:Type=\"String\">Standard Deviation</Data></Cell>\n");
        for (int q = 0; q < BLOCK_MAX; q++)
        {
            fprintf(fp, " <Cell ss:Formula=\"=STDEV(R[%d]C:R[-2]C)\"><Data ss:Type=\"Number\">0</Data></Cell>\n", -(ITERATION_MAX + 1));
        }
        fprintf(fp, "</Row>\n");

        // add latency: mean / (words per block) / passes, with words = Factor^k * 2^BKMG / 4
        fprintf(fp, "<Row>\n");
        fprintf(fp, "<Cell ss:StyleID=\"s62\"><Data ss:Type=\"String\">Add Latency</Data></Cell>\n");
        for (int q = 0; q < BLOCK_MAX; q++)
        {
            fprintf(fp, " <Cell ss:Formula=\"=R[-2]C/(2^%d/4)/%d/%f^%d\"><Data ss:Type=\"Number\">0</Data></Cell>\n", BKMG, COUNTERS_MAX, Factor, q + 1);
        }
        fprintf(fp, "</Row>\n");

        //end of worksheet
        fprintf(fp, "</Table>\n</Worksheet>\n");
    }

    //end of file
    fprintf(fp, "</Workbook>");
    if (fclose(fp) != 0)
    {
        perror(name);
    }
}
#endif
int
main(int argc, char **argv)
{
mkdir("./Resaults", 0777);
int ret;
unsigned lcore_id;
pthread_attr_t attr;
pthread_mutex_init(&mutexLock_, NULL);
pthread_cond_init(&newIterWaitHandle, NULL);
pthread_cond_init(&readWaitHandle, NULL);
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Cannot init EAL\n");
struct lcore_params params[CORE_MAX];
char numT[5];
sprintf(numT, "%d", CORE_MAX);
for(int i = 0; i < CORE_MAX; i++)
{
// Generates some structures to hold information of assinged job of each core
struct data* commonMem = (struct data*)rte_malloc(NULL, sizeof(struct data), 0);
#ifndef EXCEL_OUTPUT
char num[5];
sprintf(num, "%d", i);
char name3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strcat(name3, "./Resaults/");
strcat(name3, testNumber);
mkdir(name3, 0777);
strcat(name3, "/R");
strcat(name3, num);
strcat(name3, "_");
strcat(name3, numT);
strcat(name3, "Core");
mkdir(name3, 0777);
char name2[] = {'/','R', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strcat(name2, num);
strcat(name2, "_");
strcat(name2, names[0]);
strcat(name2, ".txt");
strcat(name3, name2);
params[i].fp = fopen(name3, "w");
#endif
mem_alloc(commonMem, sizes[0], 0);
params[i].valueMem = commonMem;
params[i].index = i;
params[i].iteration = 0;
commonMem->value[i] = NULL;
}
/*
printf("sleep ...\n");
for(int f=0;f<4; f++)
{
sleep(1);
}
*/
/*
double p=0;
for(double f=0;f<1e9; f+=0.3)
{
p+=0.1;
}*/
printf("Starting lcores ...\n");
printf("RTE_MAX_LCORE = %d\n", RTE_MAX_LCORE);
lcore_id = rte_get_next_lcore(-1, 1, 0);
processedCount = 0;
// Ask each core do the funtion lcore_recv
for(int i = 0; i < CORE_MAX; i++)
{
rte_eal_remote_launch((lcore_function_t*)lcore_recv, ¶ms[i], lcore_id);
lcore_id = rte_get_next_lcore(lcore_id, 0, 1);
}
// For each core do the function for "BLOCK_MAX" times
for(int j = 1; j <= BLOCK_MAX; j++)
{
printf("Iteration : %d\n", j);
pthread_mutex_lock(&mutexLock_);
while(processedCount < CORE_MAX)
{
pthread_cond_wait(&readWaitHandle, &mutexLock_);
}
for(int i = 0; i < CORE_MAX; i++)
{
#ifndef EXCEL_OUTPUT
fclose(params[i].fp);
if(j < BLOCK_MAX)
{
char num[5];
sprintf(num, "%d", i);
char name3[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strcat(name3, "./Resaults/");
strcat(name3, testNumber);
mkdir(name3, 0777);
strcat(name3, "/R");
strcat(name3, num);
strcat(name3, "_");
strcat(name3, numT);
strcat(name3, "Core");
mem_alloc( params[i].valueMem, sizes[j], j);
char name2[] = {'/','R', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
strcat(name2, num);
strcat(name2, "_");
strcat(name2, names[j]);
strcat(name2, ".txt");
strcat(name3, name2);
params[i].fp = fopen(name3,"w");
params[i].iteration = j;
}
#else
mem_alloc( params[i].valueMem, sizes[j], j);
params[i].iteration = j;
#endif
}
if(j < BLOCK_MAX)
{
printf("%d : New Data Added ----------\n", j);
}
else
{
canContinue_ = false;
}
//Signal cores in order to start new iteration
processedCount = 0;
for(int i = 0; i < CORE_MAX; i++)
{
pthread_cond_signal(&newIterWaitHandle);
}
pthread_mutex_unlock(&mutexLock_);
}
printf("Waiting for lcores to finish ...\n");
#ifdef EXCEL_OUTPUT
saveToExcelFile();
#endif
rte_eal_mp_wait_lcore();
return 0;
}
I run the program with the following script, run.sh:
#!/bin/sh
# Launch the benchmark on lcores 0, 2, 4 and 6 (core mask 0x55), with lcore 0 as master.
./build/app/Mahdi_test -c 0x55 --master-lcore 0