In order to become more comfortable with multithreading in C, I have written a small C program that performs an "intensive" calculation. It renders a picture of the Mandelbrot set: each pixel is calculated separately, and the pixels are then buffered into rows. Each thread gets an equal share of the total number of rows. For example, a picture with a height of 1000 rows should be split into two packages of 500 rows each if the number of threads chosen is two. I therefore expected the run time to drop by roughly a factor of two, but I see no improvement. Why? I don't get it, because everything works and seems logical. If anybody can give me a hint, I would be very grateful. Below you see main and the function that calculates the Mandelbrot set, which is started by main.
/*
 * Writes exactly len bytes from buf to fd, retrying after short writes.
 * Returns 0 on success and -1 if write() fails or makes no progress.
 */
static int write_fully(int fd, const char *buf, size_t len) {
    while (len > 0) {
        ssize_t written = write(fd, buf, len);
        if (written <= 0) {
            return -1;
        }
        buf += written;
        len -= (size_t)written;
    }
    return 0;
}

/*
 * Renders the Mandelbrot set as a 24-bit BMP on stdout. The image rows are
 * split into contiguous blocks, one block per worker thread (myDo2), and the
 * elapsed wall-clock time is reported on stderr.
 *
 * Arguments: argv[1] = image width, argv[2] = image height,
 *            argv[3] = number of worker threads.
 *
 * Returns 0 on success, 1 on invalid arguments or if a thread cannot be
 * created, 2 if a thread cannot be joined, 3 if writing to stdout fails.
 */
int main(int argc, char ** argv, char ** envp) {
    (void)envp;

    if (argc != 4) {
        printf("Bitte genau 3 Argumente eingeben.\n");
        return 1;
    }

    /* Stopwatch state. */
    struct timeval start, ende;
    long ttlende, ttlstart;

    width = str2num(argv[1]);
    height = str2num(argv[2]);
    int threads = str2num(argv[3]);

    /* Reject values that would lead to a division by zero or an empty image. */
    if (width < 1 || height < 1 || threads < 1) {
        fprintf(stderr, "Error: width, height and thread count must be positive\n");
        return 1;
    }
    /* A thread needs at least one row; more threads than rows makes no sense. */
    if (threads > height) {
        threads = height;
    }

    unsigned char info[BMPHEADER_SIZE] = {
        /* signature, file size, reserved, offset to pixel data */
        'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0,
        /* info-header size, width, height, planes, bits per pixel */
        40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0,
        /* compression, pixel data size */
        0,0,0,0, 0,0,0,0
    };

    /* BMP lines must be of lengths divisible by 4. */
    char span[4] = "\0\0\0\0";
    int spanBytes = 4 - ((width * 3) % 4);
    if (spanBytes == 4) spanBytes = 0;
    int psize = ((width * 3) + spanBytes) * height;

    /*
     * Patch the variable header fields with memcpy: the offsets are not
     * int-aligned, so storing through a cast int* is undefined behaviour.
     * Like the original code this relies on a little-endian host.
     */
    int fileSize = BMPHEADER_SIZE + psize;
    int bmpWidth = width;
    int bmpHeight = height;
    memcpy(&info[2], &fileSize, sizeof fileSize);
    memcpy(&info[18], &bmpWidth, sizeof bmpWidth);
    memcpy(&info[22], &bmpHeight, sizeof bmpHeight);
    memcpy(&info[34], &psize, sizeof psize);

    if (write_fully(1, (const char *)info, BMPHEADER_SIZE) != 0) {
        fprintf(stderr, "Error writing BMP header\n");
        return 3;
    }

    /*
     * Create exactly one block per thread. Deriving the block count from
     * height/blocksize can yield more blocks than threads, and then the last
     * block would reach past the image height.
     */
    int rounds = threads;
    int blocksize = height / rounds;
    int reminder = height % rounds;
    int i, y;

    threadinfo *tinfoptr = getptr(rounds);
    if (tinfoptr == NULL) {
        fprintf(stderr, "Error allocating thread info\n");
        return 1;
    }

    int begin = 1;
    for (i = 0; i < rounds; ++i) {
        int end = blocksize * (i + 1);
        if (i == rounds - 1) {
            /* The last thread also takes the rows left over by the division. */
            end += reminder;
        }
        threadinfo *t = &tinfoptr[i];
        t->from = begin;
        t->to = end;
        t->span = span;
        t->spanBytes = spanBytes;
        t->width = width;
        t->height = end - begin + 1;
        t->results = NULL;
        t->threadno = i + 1;
        t->blocksizeperthread = -1;
        /* Rows are inclusive on both ends, so the next block starts one row
         * later; reusing 'end' would compute the boundary row twice. */
        begin = end + 1;
    }
    fprintf(stderr,"inti abgeschlossen, starte threads\n");

    pthread_t myThread[rounds];

    /* Start the stopwatch before the workers run so the whole computation
     * is measured, not only the time spent waiting in pthread_join. */
    gettimeofday(&start, NULL);
    for (i = 0; i < rounds; ++i) {
        fprintf(stderr,"Rufe Thread %d auf\n", i + 1);
        if (pthread_create(&myThread[i], NULL, myDo2, (void*)(tinfoptr + i))) {
            fprintf(stderr, "Error creating thread\n");
            return 1;
        }
    }
    for (i = 0; i < rounds; ++i) {
        /* Wait for every worker to finish. */
        if (pthread_join(myThread[i], NULL)) {
            fprintf(stderr, "Error joining thread\n");
            return 2;
        }
    }
    /* Stop the stopwatch; the timezone argument is NULL as documented. */
    gettimeofday(&ende, NULL);

    /* All workers are done: emit their rows in thread order. */
    for (i = 0; i < rounds; ++i) {
        threadinfo *t = &tinfoptr[i];
        /* blocksizeperthread holds the index of the last row (count - 1);
         * it remains -1 when the worker produced nothing. */
        int l_blocksize = t->blocksizeperthread;
        if (t->results == NULL) {
            continue;
        }
        for (y = 0; y <= l_blocksize; y++) {
            /* One pixel row followed by the BMP alignment padding. */
            if (write_fully(1, t->results[y], (size_t)width * 3) != 0 ||
                write_fully(1, span, (size_t)spanBytes) != 0) {
                fprintf(stderr, "Error writing pixel data\n");
                return 3;
            }
            free(t->results[y]);
        }
        free(t->results);
        t->results = NULL;
    }
    /* NOTE(review): tinfoptr comes from getptr(), whose allocation scheme is
     * not visible here, so it is not released in this function. */

    ttlende = ende.tv_sec * 1000000 + ende.tv_usec;
    ttlstart = start.tv_sec * 1000000 + start.tv_usec;
    fprintf(stderr, "\nDauer: %ld Mikrosekunden\n", (ttlende - ttlstart));
    return 0;
}
And here is the function that is called:
/*
 * Thread entry point: computes the Mandelbrot rows from..to (inclusive,
 * 1-based) described by the threadinfo passed in tiptr.
 *
 * On success the threadinfo receives
 *   results            - a malloc'ed array of malloc'ed rows, each row
 *                        holding width*3 bytes (3 bytes per pixel);
 *   blocksizeperthread - the index of the last computed row (row count - 1).
 * The caller owns the rows and the row array and has to free them.
 *
 * If memory cannot be allocated, results is set to NULL and
 * blocksizeperthread to -1. The function always returns NULL.
 */
void* myDo2(void* tiptr){
    threadinfo* mythread = (threadinfo*)tiptr;

    /* Copy the parameters from the struct into locals of this thread. */
    int l_from = mythread->from;
    int l_to = mythread->to;
    int l_width = mythread->width;
    int l_height = mythread->height;
    size_t rowBytes = (size_t)l_width * 3;

    mythread->results = NULL;
    mythread->blocksizeperthread = -1;

    char **container = malloc((size_t)l_height * sizeof *container);
    if (container == NULL) {
        fprintf(stderr, "Error allocating row table\n");
        return NULL;
    }
    for (int i = 0; i < l_height; i++) {
        container[i] = malloc(rowBytes);
        if (container[i] == NULL) {
            fprintf(stderr, "Error allocating row buffer\n");
            /* Release what was allocated so far before giving up. */
            while (i > 0) {
                free(container[--i]);
            }
            free(container);
            return NULL;
        }
    }

    int x, y;
    /* NOTE(review): iterate is a char; if colorLimit (defined elsewhere) can
     * exceed CHAR_MAX this counter overflows - confirm against its value. */
    char iterate = 0;
    Complex c = {0,0};
    Complex newz = {0,0};
    float imageRelation = (float)l_width/(float)height;
    char blueGreenRed[3];
    int counter = 0;

    /* The counter bound keeps the writes inside the allocated rows even if
     * from/to and height in the threadinfo do not agree. */
    for (y = l_from; y <= l_to && counter < l_height; ++y) {
        char *row = container[counter];
        for (x = 1; x <= l_width; ++x) {
            Complex z = {0,0};
            float quad = 0;
            /* Map the pixel position to a point c in the complex plane. */
            c.re = zoom * (-1.0 + imageRelation * ( (x-1.0) / (width-1.0)) );
            c.im = zoom * ( 0.5 - (y-1.0) / (height-1.0) );
            /* Iterate z = z*z + c until the limit is reached or |z|^2
             * (as measured before the current step) exceeds quadLimit. */
            for ( iterate=1; iterate < colorLimit && quad < quadLimit; ++iterate ) {
                quad = z.re * z.re + z.im * z.im;
                newz.re = (z.re * z.re) - (z.im * z.im) + c.re;
                newz.im = z.re * z.im * 2.0 + c.im;
                z = newz;
            }
            toRGB(iterate, blueGreenRed);
            /* Store the 3 colour bytes directly in the result row; pixel x
             * lives at byte offset (x-1)*3. */
            memcpy(row + (size_t)(x-1)*3, blueGreenRed, 3);
        }
        counter++;
    }

    mythread->blocksizeperthread = counter-1;
    mythread->results = container;
    fprintf(stderr, "Ich bin Thread-Nr. %d\n", mythread->threadno);
    fprintf(stderr, "und habe eine Menge Zeilen von %d\n", mythread->blocksizeperthread);
    fprintf(stderr, "und habe berechnet von %d\n", l_from);
    fprintf(stderr, "und habe berechnet bis %d\n", l_to);
    return NULL;
}
Thank you very much, yours jbug