How to optimize my c code?

Question

I tried to implement C code for Wavelet transform in FPGA (Zynq ZC 702) but the code get stuck and this is because of memory problem so I should optimize my code but I don't know how.

Can anyone please give me some ideas how to do that ?

This is the main of the code

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "wavemin.h"
#include "waveaux.h"
#include "waveaux.c"
#include "wavemin.c"

int main() {
    printf("Hello World1 \n\r");
    wave_object obj;
    wt_object wt;
    float *inp, *out;
    int N, i, J,k;
    float temp[1280] = {};
    char *name = "db4";
    obj = wave_init(name);

    printf("Hello World2 \n\r");
    N = 1280;
    inp = (float*)malloc(sizeof(float) * N);
    out = (float*)malloc(sizeof(float) * N);

    //wmean = mean(temp, N);
    for (i = 0; i < N; ++i) {
        inp[i] = temp[i];

        printf("Hello World3 \n\r");
        //printf("%g \n", inp[i]);
    }

    J = 4; //Decomposition Levels
    wt = wt_init(obj, "dwt", N, J);    // Initialize the wavelet transform object
    printf("Hello World4 \n\r");
    setDWTExtension(wt, "sym");       // Options are "per" and "sym". Symmetric  is the default option
    printf("Hello World5 \n\r");

    setWTConv(wt, "direct");
    printf("Hello World6 \n\r");
    dwt(wt, inp);     // Perform DWT
    printf("Hello World7 \n\r");

    //getDWTAppx(wt, out, wt->length[0]);
    //  printf("Approximation Coefficients Level 1 \n");
    //   for (i = 0; i < wt->length[0]; ++i) {
    // printf("%g ", out[i]);
    //  }
    // printf("\n\n");
    for (k = 1; k <= J; ++k) {
        getDWTDetail(wt, out, wt->length[k], k);
        printf("Detail Coefficients Level %d Length %d \n",
               k, wt - length[k]);
        for (i = 0; i < wt->length[k]; ++i) {
            printf("%g ", out[i]);
        }
        printf("\n\n");
    }
    wt_summary(wt);// Prints the full summary.
    printf("Hello World8 \n\r");
    wave_free(obj);
    wt_free(wt);
    free(inp);
    free(out);

    return 0;
}

The other part of the code where there is the function used in the main function:

#include "wavemin.h"

wave_object wave_init(char *wname) {
    wave_object obj = NULL;
    int retval;
    retval = 0;

    if (wname != NULL) {
        retval = filtlength(wname);
    }

    obj = (wave_object)malloc(sizeof(struct wave_set) + sizeof(float) * 4 * 
                              retval);

    obj->filtlength = retval;
    obj->lpd_len = obj->hpd_len = obj->lpr_len = obj->hpr_len = obj->filtlength;
    strcpy(obj->wname, wname);
    if (wname != NULL) {
        filtcoef(wname, obj->params, obj->params + retval, obj->params + 2 * 
                 retval, obj->params + 3 * retval);
    }
    obj->lpd = &obj->params[0];
    obj->hpd = &obj->params[retval];
    obj->lpr = &obj->params[2 * retval];
    obj->hpr = &obj->params[3 * retval];

    return obj;
}

wt_object wt_init(wave_object wave, char *method, int siglength, int J) {
    int size, i, MaxIter;
    wt_object obj = NULL;

    size = wave->filtlength;

    MaxIter = wmaxiter(siglength, size);

    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) {
        obj = (wt_object)malloc(sizeof(struct wt_set) + sizeof(float) * 
                                (siglength + 2 * J * (size + 1)));
        obj->outlength = siglength + 2 * J * (size + 1); // Default
        strcpy(obj->ext, "sym"); // Default
    }

    obj->wave = wave;
    obj->siglength = siglength;
    obj->J = J;
    obj->MaxIter = MaxIter;
    strcpy(obj->method, method);

    if (siglength % 2 == 0) {
        obj->even = 1;
    }
    else {
        obj->even = 0;
    }

    strcpy(obj->cmethod, "direct"); // Default
    obj->cfftset = 0;
    obj->lenlength = J + 2;
    obj->output = &obj->params[0];
    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) {
        for (i = 0; i < siglength + 2 * J * (size + 1); ++i) {
            obj->params[i] = 0.0;
        }
    }
    //wave_summary(obj->wave);

    return obj;
}


static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
                    float *cD, int len_cD) {
    int i, l, t, len_avg;

    len_avg = wt->wave->lpd_len;

    for (i = 0; i < len_cA; ++i) {
        t = 2 * i + 1;
        cA[i] = 0.0;
        cD[i] = 0.0;
        for (l = 0; l < len_avg; ++l) {
            if ((t - l) >= 0 && (t - l) < N) {
                cA[i] += wt->wave->lpd[l] * inp[t - l];
                cD[i] += wt->wave->hpd[l] * inp[t - l];
                printf("world1 \n\r");
            }
            else if ((t - l) < 0) {
                cA[i] += wt->wave->lpd[l] * inp[-t + l - 1];
                cD[i] += wt->wave->hpd[l] * inp[-t + l - 1];
                printf("world2 \n\r");
            }
            else if ((t - l) >= N) {
                cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1];
                cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1];

                printf("world3 \n\r");
            }
        }
    }
}

void dwt(wt_object wt, float *inp) {
    int i, J, temp_len, iter, N, lp;
    int len_cA;
    float *orig, *orig2;

    temp_len = wt->siglength;
    J = wt->J;
    wt->length[J + 1] = temp_len;
    wt->outlength = 0;
    wt->zpad = 0;
    orig = (float*)malloc(sizeof(float) * temp_len);
    orig2 = (float*)malloc(sizeof(float) * temp_len);

    for (i = 0; i < wt->siglength; ++i) {
        orig[i] = inp[i];
        printf("Hello1 \n\r");
    }

    if (wt->zpad == 1) {
        orig[temp_len - 1] = orig[temp_len - 2];
        printf("Hello2 \n\r");
    }

    N = temp_len;
    lp = wt->wave->lpd_len;

    if (!strcmp(wt->ext, "sym")) {
        //printf("\n YES %s \n", wt->ext);
        i = J;
        while (i > 0) {
            N = N + lp - 2;
            N = (int)ceil((float)N / 2.0);
            wt->length[i] = N;
            wt->outlength += wt->length[i];
            i--;
        }
        wt->length[0] = wt->length[1];
        wt->outlength += wt->length[0];
        N = wt->outlength;
        printf("Hello3 \n\r");

        for (iter = 0; iter < J; ++iter) {
            len_cA = wt->length[J - iter];
            N -= len_cA;
            dwt_sym(wt, orig, temp_len, orig2, len_cA, wt->params + N, len_cA);
            temp_len = wt->length[J - iter];
            printf("Hello4 \n\r");

            if (iter == J - 1) {
                for (i = 0; i < len_cA; ++i) {
                    wt->params[i] = orig2[i];
                    printf("Hello5 \n\r");
                }
            } else {
                for (i = 0; i < len_cA; ++i) {
                    orig[i] = orig2[i];
                    printf("Hello6 \n\r");
                }
            }
        }
    } else {
        printf("Signal extension can be either per or sym");
        exit(-1);
    }

    free(orig);
    free(orig2);
}

void setDWTExtension(wt_object wt, char *extension) {
    if (!strcmp(extension, "sym")) {
        strcpy(wt->ext, "sym");
    } else {
        printf("Signal extension can be either per or sym");
        exit(-1);
    }
}

void setWTConv(wt_object wt, char *cmethod) {
    if (!strcmp(cmethod, "direct")) {
        strcpy(wt->cmethod, "direct");
    }
}

void getDWTDetail(wt_object wt, float *detail, int N, int level) {
    /*
       returns Detail coefficents at the jth level where j = 1,2,.., J
       and Wavelet decomposition is stored as
       [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector
       Use getDWTAppx() to get A(J)
       Level 1 : Length of D(J), ie N, is stored in wt->length[1]
       Level 2 :Length of D(J-1), ie N, is stored in wt->length[2]
       ....
       Level J : Length of D(1), ie N, is stored in wt->length[J]
     */
    int i, iter, J;
    J = wt->J;

    if (level > J) {
        printf("The decomposition only has %d levels", J);
    }

    iter = wt->length[0];

    for (i = 1; i < level; ++i) {
        iter += wt->length[i];
    }

    for (i = 0; i < N; ++i) {
        detail[i] = wt->output[i + iter];
    }
}

void getDWTAppx(wt_object wt, float *appx, int N) {
    /*
       Wavelet decomposition is stored as
       [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector

       Length of A(J) , N = wt->length[0]
     */
    int i;

    for (i = 0; i < N; ++i) {
        appx[i] = wt->output[i];
    }
}

void wt_summary(wt_object wt) {
    int i;
    int J, t;
    J = wt->J;

    printf("Wavelet Coefficients are contained in vector : %s \n", "output");
    printf("\n");
    printf("Approximation Coefficients \n");
    printf("Level %d Access : output[%d] Length : %d \n",
           1, 0, wt->length[0]);
    printf("\n");
    printf("Detail Coefficients \n");
    t = wt->length[0];
    for (i = 0; i < J; ++i) {
        printf("Level %d Access : output[%d] Length : %d \n",
               i + 1, t, wt->length[i + 1]);
        t += wt->length[i + 1];
    }
    printf("\n");

}
void wave_free(wave_object object) {
    free(object);
}

void wt_free(wt_object object) {
    free(object);
}

enter image description here

Maybe the question is better suited for [code review](https://codereview.stackexchange.com/)? — xander, Aug 11 '17 at 09:12
In case your code is working and you are only looking for optimisation hints you are at the wrong place. StackOverflow is for solving specific problems with code. "Too slow" is too broad. Please check https://codereview.stackexchange.com/ — Yunnosch, Aug 11 '17 at 09:13
You found StackOverflow and posted your question. Do the same for the other site. Links are already in the comments above. — Yunnosch, Aug 11 '17 at 09:15
How do you know this is a memory problem? What is the output when you run the program? Have you printed the size used for each `malloc` to see how much memory you allocate? — Support Ukraine, Aug 11 '17 at 09:19
yes actually the code works but i was wondering how to change the function like malloc which reserve a memoy space — meri, Aug 11 '17 at 09:25
@4386427 i run the code with codeblocks and it works well but when i run it with FPGA it got stuck in the middle of the code this is why i add the printf "hello world " to track it where it stops .concerning malloc i dont now how to print the size used for each malloc? — meri, Aug 11 '17 at 09:31
`printf("Hello World1 \n\r");` Do not output the carriage return (`\r`). The C newline `\n` is automatically converted by the C library into the end of line sequence specific to the host system. Just write `printf("Hello World1\n");` — chqrlie, Aug 11 '17 at 09:35
Hiding pointers behind typedefs is a bad idea. It is difficult to read and understand your code without the structure definitions. — chqrlie, Aug 11 '17 at 09:43
'i dont now how to print the size used for each malloc?' umm.. ...OK. malloc() only takes one argument, 'size_t size'. Move those expressions like 'sizeof(struct wave_set) + sizeof(float) * 4 * retval' so as to load a temp var, then printf out the temp var, then malloc the temp var, or breakpoint with your debugger and inspect. This is just debugging 101:( — Martin James, Aug 11 '17 at 09:49
`strcpy(obj->wname, wname); if (wname != NULL) {` ---> Why test for `NULL`-ness _after_ calling `strcpy()`? `strcpy(..., NULL)` is undefined behavior.. — chux - Reinstate Monica, Aug 11 '17 at 10:20
@ chqrlie this is where i take the code from https://github.com/rafat/wavelib/wiki/DWT-Example-Code — meri, Aug 11 '17 at 10:55

score 2 · Answer 1 · answered Aug 11 '17 at 09:16

2

In your code

Always check if malloc has returned non NULL value
Check your stack and heap settings in the linker file as you declare massive local variables and do a lots of mallocs - I suspect the (nomen omen)stack overflow, or failed mallocs.

Is it a bare metal program or you run it under some kind of OS?

answered Aug 11 '17 at 09:16

0___________

60,014
4
34
74

1

I would add: Stop casting the output of the `malloc()`, this is C++ specific, NOT in C. – perror Aug 11 '17 at 09:17
But it does not harm as he includes `stdlib`, and even if compiler is pre C11 it will not cause program crashes. Its only about the style – 0___________ Aug 11 '17 at 09:18
1

Not only about style, it may also mask some errors that could be detected at compile time. Eg, see this [SO question](https://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc) (last point of the first answer). – perror Aug 11 '17 at 09:39
@perror read my comment carefully. In this case he **is** including `stdlib`. Other question is that if he did not include it he will get another warning `Implicit declaration of .....`. If someone ignores warnings it does absolutely not matter if he cast or not, as he will ignore all of them :) – 0___________ Aug 11 '17 at 09:48
Well, then this is just me... I jump off my chair each time I see a typecast on a `malloc()` in C. For me, this is just non sense. Sorry for being pedantic. :-) – perror Aug 11 '17 at 09:51
@perror IMO the best sollution is to set your compiler to comply with C99 or C11 standard (for example by using `-std` in the gcc), or force the implicit declaration error if the code has to be compiled under older standard . Not the `cast` is a problem - **implicit declarations are** - causing a lots of UBs in the programs. – 0___________ Aug 11 '17 at 10:04

perror · Answer 2 · 2017-08-11T10:04:01.710

Just for a matter of style and concision, I would rewrite this:

if (siglength % 2 == 0) {
        obj->even = 1;
    }
    else {
        obj->even = 0;
    }

Into the following code:

obj->even = !(siglength % 2);

Or, alternatively:

 obj->even = (siglength % 2) ? 0 : 1;

Also, I think there is room for optimization in this function:

static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
                    float *cD, int len_cD) {
    int i, l, t, len_avg;

    len_avg = wt->wave->lpd_len;

    for (i = 0; i < len_cA; ++i) {
        t = 2 * i + 1;
        cA[i] = 0.0;
        cD[i] = 0.0;
        for (l = 0; l < len_avg; ++l) {
            if ((t - l) >= 0 && (t - l) < N) {
                cA[i] += wt->wave->lpd[l] * inp[t - l];
                cD[i] += wt->wave->hpd[l] * inp[t - l];
                printf("world1 \n\r");
            }
            else if ((t - l) < 0) {
                cA[i] += wt->wave->lpd[l] * inp[-t + l - 1];
                cD[i] += wt->wave->hpd[l] * inp[-t + l - 1];
                printf("world2 \n\r");
            }
            else if ((t - l) >= N) {
                cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1];
                cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1];

                printf("world3 \n\r");
            }
        }
    }
}

First, you are always referring to t - 1 and never t itself, so why not have:

t = 2 * i;

And, I can guess that a lot of computation can be placed outside of the inner loop... If you want to optimize, there are many good candidate here.

One last word about optimization!

You should first profile your software and see where you spend the most time before thinking about optimization. You cannot optimize "in the air" without knowing where your software does really struggle. Consider using gprof.

PS: You should never ever use the letter l (ell) as a variable... it is way to close from the number 1 (one). Consider changing this is also, it can improve the reading.

How to optimize my c code?

2 Answers2