0

I have the following convolution algorithm:

public class Spatializer : MonoBehaviour 
{   
    const int MAX_TAPS = 128;
    static float[,,] IRs = null;

    // http://stackoverflow.com/questions/7237907/1d-fast-convolution-without-fft
    public static float[] Spatialize( float[] srcMono, int toneme, bool loop )
    {
        if( IRs == null )
            LoadIRs( );

        int inSamps = srcMono.Length;
        int outSamps = inSamps + ( loop ? 0 : MAX_TAPS-1 );

        float [] L = new float[ outSamps ];
        float [] R = new float[ outSamps ];

        int i,j,k;
        float x_i;
        for ( i = 0; i < inSamps; i++)
        {
            x_i = srcMono[ i ];
            for ( j = 0; j < MAX_TAPS; j++)
            {
                k = i + j;
                if( k >= inSamps )
                    if( loop )
                        k %= inSamps;

                L[ k ] += x_i * IRs[ toneme, 0, j ];
                R[ k ] += x_i * IRs[ toneme, 1, j ];
            }
        }

        float[] outputInterleaved = new float[ 2 * outSamps ];
        int outPtr = 0;

        for ( i = 0; i < outSamps; i++)
        {
            outputInterleaved[ outPtr++ ] = L[ i ];
            outputInterleaved[ outPtr++ ] = R[ i ];
        }

        return outputInterleaved;
    }



    static void LoadIRs( )
    {
        IRs = new float[ 12, 2, MAX_TAPS ];

        // !!! get wav from file
        float [] wav = new float[ ... ];
        float step = ...;

        // de-interleave and resample
        for( int toneme = 0; toneme < 12; toneme++ ) 
        {   
            for( int tap=0; tap < MAX_TAPS; tap++ )
            {
                int n = (int)Mathf.RoundToInt( (float)tap * step );

                IRs[ toneme, 0, tap ] = wav[ 2 * n ];
                IRs[ toneme, 1, tap ] = wav[ 2 * n + 1 ];
            }
        }
    }


}

I suspect it would be much faster if I just had to access a 1D array within the convolution loop, as it stands to reason that accessing an element from a one-dimensional array is going to involve fewer cycles than accessing an element from a three-dimensional array.

        // extract array we want OUTSIDE loop
        float [] IR_L = IRs[ toneme, 0 ];  // BAD SYNTAX <-- how to do this?
        float [] IR_R = IRs[ toneme, 1 ];  // BAD SYNTAX <-- how to do this?

        for ( i = 0; i < inSamps; i++)
        {
            x_i = srcMono[ i ];
            for ( j = 0; j < MAX_TAPS; j++)
            {
                k = i + j;
                if( k >= inSamps )
                    if( loop )
                        k %= inSamps;

                L[ k ] += x_i * IR_L[ j ];
                R[ k ] += x_i * IR_R[ j ];
            }
        }

The above is pseudocode, as I don't know how to go about implementing it, or even whether it is possible.

So my question is: can I extract

float [] IR_L = IRs[ toneme, 0 ]; // BAD SYNTAX <-- how to do this?

so that instead of writing

IRs[ toneme, 0, j ] 

within the inner loop, I could just write

IR_L[ j ]
P i
  • 29,020
  • 36
  • 159
  • 267
  • 1
    I have edited your title. Please see, "[Should questions include “tags” in their titles?](http://meta.stackexchange.com/questions/19190/)", where the consensus is "no, they should not". – John Saunders Oct 20 '13 at 01:41
  • I would think you'd optimize your loop the same way you'd optimize anything else: find out what parts are slow, then speed them up. Just looking at the code won't help you much: after making a change, how will you know if you've improved things? How will you know you haven't broken the code? – John Saunders Oct 20 '13 at 01:42
  • Sorry, I have reworded the question so as to make it precise. – P i Oct 20 '13 at 13:27
  • 1
    @John Saunders, a loop, especially dealing with arrays may be a subject to optimization: http://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-an-unsorted-array – Basilevs Oct 20 '13 at 13:38
  • @Basilevs: I said nothing to indicate that the loop was not subject to optimization. I simply recommend that any optimizations be based on _proof_ and that adequate unit test coverage should be in place, so that it can be proven that the optimizations have not broken the code. – John Saunders Oct 20 '13 at 16:24

1 Answers1

0

As a result of looking through that superb link from Basilevs, I have removed the branch from the inner loop.

I found the answer to the array question: the solution is to use a jagged array; [][] rather than [,]. The common knowledge is that [][] performs faster, whereas [,] allows for tidier and more succinct code. In my case I needed [] [].

Here is the final convolution algorithm -- it performs at least four times faster than the original:

const int MAX_TAPS = 128;
static float[,][] IRs = null;

public static float[] Spatialize( float[] srcMono, int toneme, bool loop )
{
    if( IRs == null )
        LoadIRs( );

    int inSamps = srcMono.Length;
    int convSamps = inSamps + MAX_TAPS;

    float [] destL = new float[ convSamps ];
    float [] destR = new float[ convSamps ];

    float [] filtL = IRs[ toneme, 0 ];
    float [] filtR = IRs[ toneme, 1 ];

    int i,j,k;
    float x_i;
    for ( i = 0; i < inSamps; i++)
    {
        x_i = srcMono[ i ];
        k = i;
        for ( j = 0; j < MAX_TAPS; j++, k++ )
        {
            // think: k = i + j;
            destL[ k ] += x_i * filtL[ j ];
            destR[ k ] += x_i * filtR[ j ];
        }
    }

    // circular convolution?
    if( loop ) {
        for ( j = 0; j < MAX_TAPS; j++ ) {
            destL[ j ] += destL[ inSamps + j ];
            destR[ j ] += destR[ inSamps + j ];
        }
    }

    int outSamps = loop ? inSamps : convSamps;

    float[] outputInterleaved = new float[ 2 * outSamps ];
    int outPtr = 0;

    for ( i = 0; i < outSamps; i++)
    {
        outputInterleaved[ outPtr++ ] = destL[ i ];
        outputInterleaved[ outPtr++ ] = destR[ i ];
    }

    return outputInterleaved;
}

static void LoadIRs( )
{
    IRs = new float[ 12, 2] [];

            // !!! fill wav[] from file

    for( int toneme = 0; toneme < 12; toneme++ ) 
    {           
        // de-interleave and resample
        float [] L = new float[ MAX_TAPS ];
        float [] R = new float[ MAX_TAPS ];

        for( int tap=0; tap < MAX_TAPS; tap++ )
        {
            int n = (int)Mathf.RoundToInt( (float)tap * step );

            L[ tap ] = wav[ 2 * n ];
            R[ tap ] = wav[ 2 * n + 1 ];
        }

        IRs[ toneme, 0 ] = L;
        IRs[ toneme, 1 ] = R;
    }
P i
  • 29,020
  • 36
  • 159
  • 267