1

The following MEXed C code simply makes calls to magma to invert a matrix. The stand alone C code (which is also posted) works, but the mex code crashes.

I've triple checked the documentation, verified that other magma functions work as expected, and posted on the Magma forum and was told my code is fine (this post is a cross post from Magma forum). This means that the problem is with mex. I would like to know what is causing the mex code to seg-fault and how to get it to run as expected.

Mexed code:

#include <mex.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stddef.h>
#include <magma_v2.h>
#include <cuda_runtime.h>

/*
 * Pack split real/imaginary arrays into an interleaved MAGMA complex array.
 *
 *   p            destination; entry j receives (pr[j], pi[j])
 *   pr           real parts, read for indices 0..numElements-1
 *   pi           imaginary parts, same length as pr; may be NULL, in which
 *                case every imaginary part is written as 0. This covers
 *                real-valued MATLAB arrays, for which mxGetPi is documented
 *                to return NULL.
 *   numElements  number of entries to convert; nothing is written when <= 0
 */
void mat2magma(magmaDoubleComplex* p, double* pr, double* pi,int numElements)
{
    int j;
    for(j=0;j<numElements;j++){
        p[j].x=pr[j];
        /* guard against a missing imaginary part instead of dereferencing NULL */
        p[j].y=(pi!=NULL)?pi[j]:0.0;
    }
}

/*
 * Unpack an interleaved MAGMA complex array into split storage:
 * the real part of entry j is written to pr[j], the imaginary part to pi[j],
 * for j = 0..numElements-1. Nothing is written when numElements <= 0.
 */
void magma2mat(magmaDoubleComplex* p, double* pr, double* pi,int numElements)
{
    const magmaDoubleComplex *src = p;
    double *re = pr;
    double *im = pi;
    int remaining;

    /* walk all three arrays in lock-step, one complex entry per iteration */
    for (remaining = numElements; remaining > 0; --remaining) {
        *re++ = src->x;
        *im++ = src->y;
        ++src;
    }
}

/*gateway function*/
/*
 * Y = magmaZinv(A)
 *
 * Inverts the full, square, double-precision matrix A on the GPU: the matrix
 * is copied to the device, LU-factorized with magma_zgetrf_gpu, inverted in
 * place with magma_zgetri_gpu, and copied back. Y is always returned as a
 * complex matrix. A real-valued A is accepted and treated as having zero
 * imaginary part; an empty A yields an empty Y.
 *
 * Errors (raised through mexErrMsgIdAndTxt after all resources are released):
 *   magmaZinv:nrhs / nlhs / input   bad call signature or argument
 *   magmaZinv:init                  magma_init failed
 *   magmaZinv:hostAlloc             host buffers could not be allocated
 *   magmaZinv:deviceAlloc           device buffers could not be allocated
 *   magmaZinv:factorization         zgetrf/zgetri reported info != 0
 *
 * Build note: magma_int_t in this translation unit must have the same width
 * as in the MAGMA library that is linked. Reportedly, inside MATLAB this
 * means compiling with the MKL_ilp64 flag (64-bit magma_int_t) and with a
 * gcc version supported by the MATLAB release; otherwise the call to
 * magma_zgetrf_gpu can seg-fault.
 */
void mexFunction( int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[]) {

    magma_queue_t queue = NULL;
    magma_device_t dev;
    magma_int_t m, ldwork, info = 0, k;
    magma_int_t *piv = NULL;
    magmaDoubleComplex *a = NULL, *da = NULL, *dwork = NULL;
    double *inRe, *inIm;
    int magmaReady = 0;          /* set once magma_init succeeded */
    int errCode = 0;             /* MAGMA return code or LAPACK-style info */
    const char *errId = NULL;
    const char *errMsg = NULL;

    /* Validate before touching any resource: nothing to release here. */
    if (nrhs != 1)
        mexErrMsgIdAndTxt("magmaZinv:nrhs",
                "Exactly one input argument is required.");
    if (nlhs > 1)
        mexErrMsgIdAndTxt("magmaZinv:nlhs",
                "At most one output argument is supported.");
    if (!mxIsDouble(prhs[0]) || mxIsSparse(prhs[0]) ||
            mxGetNumberOfDimensions(prhs[0]) != 2 ||
            mxGetM(prhs[0]) != mxGetN(prhs[0]))
        mexErrMsgIdAndTxt("magmaZinv:input",
                "Input must be a full, square matrix of class double.");

    m = (magma_int_t) mxGetM(prhs[0]);

    /* Create the output first: it is owned by MATLAB, so a later error
     * cannot leak it. */
    plhs[0] = mxCreateDoubleMatrix(m,m,mxCOMPLEX);
    if (m == 0)
        return;                  /* inverse of an empty matrix is empty */

    /*initialize magma*/
    errCode = (int) magma_init();
    if (errCode != MAGMA_SUCCESS) {
        errId = "magmaZinv:init";
        errMsg = "magma_init failed";
        goto cleanup;
    }
    magmaReady = 1;
    magma_getdevice(&dev);
    magma_queue_create(dev,&queue );

    /* Matlab -> Host */
    piv=(magma_int_t*) malloc(m*sizeof(magma_int_t));
    if (piv != NULL)
        errCode = (int) magma_zmalloc_cpu(&a,m*m);
    if (piv == NULL || errCode != MAGMA_SUCCESS) {
        a = NULL;
        errId = "magmaZinv:hostAlloc";
        errMsg = "Could not allocate host memory";
        goto cleanup;
    }
    inRe = mxGetPr(prhs[0]);
    inIm = mxGetPi(prhs[0]);
    if (inIm != NULL) {
        mat2magma(a,inRe,inIm,m*m);
    } else {
        /* real input: there is no imaginary array to read from */
        for (k = 0; k < m*m; k++) {
            a[k].x = inRe[k];
            a[k].y = 0.0;
        }
    }
    ldwork = m*magma_get_zgetri_nb(m);

    /* Host -> GPU */
    errCode = (int) magma_zmalloc(&dwork,ldwork);
    if (errCode != MAGMA_SUCCESS) {
        dwork = NULL;
        errId = "magmaZinv:deviceAlloc";
        errMsg = "Could not allocate GPU workspace";
        goto cleanup;
    }
    errCode = (int) magma_zmalloc(&da,m*m);
    if (errCode != MAGMA_SUCCESS) {
        da = NULL;
        errId = "magmaZinv:deviceAlloc";
        errMsg = "Could not allocate GPU memory for the matrix";
        goto cleanup;
    }
    magma_zsetmatrix(m,m,a,m,da,m,queue);

    /*LU and Inverse */
    magma_zgetrf_gpu(m,m,da,m,piv,&info);
    if (info == 0)
        magma_zgetri_gpu(m,da,m,piv,dwork,ldwork,&info);
    if (info != 0) {
        /* LAPACK convention: info > 0 is a zero pivot, info < 0 a bad argument */
        errCode = (int) info;
        errId = "magmaZinv:factorization";
        errMsg = (info > 0)
                ? "Matrix is singular (zero pivot in the LU factorization); inverse not computed"
                : "MAGMA rejected an argument of zgetrf/zgetri";
        goto cleanup;
    }

    /*GPU -> Host */
    magma_zgetmatrix(m,m,da,m,a,m,queue);

    /*Host -> Matlab*/
    magma2mat(a,mxGetPr(plhs[0]),mxGetPi(plhs[0]),m*m);

cleanup:
    /* a came from magma_zmalloc_cpu, so it must go back through magma_free_cpu */
    if (a != NULL)
        magma_free_cpu(a);
    free(piv);
    if (dwork != NULL)
        magma_free(dwork);
    if (da != NULL)
        magma_free(da);
    if (queue != NULL)
        magma_queue_destroy(queue);
    if (magmaReady)
        magma_finalize();

    /* Raise only after cleanup: mexErrMsgIdAndTxt does not return. */
    if (errMsg != NULL)
        mexErrMsgIdAndTxt(errId, "%s (code %d).", errMsg, errCode);
}

I compiled it with `mex CC=gcc LDFLAGS="-lmagma -lcudart -lcublas" magmaZinv.c`, then from MATLAB I ran:

a=magic(3)+magic(3)*1i;
magmaZinv(a)

Standalone C code:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stddef.h>
#include <magma_v2.h>
#include <cuda_runtime.h>
#include <sys/time.h>
#include <time.h>

/*gateway function*/
int main() {

    /*initialize magma*/
    magma_init();
    magma_queue_t queue = NULL;
    magma_device_t dev;
    magma_getdevice(&dev);
    magma_queue_create(dev,&queue );

    int m,ldwork,info;
    int *piv;
    magmaDoubleComplex *a,*da,*dwork;

    /* allocate and initialize a = magic(3)+magic(3)*1i; */
    m=3;
    piv=(int*) malloc(m*sizeof(int));
    ldwork = m*magma_get_zgetri_nb(m);
    magma_zmalloc_cpu(&a,m*m);
    a[0].x=8;a[0].y=8;
    a[1].x=3;a[1].y=3;
    a[2].x=4;a[2].y=4;
    a[3].x=1;a[3].y=1;
    a[4].x=5;a[4].y=5;
    a[5].x=9;a[5].y=9;
    a[6].x=6;a[6].y=6;
    a[7].x=7;a[7].y=7;
    a[8].x=2;a[8].y=2;

    /* Host -> GPU */
    magma_zmalloc(&dwork,ldwork);
    magma_zmalloc(&da,m*m);
    magma_zsetmatrix(m,m,a,m,da,m,queue);

    /*LU and Inverse */
    magma_zgetrf_gpu(m,m,da,m,piv,&info);
    magma_zgetri_gpu(m,da,m,piv,dwork,ldwork,&info);

    /*GPU -> Host */
    magma_zgetmatrix(m,m,da,m,a,m,queue);

    /* display inv(a) */
    for (int i=0;i<(m*m);i++){
        printf("%f +%fi\n",a[i].x,a[i].y);
    }

    /* free memory */
    free(a);
    free(piv);
    magma_free(dwork);
    magma_free(da);
    magma_queue_destroy(queue);
    magma_finalize();

    return 0;
}

I compiled with: gcc -lmagma -lcudart Ccode.c -o Ccode.o

avgn
  • 982
  • 6
  • 19
  • where does it segfault? – Ander Biguri Aug 15 '18 at 07:53
  • The seg-fault happens on calling magma_zgetrf_gpu. If both magma_zgetrf_gpu and magma_zgetri_gpu calls are commented, the MEXed code runs fine, and simply returning the unchanged input matrix to LHS. – avgn Aug 15 '18 at 14:32
  • Try to print some of the values using `mexPrintf` before those calls. The most likely scenario is some wrong type has been passed to one of those input variables. Print 1 element of them, and see which print gives the segfault (or prints garbage). – Ander Biguri Aug 15 '18 at 14:34
  • I've tried that actually at each stage. Every single argument is correct. In fact, if I don't call those 2 functions, the matrix goes from prhs -> host memory convert from matlab to interleaved-> gpu memory -> host memory convert from interleaved to matlab -> plhs. successfully. I'm really stumped! and I need this code! – avgn Aug 16 '18 at 00:42
  • There must be a difference, otherwise the function would do the same thing in the C and mex versions. One brute-force debugging method is to create a function that saves all variables to a text file just before the `magma_zgetrf_gpu` call. Use the exact same function for both versions, and save all the input variables. Then try to see what is wrong. But if all is the same, the same behavior is expected; ultimately, computers are deterministic. – Ander Biguri Aug 16 '18 at 08:44
  • There might be a conflict between CUDA version used by MATLAB and your MEX-file. – Cris Luengo Aug 16 '18 at 22:22

1 Answers1

1

My sys admin has figured out why the standalone C code works while the mexed C code does not. I'll post the reason here in case it is helpful to anyone facing the same issues when using Magma from within Matlab.

  1. The version of Matlab I was using was 2014a. The supported compiler for this version is 4.7.x. I was using a higher version of gcc to compile the code. I've never had a problem with using different versions of GCC with matlab, despite the warning it gives, but for the above code it does matter.

  2. Compile with the MKL_ilp64 flag when using Magma with Matlab to ensure that magma_int_t is int64.

With these two suggestions, Magma can be mexed into matlab with no problems.

avgn
  • 982
  • 6
  • 19