0

I try to write use magma library in matlab, so basically I write a mexfunction which incorporate c code using magma function and then compile this mexfunction into mexa64 file, thus I could use in matlab.

The mexfunction or source c code is below:(called eig_magma)

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <math.h>
#include <cuda_runtime_api.h>
#include <cublas.h>

// includes, project
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"


#include <math.h>
#include "mex.h"

#define A(i,j)  A[i + j*lda]
extern "C"


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    #define L_OUT plhs[0]
    #define A_IN  prhs[0]
    #define S_OUT plhs[1]

    magma_init();
    real_Double_t gpu_perf, gpu_time;
    double *h_A, *h_R, *h_work, *L,*S;
    double *w;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork, lda, aux_iwork[1];
    double aux_work[1];

    magma_vec_t jobz = MagmaVec;
    magma_uplo_t uplo = MagmaLower;

    magma_int_t i,j;

    N      = mxGetM(A_IN);  
    lda    = N;
    n2     = lda*N;

     // query for workspace sizes
        magma_dsyevd( jobz, uplo,
                      N, NULL, lda, NULL,
                      aux_work,  -1,
                      aux_iwork, -1,
                      &info );
        lwork  = (magma_int_t) aux_work[0];
        liwork = aux_iwork[0];


    TESTING_MALLOC_CPU( h_A, double, n2);
        h_A    = (double *)mxGetData(A_IN);
        L_OUT = mxCreateDoubleMatrix(N,N,mxREAL);
        L  = mxGetPr(L_OUT);
    S_OUT = mxCreateDoubleMatrix(N,1,mxREAL);
        S  = mxGetPr(S_OUT);

//print and check
        printf("print out h_A\n");
        for(i=0;i<N;i++)
        {
            for(j=0;j<N;j++)
                    printf("%f\t",h_A[i+j*lda]);
            printf("\n");
            }

    /* Allocate host memory for the matrix */
    TESTING_MALLOC_CPU( w,     double, N     );
    TESTING_MALLOC_CPU( iwork,  magma_int_t, liwork );

    TESTING_MALLOC_PIN( h_R,    double, N*lda  );
        TESTING_MALLOC_PIN( h_work, double, lwork  );   

    printf("allocate memory on cpu with pin!\n");
    printf("copy h_A to h_R, and then print h_R\n");
    memcpy( h_R, h_A, sizeof(double)*n2);
//  lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda,h_R, &lda );
//  print and check   
    for(i=0;i<N;i++)
    {   
        for(j=0;j<N;j++)
        {
           printf("%f\t",h_R[i+j*lda]);
        }
    printf("\n");
    }


    /* Performs operation using MAGA */
    gpu_time = magma_wtime();
    printf("start eig\n");
    magma_dsyevd( jobz, uplo,
                          N, h_R, lda, w,
                          h_work, lwork,
                          iwork, liwork,
                          &info );
        gpu_time = magma_wtime() - gpu_time;
    printf("%d\n",info);
        if (info != 0)
        printf("magma_dsyevd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

    printf("convey the output\n");
    memcpy(L,h_R,sizeof(double)*n2);    
    memcpy(S,w,sizeof(double)*N);   

    TESTING_FREE_CPU( w );
    TESTING_FREE_CPU( iwork );
    TESTING_FREE_PIN( h_R );
    TESTING_FREE_PIN( h_work);
    TESTING_FINALIZE();

} 

Because using magma need cuda and magma lib I write the makefile to compile the code into mex file:

# Paths where MAGMA, CUDA, and OpenBLAS are installed
MAGMADIR     = /home/yuxin/magma-1.5.0
CUDADIR      = /usr/local/cuda

MATLABHOME   = /opt/share/MATLAB/R2012b.app/

CC            = icpc
LD            = icpc
CFLAGS        = -Wall
LDFLAGS       = -Wall -mkl=parallel

MEX_CFLAGS     = -fPIC -ansi -pthread -DMX_COMPAT_32 \
               -DMATLAB_MEX_FILE

MAGMA_CFLAGS := -DADD_ -DHAVE_CUBLAS -I$(MAGMADIR)/include -I$(CUDADIR)/include -I$(MAGMADIR)/testing

MAGMA_LIBS   := -L$(MAGMADIR)/lib -L$(CUDADIR)/lib64 \
                -lmagma -lcublas -lcudart

MEXLIBS      := -L/opt/share/MATLAB/R2012b.app/bin/glnxa64 -lmx -lmex -lmat -lm -lstdc++

MEX_INCLU    := -I$(MATLABHOME)/extern/include -I$(MATLABHOME)/simulink/include

MEXFLAGS     := -shared

OBJECT = eig_magma.o
EXECUTABLE = eig_magma

$(EXECUTABLE): $(OBJECT)
    $(CC) $(LDFLAGS) $(MEXFLAGS) $(OBJECT) -o $@ $(MAGMA_LIBS) $(MEXLIBS)
eig_magma.o: eig_magma.c
    $(CC) $(CFLAGS) $(MAGMA_CFLAGS) $(MEX_INCLU) $(MEX_CFLAGS) -c $< -o $@


clean:
    rm -rf eig_magma *.o eig_magma.mexa64

It could be compiled successfully, however when I run the eig_magma in matlab, and when matlab execute to

magma_dsyevd( jobz, uplo, N, h_R, lda, w, h_work, lwork, iwork, liwork, &info );

matlab crashed.......I also have tried to only write the eig_magma in c, without using mex function, it was compiled successfully and works fine.

Anyone has idea how to solve this problem?

Thank you

yuxin

Amro
  • 123,847
  • 25
  • 243
  • 454
yuxin
  • 1
  • 1
  • you should get a crash dump file: http://www.mathworks.com/matlabcentral/answers/100816-how-do-i-locate-the-crash-dump-files-generated-by-matlab. Have you checked it? – Amro Oct 20 '14 at 09:12
  • @Amro I have checked that.....But it is hard for me to interpret it...So what kind of infor I should look for in crash dump file? – yuxin Oct 20 '14 at 10:42
  • If it helps, Parallel Computing Toolbox's `gpuArray` support is based in part on MAGMA, and includes `eig`... – Edric Oct 20 '14 at 11:14
  • @yuxin: first thing to look for is the stack trace to figure out exactly in what library the exception is occurring.. I'm guessing it's an access violation of some kind, most likely because you passed an input that doesnt follow the expected format (lower vs. upper). I suggest you start by simplifying the code and removing unnecessary parts; isolate the call to `magma_dsyevd` by itself, and apply it on some small array with all predetermined sizes (don't take MEX inputs, just hard-code the arrays in the file). That way you make sure the basics are working, eliminating possible bugs elsewhere.. – Amro Oct 20 '14 at 12:35

1 Answers1

1

Let me give an example. Since I never worked with MAGMA before, I'm showing the same code only using regular LAPACK (code adapted from a previous answer I gave). It shouldn't be difficult to convert it to using MAGMA equivalent functions instead.

Note that MATLAB already ships with the BLAS/LAPACK libraries and the necessary header files for you to use from MEX-files. In fact this is the Intel MKL library, so it's a well-optimized implementation.

eig_lapack.cpp

#include "mex.h"
#include "lapack.h"

/*
// the following prototype is defined in "lapack.h" header
extern void dsyevd(char *jobz, char *uplo,
    ptrdiff_t *n, double *a, ptrdiff_t *lda, double *w,
    double *work, ptrdiff_t *lwork, ptrdiff_t *iwork, ptrdiff_t *liwork,
    ptrdiff_t *info);
*/

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    // validate input
    if (nrhs != 1 || nlhs > 2)
        mexErrMsgIdAndTxt("mex:error", "Wrong number of arguments.");
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]))
        mexErrMsgIdAndTxt("mex:error", "Input is not real double matrix.");
    mwSignedIndex m = mxGetM(prhs[0]), n = mxGetN(prhs[0]);
    if (m != n)
        mexErrMsgIdAndTxt("mex:error", "Input is not square symmetric matrix.");

    // allocate output
    plhs[0] = mxDuplicateArray(prhs[0]);
    plhs[1] = mxCreateDoubleMatrix(1, n, mxREAL);
    double *A = mxGetPr(plhs[0]);
    double *w = mxGetPr(plhs[1]);

    // query and allocate the optimal workspace size
    double workopt = 0;
    mwSignedIndex iworkopt = 0;
    mwSignedIndex lwork = -1, liwork = -1, info = 0;
    dsyevd("Vectors", "Upper", &n, A, &n, w,
        &workopt, &lwork, &iworkopt, &liwork, &info);
    lwork = (mwSignedIndex) workopt;
    liwork = (mwSignedIndex) iworkopt;
    double *work = (double*) mxMalloc(lwork * sizeof(double));
    mwSignedIndex *iwork = (mwSignedIndex*) mxMalloc(liwork * sizeof(mwSignedIndex));

    // compute eigenvectors/eigenvalues
    dsyevd("Vectors", "Upper", &n, A, &n, w,
        work, &lwork, iwork, &liwork, &info);
    mxFree(work);
    mxFree(iwork);

    // check successful computation
    if (info < 0)
        mexErrMsgIdAndTxt("mex:error", "Illegal values in arguments.");
    else if (info > 0)
        mexWarnMsgIdAndTxt("mex:warn", "Algorithm failed to converge.");
}

First we compile the MEX-file (I'm using VS2013 compiler on Windows):

>> mex -largeArrayDims eig_lapack.cpp libmwlapack.lib

Now I compare against the built-in eig function:

>> A = [1 2 3 4 5; 2 2 3 4 5; 3 3 3 4 5; 4 4 4 4 5; 5 5 5 5 5]
A =
     1     2     3     4     5
     2     2     3     4     5
     3     3     3     4     5
     4     4     4     4     5
     5     5     5     5     5

>> [V,D] = eig(A)
V =
    0.6420   -0.5234    0.3778   -0.1982    0.3631
    0.4404    0.1746   -0.6017    0.5176    0.3816
    0.1006    0.6398   -0.0213   -0.6356    0.4196
   -0.2708    0.2518    0.6143    0.5063    0.4791
   -0.5572   -0.4720   -0.3427   -0.1801    0.5629
D =
   -3.1851         0         0         0         0
         0   -0.7499         0         0         0
         0         0   -0.3857         0         0
         0         0         0   -0.2769         0
         0         0         0         0   19.5976

>> [VV,DD] = eig_lapack(A)
VV =
    0.6420   -0.5234    0.3778   -0.1982    0.3631
    0.4404    0.1746   -0.6017    0.5176    0.3816
    0.1006    0.6398   -0.0213   -0.6356    0.4196
   -0.2708    0.2518    0.6143    0.5063    0.4791
   -0.5572   -0.4720   -0.3427   -0.1801    0.5629
DD =
   -3.1851   -0.7499   -0.3857   -0.2769   19.5976

the results are the same (though not guaranteed, seeing that eigenvectors are defined up-to a scale):

>> V-VV, D-diag(DD)
ans =
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
ans =
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
Community
  • 1
  • 1
Amro
  • 123,847
  • 25
  • 243
  • 454