1

I would like to kill two birds with one stone, as the questions are very similiar:

1:
I followed this code on github Smith Waterman Alignment to create the smith-waterman in C++. After some research I understood that implementing double H[N_a+1][N_b+1]; is not possible (anymore) for the "newer" C++ versions. So to create a constant variable I changed this line to:

double **H = new double*[nReal + 1];
for (int i = 0; i < nReal + 1; i++)
    H[i] = new double[nSynth + 1];

and also the same scheme for int I_i[N_a+1][N_b+1], I_j[N_a+1][N_b+1]; and so one (well, everywhere, where a two dimensional array exists). Now I'm getting the exception:

Unhandled exception at 0x00007FFF7B413C58 in Smith-Waterman.exe: Microsoft C 
++ exception: std :: bad_alloc at location 0x0000008FF4F9FA50.

What is wrong here? Already debugged, and the program throws the exceptions above the for (int i = 0; i < nReal + 1; i++).

2:
This code uses std::strings as parameters. Would it be also possible to create a smith waterman algortihm for cv::Mat?

For maybe more clarification, my full code looks like this:

#include "BinaryAlignment.h"
#include "WallMapping.h"

//using declarations
using namespace cv;
using namespace std;

//global variables
std::string bin;
cv::Mat temp;
std::stringstream sstrMat;

const int maxMismatch = 2;
const float mu = 0.33f;
const float delta = 1.33;
int ind;


BinaryAlignment::BinaryAlignment() { }
BinaryAlignment::~BinaryAlignment() { }

/**
*** Convert matrix to binary sequence
**/
std::string BinaryAlignment::matToBin(cv::Mat src, std::experimental::filesystem::path path) {

    cv::Mat linesMat = WallMapping::wallMapping(src, path);

    for (int i = 0; i < linesMat.size().height; i++) {
        for (int j = 0; j < linesMat.size().width; j++) {
            if (linesMat.at<Vec3b>(i, j)[0] == 0
                && linesMat.at<Vec3b>(i, j)[1] == 0
                && linesMat.at<Vec3b>(i, j)[2] == 255) {
                src.at<int>(i, j) = 1;
            }
            else {
                src.at<int>(i, j) = 0;
            }
            sstrMat << src.at<int>(i, j);
        }
    }
    bin = sstrMat.str();

    return bin;
}


double BinaryAlignment::similarityScore(char a, char b) {
    double result;

    if (a == b)
        result = 1;
    else
        result = -mu;

    return result;
}

double BinaryAlignment::findArrayMax(double array[], int length) {
    double max = array[0];
    ind = 0;

    for (int i = 1; i < length; i++) {
        if (array[i] > max) {
            max = array[i];
            ind = i;
        }
    }
    return max;
}


/**
*** Smith-Waterman alignment for given sequences
**/
int BinaryAlignment::watermanAlign(std::string seqSynth, std::string seqReal, bool viableAlignment) {   
    const int nSynth = seqSynth.length();                                               //length of sequences
    const int nReal = seqReal.length();

    //H[nSynth + 1][nReal + 1]
    double **H = new double*[nReal + 1];
    for (int i = 0; i < nReal + 1; i++)
        H[i] = new double[nSynth + 1];

    cout << "passt";

    for (int m = 0; m <= nSynth; m++)
        for (int n = 0; n <= nReal; n++)
            H[m][n] = 0;

    double temp[4];
    int **Ii = new int*[nReal + 1];
    for (int i = 0; i < nReal + 1; i++)
        Ii[i] = new int[nSynth + 1];

    int **Ij = new int*[nReal + 1];
    for (int i = 0; i < nReal + 1; i++)
        Ij[i] = new int[nSynth + 1];

    for (int i = 1; i <= nSynth; i++) {
        for (int j = 1; j <= nReal; j++) {
            temp[0] = H[i - 1][j - 1] + similarityScore(seqSynth[i - 1], seqReal[j - 1]);
            temp[1] = H[i - 1][j] - delta;
            temp[2] = H[i][j - 1] - delta;
            temp[3] = 0;
            H[i][j] = findArrayMax(temp, 4);

            switch (ind) {
            case 0:                                  // score in (i,j) stems from a match/mismatch
                Ii[i][j] = i - 1;
                Ij[i][j] = j - 1;
                break;
            case 1:                                  // score in (i,j) stems from a deletion in sequence A
                Ii[i][j] = i - 1;
                Ij[i][j] = j;
                break;
            case 2:                                  // score in (i,j) stems from a deletion in sequence B
                Ii[i][j] = i;
                Ij[i][j] = j - 1;
                break;
            case 3:                                  // (i,j) is the beginning of a subsequence
                Ii[i][j] = i;
                Ij[i][j] = j;
                break;
            }
        }
    }

    //Print matrix H to console 
    std::cout << "**********************************************" << std::endl;
    std::cout << "The scoring matrix is given by  " << std::endl << std::endl;
    for (int i = 1; i <= nSynth; i++) {
        for (int j = 1; j <= nReal; j++) {
            std::cout << H[i][j] << " ";
        }
        std::cout << std::endl;
    }

    //search H for the moaximal score
    double Hmax = 0;
    int imax = 0, jmax = 0;

    for (int i = 1; i <= nSynth; i++) {
        for (int j = 1; j <= nReal; j++) {
            if (H[i][j] > Hmax) {
                Hmax = H[i][j];
                imax = i;
                jmax = j;
            }
        }
    }

    std::cout << Hmax << endl;
    std::cout << nSynth << ", " << nReal << ", " << imax << ", " << jmax << std::endl;
    std::cout << "max score: " << Hmax << std::endl;
    std::cout << "alignment index: " << (imax - jmax) << std::endl;

    //Backtracing from Hmax
    int icurrent = imax, jcurrent = jmax;
    int inext = Ii[icurrent][jcurrent];
    int jnext = Ij[icurrent][jcurrent];
    int tick = 0;
    char *consensusSynth = new char[nSynth + nReal + 2];
    char *consensusReal = new char[nSynth + nReal + 2];

    while (((icurrent != inext) || (jcurrent != jnext)) && (jnext >= 0) && (inext >= 0)) {

        if (inext == icurrent)
            consensusSynth[tick] = '-';                         //deletion in A
        else
            consensusSynth[tick] = seqSynth[icurrent - 1];      //match / mismatch in A

        if (jnext == jcurrent)
            consensusReal[tick] = '-';                          //deletion in B
        else
            consensusReal[tick] = seqReal[jcurrent - 1];        //match/mismatch in B

        //fix for adding first character of the alignment.
        if (inext == 0)
            inext = -1;
        else if (jnext == 0)
            jnext = -1;
        else
            icurrent = inext;
        jcurrent = jnext;
        inext = Ii[icurrent][jcurrent];
        jnext = Ij[icurrent][jcurrent];

        tick++;
    }


    // Output of the consensus motif to the console
    std::cout << std::endl << "***********************************************" << std::endl;
    std::cout << "The alignment of the sequences" << std::endl << std::endl;
    for (int i = 0; i < nSynth; i++) {
        std::cout << seqSynth[i];
    };
    std::cout << "  and" << std::endl;
    for (int i = 0; i < nReal; i++) {
        std::cout << seqReal[i];
    };
    std::cout << std::endl << std::endl;
    std::cout << "is for the parameters  mu = " << mu << " and delta = " << delta << " given by" << std::endl << std::endl;
    for (int i = tick - 1; i >= 0; i--)
        std::cout << consensusSynth[i];
    std::cout << std::endl;
    for (int j = tick - 1; j >= 0; j--)
        std::cout << consensusReal[j];
    std::cout << std::endl;


    int numMismatches = 0;
    for (int i = tick - 1; i >= 0; i--) {
        if (consensusSynth[i] != consensusReal[i]) {
            numMismatches++;
        }
    }

    viableAlignment = numMismatches <= maxMismatch;
    return imax - jmax;
}

Thanks!

Viktoria
  • 533
  • 2
  • 7
  • 24
  • 2
    More precisely, variable length arrays are not standard C++ (although some compilers support them as an extension). As for the bad alloc what is the value of `nReal` and `nSynth` when you allocate? – Borgleader Aug 03 '17 at 12:59
  • @Borgleader that's why I implemented the code snippet – Viktoria Aug 03 '17 at 13:01
  • @Borgleader `const int nSynth = seqSynth.length(); const int nReal = seqReal.length();` – Viktoria Aug 03 '17 at 13:02
  • 3
    What I meant was that VLAs were never standardized in C++, its not about "never versions" but rather compiler extensions. Also that doesn't tell me how much memory you're trying to allocate. 2MB? 2GB? Bad alloc means it failed to allocate the memory which means how much you're trying to allocate is potentially relevant. – Borgleader Aug 03 '17 at 13:06
  • @Borgleader oh okay, I just wrote that because in other questions/answers said older versions don't throw any error. Nevertheless I looked up for the memory allocation, it's 7GB. – Viktoria Aug 03 '17 at 13:17
  • 1
    Do note that you can replace all those new and deletes and pointers with `std::vector> name(rows, std::vector(cols));` – NathanOliver Aug 03 '17 at 13:17
  • @Viktoria Are you compiling a 32bit program? because those AFAIK cant go above 2GB. ([Related question](https://stackoverflow.com/questions/639540/how-much-memory-can-a-32-bit-process-access-on-a-64-bit-operating-system)) – Borgleader Aug 03 '17 at 13:19
  • @Borgleader no, I'm compiling a 64bit program – Viktoria Aug 03 '17 at 13:20
  • How much RAM does your machine have? – NathanOliver Aug 03 '17 at 13:21
  • @NathanOliver do you mean instead of `double **H = new double*[nReal + 1]; //and so on` `std::vector H(nReal, std::vector nSynth));` ? – Viktoria Aug 03 '17 at 13:22
  • @NathanOliver 8GB – Viktoria Aug 03 '17 at 13:23
  • As a variant to Nathan's suggestion you can use a 1D vector and index into it [like this](http://coliru.stacked-crooked.com/a/effb9f15ecd6013e) – Borgleader Aug 03 '17 at 13:23
  • `std::vector H(nReal, std::vector(nSynth));` but yeah. This gives you automatic memory management. – NathanOliver Aug 03 '17 at 13:23
  • 1
    If your machine has 8GB and the code needs 7GB then you probably don't have enough RAM to load everything into memory. You need big enough buckets and the OS and other programs are going to be consuming RAM as well. – NathanOliver Aug 03 '17 at 13:25
  • @NathanOliver also here I'll get a exception but this time in `xmemory internal header` (from ). Oh, okay. So I have to reduce the input data.. – Viktoria Aug 03 '17 at 13:29
  • 2
    @Viktoria You'll still get an error because your machine is just not capable of running the code. It requires to much RAM. – NathanOliver Aug 03 '17 at 13:30
  • @NathanOliver great, with a smaller `std::string` it works, thanks! – Viktoria Aug 03 '17 at 13:53

0 Answers0