Dear stackoverflow community,
I am currently working on a MEX function that itself calls external C++ code. I am calling my MEX function in a double loop in MATLAB (48 x 48 times), creating a similarity matrix. Each similarity value is calculated by the aforementioned MEX function.
/*
* Matlab interface function.
*/
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[]) {
if (nrhs < 2) {
//we need exactly two input strings.
mexErrMsgTxt("Two input strings are needed!");
return;
}
if (nrhs > 5) {
mexErrMsgTxt("Maximum number of inputs is 5!");
return;
}
if (nrhs == 3 || nrhs == 4) {
mexErrMsgTxt("You are expected to give all three score vectors: for meta, middle and nodes score.");
return;
}
if (nlhs != 1) {
//and we only give one output argument
mexErrMsgTxt("The fragment distance only provides one output argument!");
return;
}
//if possible get score vectors
if (nrhs == 5) {
extractScoreVector(prhs, 2, META_SCORENUM, meta_scores);
extractScoreVector(prhs, 3, MIDDLE_SCORENUM, middle_scores);
extractScoreVector(prhs, 4, NODES_SCORENUM, nodes_scores);
} else {
//otherwise take default scores
meta_scores[meta_del] = -1;
meta_scores[meta_ins] = -1;
meta_scores[meta_match] = 10;
meta_scores[meta_mismatch] = -2;
middle_scores[0] = -6;
middle_scores[1] = -6;
nodes_scores[nodes_del] = -18;
nodes_scores[nodes_ins] = -18;
nodes_scores[nodes_skipl] = 0;
nodes_scores[nodes_skipr] = 0;
}
//get both string inputs.
std::string firstSeq = getMatlabString(prhs, 0);
std::string sndSeq = getMatlabString(prhs, 1);
//split them into node encodings.
firstNodes = split(firstSeq, '|');
sndNodes = split(sndSeq, '|');
//initialize distance table.
distanceTable = (int**) malloc(sizeof (int *) * firstNodes.size());
for (unsigned int i = 0; i < firstNodes.size(); i++) {
distanceTable[i] = (int*) malloc(sizeof (int) * sndNodes.size());
for (unsigned int j = 0; j < sndNodes.size(); j++) {
distanceTable[i][j] = -1;
}
}
//construct input for nodes alignment: nodes are only represented by index with normed length to 3 (instead of index 1 we append 001).
std::stringstream nodesInput;
//first the node indices of the first fragment.
for (unsigned int i = 0; i < firstNodes.size(); i++) {
int magnitude = getMagnitude(i);
for (int j = 0; j < 3 - magnitude; j++) {
nodesInput << '0';
}
nodesInput << i << '|';
}
//then an @
nodesInput << '@';
//then the reversed indices of the second fragment with normed length to 3 (instead of index 1 we append 001).
for (int i = sndNodes.size() - 1; i >= 0; i--) {
int magnitude = getMagnitude(i);
for (int j = 0; j < 3 - magnitude; j++) {
nodesInput << '0';
}
nodesInput << i << '|';
}
nodesInput << '\0';
//call nodes alignment.
char* nodes_argv[2];
//fake program name, dummy string
nodes_argv[0] = (char*) "nodes";
//actual input. The stringstream string has to be bound to a constant string
//reference in order to prevent damage to the string behind it. a string stream
//usually only protects its memory until the string is first evaluated.
//this special construct prevents the memory from being overwritten.
const std::string& tmp = nodesInput.str();
nodes_argv[1] = const_cast<char*> (tmp.c_str());
//call nodes alignment.
gapc::Opts opts;
try {
//parse inputs
opts.parse(2, nodes_argv);
} catch (std::exception &e) {
std::cerr << "Exception: " << e.what() << '\n';
std::exit(1);
}
nodes obj;
try {
obj.init(opts);
} catch (std::exception &e) {
std::cerr << "Exception: " << e.what() << '\n';
std::exit(1);
}
obj.cyk();
gapc::return_type res = obj.run();
//free distance table memory.
for (unsigned int i = 0; i < firstNodes.size(); i++) {
free(distanceTable[i]);
}
free(distanceTable);
//clear the node vectors
firstNodes.clear();
sndNodes.clear();
//Version for simple score return value
//plhs[0] = mxCreateDoubleScalar(res);
//Version for string return value
std::stringstream nodeOutput;
obj.print_result(nodeOutput, res);
const std::string& outStr = nodeOutput.str();
plhs[0] = mxCreateString(outStr.c_str());
}
The external code is the gapc::Opts and nodes obj part. So far there are no known memory leak problems with the external code, so I am guessing that the problem lies in the code I posted here. Unfortunately I am not able to find the mistake. I tried to manually free just about every variable that is mentioned in the code, but this always led to a MATLAB crash (as I see it, MATLAB tries to free the variables itself and crashes if they are no longer in memory).
The memory leak is critical here: after about 7 steps in the loop there is already about 1 GB of RAM occupied, and it goes up to about 13 GB of RAM in my test case. This is not plausible for the program, so a memory leak seems very probable.
I also tried to find a fix on Stack Overflow, but nothing mentioned here seems applicable to my scenario.
As the memory leak is very large, the most plausible candidates (as they hold the most content) are the variables firstSeq, sndSeq, firstNodes, sndNodes, distanceTable, opts and obj.
So my questions are:
- Have I forgotten to free one of those variables?
- Do you see something else that could cause a memory leak in the code?
- How can I fix that?
As far as my research goes, the objects don't have to be freed as they are managed automatically. Still, memory has to be leaking somewhere.
/edit
As requested, I am also providing the code of my helper functions. Please note that the functions "nodes_score", "meta_score" and "node_distance" are called from external functions that I invoke within the code via obj.run().
//using namespace std;
/*
 * String splitting helpers, following the approach from
 *
 * http://stackoverflow.com/questions/236129/splitting-a-string-in-c
 *
 * The three-argument form appends every piece of s that std::getline yields
 * for the delimiter delim to elems and returns elems itself. The
 * two-argument form returns the pieces in a fresh vector.
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream tokens(s);
    for (std::string piece; std::getline(tokens, piece, delim);) {
        elems.push_back(piece);
    }
    return elems;
}

std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    split(s, delim, pieces);
    return pieces;
}
//These vectors are global and contain the string encoding of the nodes for
//each fragment. They are filled during a mexFunction call by splitting the
//two input strings at '|' and are cleared again before that call returns.
//node_distance reads them by node index.
std::vector<std::string> firstNodes;
std::vector<std::string> sndNodes;
//this table contains the node distances for each combination of nodes,
//indexed as [index into firstNodes][index into sndNodes]. It is allocated
//with malloc during a mexFunction call with every entry set to -1;
//node_distance treats -1 as "not calculated yet" and stores each score it
//computes here.
int** distanceTable;
/*
 * Parses the string encoding of one node into a map from meta information
 * index to meta information content.
 *
 * nodeStr is a list of entries separated by '*'. Each entry has the form
 * <index>#<content>: everything before the first '#' is read as an integer
 * index, everything after it is the content. Only the first '#' separates
 * index and content, so the content itself may contain further '#'
 * characters. Entries without a '#', or whose index part cannot be read as
 * an integer, are skipped. If an index occurs more than once, the last entry
 * wins.
 */
std::map<int, std::string> constructMetaMap(std::string nodeStr) {
    //the map mapping meta information indices to the respective content.
    std::map<int, std::string> metaMap;
    //walk over the single meta information strings.
    std::stringstream entries(nodeStr);
    std::string entry;
    while (std::getline(entries, entry, '*')) {
        const std::string::size_type hashPos = entry.find('#');
        if (hashPos == std::string::npos) {
            //no separator, so there is no index/content pair to store.
            continue;
        }
        //read the index from the part in front of the separator; check the
        //extraction so that a failed read never yields an unset index.
        std::istringstream idxStream(entry.substr(0, hashPos));
        int metaIdx;
        if (!(idxStream >> metaIdx)) {
            continue;
        }
        metaMap[metaIdx] = entry.substr(hashPos + 1);
    }
    return metaMap;
}
//Number of entries in middle_scores.
const int MIDDLE_SCORENUM = 2;
//Scores that node_distance adds for a meta information index that exists in
//only one of the two compared nodes: entry 0 is added for an index found
//only in the first node, entry 1 for an index found only in the second node.
//mexFunction fills both entries (default -6) before the alignment runs.
int middle_scores[MIDDLE_SCORENUM];
/*
 * Emulates a call to meta alignment.
 *
 * Returns the distance score between node firstNodeIndex of the first
 * fragment and node sndNodeIndex of the second fragment. The score is the sum
 * of the meta alignment scores over all meta information indices both nodes
 * share, plus middle_scores[0] for every index only the first node has and
 * middle_scores[1] for every index only the second node has.
 *
 * Results are cached in distanceTable; a cell holding -1 counts as empty.
 *
 * NOTE(review): a std::exception from option parsing or initialisation ends
 * in std::exit(1), which terminates the whole host process.
 */
int node_distance(unsigned int firstNodeIndex, unsigned int sndNodeIndex) {
    //reuse the stored score if this pair was already handled.
    const int cachedScore = distanceTable[firstNodeIndex][sndNodeIndex];
    if (cachedScore != -1) {
        return cachedScore;
    }

    typedef std::map<int, std::string> MetaMap;
    //keyword index -> meta information content, for both nodes.
    const MetaMap lhsMeta = constructMetaMap(firstNodes[firstNodeIndex]);
    const MetaMap rhsMeta = constructMetaMap(sndNodes[sndNodeIndex]);

    int total = 0;

    //pass 1: every index of the first node.
    for (MetaMap::const_iterator lhs = lhsMeta.begin(); lhs != lhsMeta.end(); ++lhs) {
        const MetaMap::const_iterator rhs = rhsMeta.find(lhs->first);
        if (rhs == rhsMeta.end()) {
            //the second node has no value for this index: punish that.
            total += middle_scores[0];
            continue;
        }

        //both nodes have a value: align them. The input has the form
        //<first content>@<reversed second content> plus a trailing NUL.
        const std::string reversedRhs(rhs->second.rbegin(), rhs->second.rend());
        std::string alignmentInput = lhs->second;
        alignmentInput += '@';
        alignmentInput += reversedRhs;
        alignmentInput += '\0';

        char* argv[2];
        //fake program name, dummy string
        argv[0] = (char*) "meta";
        //alignmentInput outlives the alignment below, so the pointer stays
        //valid for as long as it is used.
        argv[1] = const_cast<char*> (alignmentInput.c_str());

        //call meta alignment.
        gapc::Opts opts;
        try {
            opts.parse(2, argv);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }
        meta obj;
        try {
            obj.init(opts);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }
        gapc::add_event("start");
        obj.cyk();
        const int metaScore = obj.run();
        total += metaScore;
    }

    //pass 2: indices that only the second node has.
    for (MetaMap::const_iterator rhs = rhsMeta.begin(); rhs != rhsMeta.end(); ++rhs) {
        //if the first node has no value for this index, punish that;
        //shared indices were already handled in pass 1.
        if (lhsMeta.find(rhs->first) == lhsMeta.end()) {
            total += middle_scores[1];
        }
    }

    //remember the result for later calls with the same pair.
    distanceTable[firstNodeIndex][sndNodeIndex] = total;
    return total;
}
//Number of entries in meta_scores and nodes_scores respectively.
const int META_SCORENUM = 6;
const int NODES_SCORENUM = 4;
//Score tables read by meta_score and nodes_score, which index them with the
//integer value of the requested score type. mexFunction fills them either
//from the optional score vector inputs or with default values.
int meta_scores[META_SCORENUM];
int nodes_scores[NODES_SCORENUM];
/*
 * Looks up the meta alignment score for the given operation: the operation
 * type, converted to int, is the position in meta_scores.
 */
int meta_score(meta_score_type type) {
    const int slot = static_cast<int> (type);
    return meta_scores[slot];
}
/*
 * Looks up the nodes alignment score for the given operation: the operation
 * type, converted to int, is the position in nodes_scores.
 */
int nodes_score(nodes_score_type type) {
    const int slot = static_cast<int> (type);
    return nodes_scores[slot];
}
// Utility function for extracting string inputs.
//
// Returns the content of the input prhs[strIndex] as a std::string.
// mxArrayToString hands back a buffer that has to be released with mxFree,
// which is done here after the content was copied.
// If mxArrayToString yields a null pointer (it could not produce a string for
// this input), an empty string is returned instead of building a std::string
// from a null pointer.
std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    char *buf = mxArrayToString(prhs[strIndex]);
    if (!buf) {
        return std::string();
    }
    std::string s(buf);
    mxFree(buf);
    return s;
}
//Utility function for extracting the score vector.
void extractScoreVector(const mxArray *prhs[], int vecIdx, int scorelength, int scoreVec[]) {
//Declarations
const mxArray *vecData;
double *singleVals;
int rowLen, colLen;
//Copy input pointer
vecData = prhs[vecIdx];
//Get matrix
singleVals = (double*) mxGetPr(vecData);
rowLen = mxGetN(vecData);
colLen = mxGetM(vecData);
//we don't care if it is a column or row vector but it has to be a
//SCORENUM x 1 vector.
if ((rowLen == 1 && colLen == scorelength) || (rowLen == scorelength && colLen == 1)) {
for (int i = 0; i < scorelength; i++) {
scoreVec[i] = (int) singleVals[i];
}
} else {
mexErrMsgTxt("The score vector has the wrong number of entries!");
}
}
//Returns the number of decimal digits of number: 1 for zero, the digit count
//for positive values and 0 for negative values (the counting loop only runs
//while the remaining value is positive).
int getMagnitude(int number) {
    if (number == 0) {
        return 1;
    }
    int digits = 0;
    for (int rest = number; rest > 0; rest /= 10) {
        ++digits;
    }
    return digits;
}