I've recently encountered a problem with C++ object creation. The problem is similar to the one described in the question "C++ strange segmentation fault by object creation"; however, the code here is part of an open-source project and is unlikely to contain trivial errors.
The object is created inside a method, and that method is called twice in succession.
The class is defined in strtokenizer.h as follows:
// Splits a string into tokens separated by any character in `seperators`,
// then offers both sequential access (start_scan/next_token) and random
// access (token(i)) to the resulting tokens.
class strtokenizer {
protected:
vector<string> tokens;   // parsed tokens, in order of appearance
int idx;                 // cursor for next_token(); reset by start_scan()
public:
// Tokenizes `str` immediately on construction (delegates to parse()).
strtokenizer(string str, string seperators = " ");
// Re-tokenizes: appends tokens of `str` and rewinds the cursor.
void parse(string str, string seperators);
// Number of tokens currently stored.
int count_tokens();
// Token at the cursor, advancing it; "" once exhausted.
string next_token();
// Rewinds the sequential cursor to the first token.
void start_scan();
// Token at index i, or "" if i is out of range.
string token(int i);
};
And in strtokenizer.cpp, it is like this:
using namespace std;
// Tokenizes `str` using `seperators` right away; after construction the
// tokens are ready and the scan cursor sits at the first token.
strtokenizer::strtokenizer(string str, string seperators) {
parse(str, seperators);
}
// Splits `str` on any character found in `seperators` and appends each
// token to `tokens`, then rewinds the scan cursor via start_scan().
// Runs of separators never produce empty tokens.
void strtokenizer::parse(string str, string seperators) {
// Use the string's own position type so that string::npos is compared
// directly instead of being squeezed into a signed int (the original
// relied on implementation-defined size_t -> int conversion).
// NOTE(review): the original also had an `if (flag) printf(...)` debug
// block referencing an undeclared `flag`; removed as a leftover.
string::size_type start = str.find_first_not_of(seperators);
while (start != string::npos) {
// End of the current token: the next separator, or end of string.
string::size_type stop = str.find_first_of(seperators, start);
if (stop == string::npos) {
stop = str.length();
}
tokens.push_back(str.substr(start, stop - start));
// stop+1 may be past the end; find_first_not_of then returns npos,
// which cleanly terminates the loop.
start = str.find_first_not_of(seperators, stop + 1);
}
start_scan();
}
// Number of tokens produced by the last parse().
int strtokenizer::count_tokens() {
return static_cast<int>(tokens.size());
}
// Rewinds the sequential cursor so next_token() restarts from the first token.
void strtokenizer::start_scan() {
idx = 0;
}
// Returns the token at the cursor and advances it; returns "" once the
// cursor has run past the last token (or was never in range).
string strtokenizer::next_token() {
if (idx < 0 || idx >= static_cast<int>(tokens.size())) {
return "";
}
return tokens[idx++];
}
// Random access: token at index i, or "" when i is out of range.
string strtokenizer::token(int i) {
if (i < 0 || i >= static_cast<int>(tokens.size())) {
return "";
}
return tokens[i];
}
The method that creates the strtokenizer objects is as follows:
// Reads a word/id map file: the first line holds the entry count, each
// following line is "<word> <id>". Fills *pword2id.
// Returns 0 on success, 1 if the file cannot be opened or has no header line.
int dataset::read_wordmap(string wordmapfile, mapword2id * pword2id) {
pword2id->clear();

FILE * fin = fopen(wordmapfile.c_str(), "r");
if (!fin) {
printf("Cannot open file %s to read!\n", wordmapfile.c_str());
return 1;
}

char buff[BUFF_SIZE_SHORT];
string line;

// Check fgets: the original passed an uninitialized buffer to atoi when
// the file was empty (undefined behavior).
if (!fgets(buff, BUFF_SIZE_SHORT - 1, fin)) {
fclose(fin);
return 1;
}
int nwords = atoi(buff);

for (int i = 0; i < nwords; i++) {
// Stop early instead of re-parsing stale buffer contents when the
// file has fewer lines than the header claims.
if (!fgets(buff, BUFF_SIZE_SHORT - 1, fin)) {
break;
}
line = buff;

// Stack object, destroyed automatically each iteration. It is a value,
// so members are accessed with `.` (the original used `->`, which does
// not compile). Renamed `tok` to avoid shadowing C's strtok().
strtokenizer tok(line, " \t\r\n");
if (tok.count_tokens() != 2) {
continue;
}

pword2id->insert(pair<string, int>(tok.token(0), atoi(tok.token(1).c_str())));
}

fclose(fin);
return 0;
}
When the read_wordmap() method runs for the first time (the first read_wordmap() call), the 'strtok' object is created about 87k times; on the second read_wordmap() call it is expected to be created more than 88k times. However, an error is raised (sometimes 'segmentation fault', sometimes 'memory corruption (fast)') at about the 86k-th iteration of the second call, at the line:
strtokenizer strtok(line, " \t\r\n");
However, when the object-creation code block is revised as shown below, no errors occur.
strtokenizer *strtok = new strtokenizer(line, " \t\r\n");
printf("line: %s", line.c_str());
if (strtok->count_tokens() != 2) {
continue;
}
pword2id->insert(pair<string, int>(strtok->token(0), atoi(strtok->token(1).c_str())));