Yesterday I asked a question here and learned that reading and writing a chunk of blocks at a time is more efficient than reading and writing one block at a time. So now I am trying to read and write a chunk of blocks, but I am getting a segmentation fault.
#define POSITIONAL_TOKEN_CHUNK_SIZE 1000000
#define POSITIONAL_TOKEN_WORD_LENGTH 15
#define POSITIONAL_TOKEN_ID_LENGTH 4
#define POSITIONAL_TOKEN_LENGTH (POSITIONAL_TOKEN_WORD_LENGTH + POSITIONAL_TOKEN_ID_LENGTH)
struct PTBlk {
    char w[POSITIONAL_TOKEN_WORD_LENGTH + 1];
    int id;
};
int transform_to_bin_a(FILE * fin, int fd) {
    int n = 0; /* total lines read */
    int m = 0; /* records buffered in the current chunk */
    /* TRANSFORM_TO_BIN_BUF_LENGTH is defined elsewhere in my program */
    char buf[TRANSFORM_TO_BIN_BUF_LENGTH] = {0};
    struct PTBlk * blks = (struct PTBlk *)malloc(POSITIONAL_TOKEN_CHUNK_SIZE * POSITIONAL_TOKEN_LENGTH);
    if (blks == NULL) {
        puts("no memory being allocated");
        return 0;
    }
    printf("total size allocated is %d bytes\n", POSITIONAL_TOKEN_CHUNK_SIZE * POSITIONAL_TOKEN_LENGTH);
    while (fgets(buf, TRANSFORM_TO_BIN_BUF_LENGTH, fin) != NULL) {
        ++n;
        sscanf(buf, "%s %d", blks[m].w, &blks[m].id);
        ++m;
        if (m >= POSITIONAL_TOKEN_CHUNK_SIZE) { // error !!
            write(fd, (void *)blks, POSITIONAL_TOKEN_CHUNK_SIZE * POSITIONAL_TOKEN_LENGTH);
            m = 0;
        }
        memset(buf, 0, TRANSFORM_TO_BIN_BUF_LENGTH);
    }
    if (m > 0) { /* flush the final partial chunk */
        printf("n:%d m:%d\n", n, m);
        write(fd, (void *)blks, m * POSITIONAL_TOKEN_LENGTH);
    }
    printf("n:%d\n", n);
    /* record the line count at the start of the file;
       POSITIONAL_TOKEN_SEARCH_BEGIN is defined elsewhere */
    lseek(fd, 0, SEEK_SET);
    write(fd, (void *)&n, POSITIONAL_TOKEN_SEARCH_BEGIN);
    free(blks);
    return 1;
}
I guess that POSITIONAL_TOKEN_CHUNK_SIZE being too large is part of the problem, but I don't understand why it would cause a segmentation fault, because the code only allocates about 19,000,000 bytes (1,000,000 * 19) on the heap. I have sometimes declared a large global array such as 'int arr[20000000];' and that was never a problem.
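To double-check my size math, I printed POSITIONAL_TOKEN_LENGTH next to sizeof(struct PTBlk) in a standalone sketch (same definitions as in the code above):

#include <stdio.h>

#define POSITIONAL_TOKEN_WORD_LENGTH 15
#define POSITIONAL_TOKEN_ID_LENGTH 4
#define POSITIONAL_TOKEN_LENGTH (POSITIONAL_TOKEN_WORD_LENGTH + POSITIONAL_TOKEN_ID_LENGTH)

struct PTBlk {
    char w[POSITIONAL_TOKEN_WORD_LENGTH + 1];
    int id;
};

int main(void) {
    /* POSITIONAL_TOKEN_LENGTH counts 15 word bytes + 4 id bytes = 19,
       but the struct stores a 16-byte array (15 chars + '\0') plus a
       4-byte int, and the compiler may also add alignment padding. */
    printf("POSITIONAL_TOKEN_LENGTH = %d\n", POSITIONAL_TOKEN_LENGTH);
    printf("sizeof(struct PTBlk)    = %zu\n", sizeof(struct PTBlk));
    return 0;
}

On my machine this prints 19 and 20, so each array element seems to occupy one more byte than the malloc budgets for.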
What am I misunderstanding in the code?
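If that one-byte difference per element is really the problem, I think the buffer (and the write() sizes) would have to be computed from sizeof(struct PTBlk) rather than POSITIONAL_TOKEN_LENGTH. Here is an untested sketch of what I mean; note that it also changes each on-disk record to sizeof(struct PTBlk) bytes, padding included, and the flush_chunk helper is just something I made up for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define POSITIONAL_TOKEN_WORD_LENGTH 15
#define POSITIONAL_TOKEN_CHUNK_SIZE 1000000

struct PTBlk {
    char w[POSITIONAL_TOKEN_WORD_LENGTH + 1];
    int id;
};

/* Write `count` records to fd, sized by the in-memory struct instead of
   the 19-byte on-disk length. Returns 1 on success, 0 on a short write. */
static int flush_chunk(int fd, const struct PTBlk *blks, size_t count) {
    size_t nbytes = count * sizeof *blks; /* sizeof includes any padding */
    return write(fd, blks, nbytes) == (ssize_t)nbytes;
}

int main(void) {
    struct PTBlk *blks = malloc(POSITIONAL_TOKEN_CHUNK_SIZE * sizeof *blks);
    if (blks == NULL) {
        puts("no memory being allocated");
        return 1;
    }
    printf("allocated %zu bytes\n", POSITIONAL_TOKEN_CHUNK_SIZE * sizeof *blks);

    /* demo: buffer one record, then flush it with the struct size */
    snprintf(blks[0].w, sizeof blks[0].w, "hello");
    blks[0].id = 42;
    int ok = flush_chunk(STDOUT_FILENO, blks, 1);

    free(blks);
    return ok ? 0 : 1;
}

Is sizing the allocation by sizeof the correct fix here, or is something else going on?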