I'm trying to share a text file between forked processes on my Ubuntu x86_64: the file will not be absurdly large, since strings will be written only if there is not already another identical string in the file; strings will be hostnames of visited websites, so I'll assume no more than 255 bytes for each hostname.
When it is a process' turn to write in shared object, it is OK; once all the processes wrote in shared object, msync
should make the writing effective on the disk, but the mapped.txt
file created only contain one string from arrayString
, i.e. the string the last process wrote in shared object.
Here's the code:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
// first forked process will write "first" in file, and so on
const char *arrayString[] = {
"first",
"second",
"third"
};
int main(void) {
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
char *data;
data = (char *)mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
sem_wait(sem);
size_t textsize = strlen(arrayString[index])+1;
if (ftruncate(fd, sizeof(textsize)) == -1) {
perror("ftruncate:");
return EXIT_FAILURE;
}
for (size_t i = 0; i < textsize; i++) {
printf("%d Writing character %c at %zu\n", getpid(), arrayString[index][i], i);
data[i] = arrayString[index][i];
}
printf("%d wrote ", getpid());
for (size_t i = 0; i < textsize; i++) {
printf("%c", data[i]);
}
printf("\n");
if (msync(data, textsize, MS_SYNC) == -1) {
perror("Could not sync the file to disk");
}
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
close(fd);
return EXIT_SUCCESS;
}
This is one possible output of the code above for three child processes (this is fine):
20373 Writing character s at 0
20373 Writing character e at 1
20373 Writing character c at 2
20373 Writing character o at 3
20373 Writing character n at 4
20373 Writing character d at 5
20373 Writing character at 6
20373 wrote second
20374 Writing character t at 0
20374 Writing character h at 1
20374 Writing character i at 2
20374 Writing character r at 3
20374 Writing character d at 4
20374 Writing character at 5
20374 wrote third
20372 Writing character f at 0
20372 Writing character i at 1
20372 Writing character r at 2
20372 Writing character s at 3
20372 Writing character t at 4
20372 Writing character at 5
20372 wrote first
And here's the content of mapped.txt
(this is bad):
first^@^@^@
I expected:
second
third
first
but all I get is only the string of the last process, with those strange symbols. I'd like to keep this file persistent in memory, but because of the I/O slowness, I'm trying to use memory mapping. Any idea why my file only contains the string written by the last process accessing the shared file?
Edit: I think I get it, it seems to work now: I hope it will be of help to someone. Compiled with g++ -g -o mapthis mapthis.cpp -lrt -pthread
. Beware that some error checking are missing, like for fsync
, snprintf
and lseek
.
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
const char *arrayString[] = {
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com"
};
int main(void) {
int index;
int children = sizeof(arrayString) / sizeof(char*);;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
struct stat filestats;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
if (fstat(fd, &filestats) < 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = (char *)mmap(NULL, filestats.st_size ? filestats.st_size : 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("first map:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
sleep(1);
pid_t pid = fork();
if (pid == 0) {
int nw = 0;
int hostnameSize = 0;
const size_t origsize = filestats.st_size;
char *hostPos = NULL;
char *numPos = NULL;
char *backslashPos = NULL;
char tempBuff[64];
memset((char *)tempBuff, 0, sizeof(tempBuff));
sem_wait(sem);
// remap to current file size if it changed
fstat(fd, &filestats);
// file empty, just insert
if (filestats.st_size == 0) {
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
else {
// file not empty, let's look for string
hostPos = strstr(data, arrayString[index]);
if (hostPos) {
// string is already inserted, search for offset of number of seconds
lseek(fd, hostPos-data, SEEK_SET);
numPos = strchr(hostPos, ' ')+1;
backslashPos = strchr(numPos, '\n');
long unsigned before = atoi(numPos);
long unsigned now = (unsigned long)time(NULL);
long unsigned difference = now - before;
printf("%s visited %ld seconds ago (%ld - %ld)\n",
arrayString[index], difference, now, before);
nw = snprintf(tempBuff, backslashPos-hostPos+1, "%s %010lu", arrayString[index], now);
write(fd, tempBuff, nw);
write(fd, "\n", 1);
fsync(fd);
}
else {
data = (char *)mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
lseek(fd, 0, SEEK_END);
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
else if (pid > 0) {
wait(NULL);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}