I'm unable to reproduce your issue.
I modified your program to add an option to generate a sample/test file:
It can just do a truncate
to create a large file. This takes a fraction of a second.
It can then fill it in with real data. This takes about 10 minutes to create a 243 GB file on my system.
The result is the same in either mode. So, IMO, the quick mode is sufficient (i.e. the file has holes). In other words, anybody can run the program in a matter of seconds on their system.
I tried every combination I could think of for this and other options. Under no circumstances could I reproduce the failure. See below for a comparison of my system and yours.
After reading below, if you can think of any other idea, I'd be glad to try it on my system to reproduce your failure.
Here is the modified program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
// GBSIZE -- convert a count of gigabytes (2^30 bytes each) to a size_t byte count
// The argument and the whole expansion are parenthesized so the macro is
// safe inside larger expressions, e.g. GBSIZE(a + b) or x / GBSIZE(n).
#define GBSIZE(_gb) ((size_t) (_gb) * 1024 * 1024 * 1024)
// GBOF -- convert a byte count to gigabytes as a double (for printing)
#define GBOF(_siz) ((double) (_siz) / (1024 * 1024 * 1024))
// -f: toggled on the command line; when set, genfile() fills the file with data
int opt_f;
// -G[gb]: size in GB of the file to generate (243 if -G has no number attached);
// nonzero makes main() generate the file and exit instead of mapping it
int opt_G;
// -v: toggled on the command line; not read anywhere in the code shown here
int opt_v;
// path of the file to generate or map ("tmp" unless exactly one name is given)
const char *file;
// 64 KiB scratch buffer that genfile() fills and writes repeatedly
char pagebuf[64 * 1024];
// ONERR -- abort the program when a failure condition holds
// _expr: condition that is true on failure
// _reason: short string naming the operation that failed
// On failure, prints the stringized condition, the reason label and
// strerror(errno) to stdout, then exits with status 1.
#define ONERR(_expr,_reason) \
    do { \
        if (_expr) { \
            printf("ONERR: " #_expr " (%s) -- %s\n", \
                (_reason),strerror(errno)); \
            exit(1); \
        } \
    } while (0)
// create (and optionally fill) the test file named by the global "file"
void genfile(void);
// print this process's /proc/<pid>/maps to stdout
void mapshow(void);
// main -- parse options, then either generate the test file or mmap it
//
// usage: prog [-f] [-G[gb]] [-v] [file]
//   -f      toggle filling the generated file with real data
//   -G[gb]  generate the file (gb gigabytes; 243 if no number is attached)
//   -v      toggle opt_v
//   file    file to generate/map ("tmp" unless exactly one name remains)
//
// Without -G, maps the file from offset 0 for (file size - 3 GB) bytes,
// read-only and shared, prints the mmap result with errno, and then calls
// mapshow(). Returns 0; exits with status 1 on any failure detected here.
int
main(int argc,char **argv)
{
    int fd;
    int err;

    setlinebuf(stdout);

    // skip over the program name
    --argc;
    ++argv;

    // options: stop at the first argument that does not start with '-'
    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;

        if (*cp != '-')
            break;

        // cp[-1] is the option letter; cp points at any value attached to it
        cp += 2;
        switch (cp[-1]) {
        case 'f':
            opt_f = ! opt_f;
            break;

        case 'G':
            opt_G = (*cp != 0) ? (int) strtol(cp,&cp,10) : 243;
            break;

        case 'v':
            opt_v = ! opt_v;
            break;

        default:
            // unrecognized options are silently ignored
            break;
        }
    }

    if (argc == 1)
        file = *argv;
    else
        file = "tmp";
    printf("file='%s'\n",file);

    // generation mode: build the file and stop
    if (opt_G) {
        genfile();
        exit(0);
    }

    fd = open(file,O_RDONLY);
    ONERR(fd < 0,"open/RDONLY");

    struct stat st;
    err = fstat(fd,&st);
    ONERR(err < 0,"fstat");

    size_t fsize = st.st_size;

    // mapsize is unsigned: a file of 3 GB or less would wrap around to a
    // huge length (or give 0) and make mmap fail for a reason that has
    // nothing to do with the issue under test
    if (fsize <= GBSIZE(3)) {
        printf("main: file too small to map (st_size=%zu, need more than %zu)\n",
            fsize,GBSIZE(3));
        exit(1);
    }

    size_t mapsize = fsize - GBSIZE(3);
    printf("main: st.st_size=%zu/%.3f mapsize=%zu/%.3F\n",
        fsize,GBOF(fsize),mapsize,GBOF(mapsize));

    // clear errno so that a successful mmap reports "Success"
    errno = 0;
    void *ptr = mmap(0, mapsize, PROT_READ, MAP_SHARED, fd, 0);
    printf("Result = %p -- errno=%d %s\n", ptr, errno, strerror(errno));

    mapshow();

    if (ptr != MAP_FAILED)
        munmap(ptr,mapsize);
    close(fd);

    // remove the temp file
#if 0
    unlink(file);
#endif

    return 0;
}
// genfile -- create the test file named by the global "file"
//
// The file is sized to opt_G gigabytes with ftruncate. When opt_f is set,
// the whole file is then overwritten with data, sizeof(pagebuf) bytes at a
// time, printing a progress line about once per second. Any failure is
// reported through ONERR, which exits the program.
void
genfile(void)
{
    int fd;
    int err;

    // get desired file size
    size_t mksize = GBSIZE(opt_G);

    // best effort: the file may not exist yet, so the result is ignored
    printf("genfile: unlink ...\n");
    unlink(file);

    printf("genfile: G=%d mksize=%zu\n",opt_G,mksize);

    // create the file
    printf("genfile: open ...\n");
    fd = open(file,O_WRONLY | O_CREAT,0644);
    ONERR(fd < 0,"open/WRONLY");

    // truncate
    printf("genfile: ftruncate ...\n");
    err = ftruncate(fd,mksize);
    ONERR(err < 0,"ftruncate");

    close(fd);

    // verify that the file really has the requested size
    struct stat st;
    err = stat(file,&st);
    ONERR(err < 0,"stat");
    printf("genfile: st_size=%zu\n",(size_t) st.st_size);

    // a size mismatch is not an errno failure -- make ONERR say "Success"
    errno = 0;
    ONERR((size_t) st.st_size != mksize,"st_size");

    // fill the file with real data -- not really necessary
    if (! opt_f)
        return;

    printf("genfile: memset ...\n");
    fd = open(file, O_RDWR);
    ONERR(fd < 0,"open/RDWR");

    size_t remlen = mksize;
    size_t outsize = 0;
    int val = 0;
    time_t todbeg = time(NULL);
    time_t todold = todbeg;

    while (remlen > 0) {
        size_t curlen = remlen;
        if (curlen > sizeof(pagebuf))
            curlen = sizeof(pagebuf);

        // each chunk gets a different fill byte
        memset(pagebuf,val,sizeof(pagebuf));

        // a zero-length write would never make progress, so it is fatal too
        ssize_t xlen = write(fd,pagebuf,curlen);
        ONERR(xlen <= 0,"write");

        // show progress about once per second
        time_t todnow = time(NULL);
        if ((todnow - todold) >= 1) {
            todold = todnow;

            double pct = outsize;
            pct /= mksize;
            pct *= 100;

            printf("\rELAPSED: %ld %.3f/%.3f %.3f%%",
                (long) (todnow - todbeg),GBOF(outsize),GBOF(mksize),pct);
            fflush(stdout);
        }

        // write() may transfer fewer bytes than requested: advance by what
        // was actually written so that no part of the file is skipped
        remlen -= (size_t) xlen;
        outsize += (size_t) xlen;
        ++val;
    }

    printf("\n");

    // deferred write errors can surface at close time
    err = close(fd);
    ONERR(err < 0,"close");
}
// mapshow -- print this process's memory map to stdout
//
// Prints a blank line, then copies /proc/<pid>/maps line by line to stdout.
// Exits through ONERR if the maps file cannot be opened.
void
mapshow(void)
{
    // named mapsfile so that it does not shadow the global "file"
    char mapsfile[100];
    char buf[1000];

    printf("\n");

    snprintf(mapsfile,sizeof(mapsfile),"/proc/%d/maps",(int) getpid());
    FILE *xfsrc = fopen(mapsfile,"r");
    ONERR(xfsrc == NULL,"fopen/maps");

    while (fgets(buf,sizeof(buf),xfsrc) != NULL)
        fputs(buf,stdout);

    fclose(xfsrc);
}
Here is my configuration:
COMMAND: uname -r
5.3.11-100.fc29.x86_64
COMMAND: sysctl vm.overcommit_memory
vm.overcommit_memory = 0
COMMAND: ulimit -a
core file size (blocks, -c) unlimited
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 47763
max locked memory (kbytes, -l) 16384
max memory size (kbytes, -m) unlimited
open files (-n) 1024
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) 47763
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
COMMAND: free -m
total used free shared buff/cache available
Mem: 11972 3744 750 68 7477 7842
Swap: 122879 1147 121732
Slight differences:
You have 192 GB of RAM. But, I only have 12 GB of RAM. This difference should work in your favor. But, it doesn't. The program works on my system that has less than 1/10 of the amount of RAM.
I have a 128 GB swap disk. But, I reran the program after doing swapoff -a
to disable all swap disks. There was no difference in program operation.
vm.overcommit_memory
is 0. But, I set it to 1 and there was no difference in program operation.
On my system, vm.mmap_min_addr is 65536 (see TASK_SIZE below).
My computer system is over ten years old.
I'm (probably) running a much older kernel version.
At the time of the test, I had:
- A few gnome-terminal windows
- firefox with pages on SO
- thunderbird
- A few background shell programs (of my own design).
Because of my much smaller RAM, I have to dispute neo-jgrec's answer:
On an x86 (64 bit) system, TASK_SIZE
can be either:
- Normal system:
1ul << 47
131,072 GB (128 TB)
- 5 level paging enabled:
1ul << 56
67,108,864 GB (65,536 TB)
Even using the smaller address value, we are clearly not going beyond TASK_SIZE.
I've done mmap
on many 100+ GB files, in the past, without issue. For example, see my answer: read line by line in the most efficient way platform specific
Here is the stat of the file:
File: tmp
Size: 260919263232 Blocks: 509608032 IO Block: 4096 regular file
Device: 901h/2305d Inode: 180624922 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 1000/ user) Gid: ( 1000/ user)
Context: unconfined_u:object_r:user_tmp_t:s0
Access: 2023-06-18 15:39:51.253702772 -0400
Modify: 2023-06-18 15:58:43.512226035 -0400
Change: 2023-06-18 15:58:43.512226035 -0400
Birth: -
Here is the program output:
file='tmp'
main: st.st_size=260919263232/243.000 mapsize=257698037760/240.000
Result = 0x7edf00cf9000 -- errno=0 Success
00400000-00401000 r--p 00000000 09:01 180624914 /home/user/bigmmap/orig
00401000-00402000 r-xp 00001000 09:01 180624914 /home/user/bigmmap/orig
00402000-00403000 r--p 00002000 09:01 180624914 /home/user/bigmmap/orig
00403000-00404000 r--p 00002000 09:01 180624914 /home/user/bigmmap/orig
00404000-00405000 rw-p 00003000 09:01 180624914 /home/user/bigmmap/orig
00405000-00415000 rw-p 00000000 00:00 0
013bb000-013dc000 rw-p 00000000 00:00 0 [heap]
7edf00cf9000-7f1b00cf9000 r--s 00000000 09:01 180624922 /home/user/bigmmap/tmp
7f1b00cf9000-7f1b00d1b000 r--p 00000000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00d1b000-7f1b00e68000 r-xp 00022000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00e68000-7f1b00eb4000 r--p 0016f000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00eb4000-7f1b00eb5000 ---p 001bb000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00eb5000-7f1b00eb9000 r--p 001bb000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00eb9000-7f1b00ebb000 rw-p 001bf000 09:00 1202975 /usr/lib64/libc-2.28.so
7f1b00ebb000-7f1b00ec1000 rw-p 00000000 00:00 0
7f1b00f16000-7f1b00f17000 r--p 00000000 09:00 1182318 /usr/lib64/ld-2.28.so
7f1b00f17000-7f1b00f37000 r-xp 00001000 09:00 1182318 /usr/lib64/ld-2.28.so
7f1b00f37000-7f1b00f3f000 r--p 00021000 09:00 1182318 /usr/lib64/ld-2.28.so
7f1b00f3f000-7f1b00f40000 r--p 00028000 09:00 1182318 /usr/lib64/ld-2.28.so
7f1b00f40000-7f1b00f41000 rw-p 00029000 09:00 1182318 /usr/lib64/ld-2.28.so
7f1b00f41000-7f1b00f42000 rw-p 00000000 00:00 0
7fff0d6d7000-7fff0d6f8000 rw-p 00000000 00:00 0 [stack]
7fff0d75a000-7fff0d75d000 r--p 00000000 00:00 0 [vvar]
7fff0d75d000-7fff0d75e000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]