13

(The correct code is in 'Update 5')

I tried to map a range of memory from 0x100000000 to 0x200000000 in this example C code:

/* MAP_ANON is not part of ISO C/POSIX; ask glibc to expose it even when the
 * compiler is run in a strict -std=c11 style mode. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

/*
 * Ask the kernel for an 8 GiB (1UL << 33) private anonymous read/write
 * mapping, passing 17179869184 (0x400000000) as the requested address,
 * and print the address mmap() returned.
 *
 * Returns 0 when the mapping succeeds, EXIT_FAILURE when mmap() fails
 * (the reason is reported with perror()).
 */
int main(void)
{
    uint64_t *rr_addr = NULL;
    uint64_t i = 17179869184;   /* == 0x400000000 */
    int saved_errno;

    /* %zu is the conversion for size_t. */
    printf("\nsizeof(size_t): %zu\n", sizeof(size_t));

    /* %llx expects unsigned long long, so convert explicitly. */
    printf("(uint64_t)0x100000000: %llx\n",
           (unsigned long long)(uint64_t)0x100000000);
    printf("1L << 33: %llx\n", (unsigned long long)(1L << 33));

    rr_addr = mmap((void *)(uintptr_t)i, (1UL << 33), PROT_READ|PROT_WRITE,
                   MAP_ANON|MAP_PRIVATE, -1, 0);
    /* Keep mmap()'s errno: the printf() below is allowed to overwrite it. */
    saved_errno = errno;

    /* Print the result both as a pointer and as a decimal integer. */
    printf("rr_addr: %p, %llu \n", (void *)rr_addr,
           (unsigned long long)(uintptr_t)rr_addr);

    if (rr_addr == MAP_FAILED) {
        errno = saved_errno;
        perror("mmap error");
        return EXIT_FAILURE;
    }

    return 0;
}

On different systems (Linux, gcc), I get different results:

Result 1:

sizeof(size_t): 8
(uint64_t)0x100000000: 100000000
1L << 33: 200000000
rr_addr: 0xffffffffffffffff, 18446744073709551615 
mmap error: Cannot allocate memory

System info (Fedora 14):

Linux localhost.localdomain 2.6.35.10-74.fc14.x86_64 #1 SMP Thu Dec 23 16:04:50 UTC 2010 x86_64 x86_64 x86_64 GNU/Linux

gcc (GCC) 4.5.1 20100924 (Red Hat 4.5.1-4)

glibc: 2.12.90-21

Result 2:

sizeof(size_t): 8
(uint64_t)0x100000000: 100000000
1L << 33: 200000000
rr_addr: 0x400000000, 17179869184 

System info (Fedora 12):

Linux wiles 2.6.32.13 #2 SMP Fri Sep 10 01:29:43 HKT 2010 x86_64 x86_64 x86_64 GNU/Linux

gcc (GCC) 4.4.4 20100630 (Red Hat 4.4.4-10)

glibc version: 2.11.2-1

I expect "Result 2". Maybe there is something wrong with my code.

Please help me out.

Update 1: errno is printed out if mmap fails.

Update 3: after changing the mmap call to these lines:

char *cmd[20]; 

sprintf(cmd, "pmap -x %i", getpid()); 
printf("%s\n", cmd);
system(cmd);

rr_addr = mmap((void*)i, (1UL << 33), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);

printf("%s\n", cmd);
system(cmd);

Result:

sizeof(size_t): 8
(uint64_t)0x100000000: 100000000
1L << 33: 200000000
pmap -x 5618
5618:   ./test
Address           Kbytes     RSS   Dirty Mode   Mapping
0000000000400000       4       4       0 r-x--  test
0000000000600000       4       4       4 rw---  test
00007f1cc941e000    1640     280       0 r-x--  libc-2.12.90.so
00007f1cc95b8000    2044       0       0 -----  libc-2.12.90.so
00007f1cc97b7000      16      16      16 r----  libc-2.12.90.so
00007f1cc97bb000       4       4       4 rw---  libc-2.12.90.so
00007f1cc97bc000      24      16      16 rw---    [ anon ]
00007f1cc97c2000     132     108       0 r-x--  ld-2.12.90.so
00007f1cc99c6000      12      12      12 rw---    [ anon ]
00007f1cc99e0000       8       8       8 rw---    [ anon ]
00007f1cc99e2000       4       4       4 r----  ld-2.12.90.so
00007f1cc99e3000       4       4       4 rw---  ld-2.12.90.so
00007f1cc99e4000       4       4       4 rw---    [ anon ]
00007fffa0da8000     132       8       8 rw---    [ stack ]
00007fffa0dff000       4       4       0 r-x--    [ anon ]
ffffffffff600000       4       0       0 r-x--    [ anon ]
----------------  ------  ------  ------
total kB            4040     476      80
pmap -x 5618
5618:   ./test
Address           Kbytes     RSS   Dirty Mode   Mapping
0000000000400000       4       4       0 r-x--  test
0000000000600000       4       4       4 rw---  test
00007f1cc941e000    1640     280       0 r-x--  libc-2.12.90.so
00007f1cc95b8000    2044       0       0 -----  libc-2.12.90.so
00007f1cc97b7000      16      16      16 r----  libc-2.12.90.so
00007f1cc97bb000       4       4       4 rw---  libc-2.12.90.so
00007f1cc97bc000      24      16      16 rw---    [ anon ]
00007f1cc97c2000     132     108       0 r-x--  ld-2.12.90.so
00007f1cc99c6000      12      12      12 rw---    [ anon ]
00007f1cc99e0000       8       8       8 rw---    [ anon ]
00007f1cc99e2000       4       4       4 r----  ld-2.12.90.so
00007f1cc99e3000       4       4       4 rw---  ld-2.12.90.so
00007f1cc99e4000       4       4       4 rw---    [ anon ]
00007fffa0da8000     132       8       8 rw---    [ stack ]
00007fffa0dff000       4       4       0 r-x--    [ anon ]
ffffffffff600000       4       0       0 r-x--    [ anon ]
----------------  ------  ------  ------
total kB            4040     476      80
rr_addr: 0xffffffffffffffff, 18446744073709551615 
mmap error: Cannot allocate memory

Update 4: add "system("ulimit -m -v");" just before calling mmap: The output of ulimit is:

max memory size         (kbytes, -m) unlimited
virtual memory          (kbytes, -v) unlimited

The other output is the same as 'Update 3' (still fails) except the pid.

Update 5: the updated code which works on both systems:

/* MAP_ANON and MAP_NORESERVE are not part of ISO C/POSIX; ask glibc to expose
 * them even when the compiler is run in a strict -std=c11 style mode. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Ask the kernel for a 4 GiB (1 << 32) private anonymous read/write mapping
 * with MAP_NORESERVE, passing 17179869184 (0x400000000) as the requested
 * address.  The process's memory map is dumped with pmap before and after
 * the call, and the current ulimit values are printed before it.
 *
 * Returns 0 when the mapping succeeds, EXIT_FAILURE when mmap() fails
 * (the reason is reported with perror()).
 */
int main(void)
{
    uint64_t *rr_addr = NULL;
    uint64_t i = 17179869184;   /* == 0x400000000 */
    uint64_t len = 0;
    int saved_errno;

    char cmd[32];

    /* %zu is the conversion for size_t. */
    printf("\nsizeof(size_t): %zu\n", sizeof(size_t));

    len = (UINT64_C(1) << 32);
    /* %llx expects unsigned long long, so convert explicitly. */
    printf("len: %llx\n", (unsigned long long)len);

    /* getpid() is declared in <unistd.h>; pid_t is printed through long. */
    snprintf(cmd, sizeof cmd, "pmap -x %ld", (long)getpid());
    printf("%s\n", cmd);
    (void)system(cmd);              /* diagnostics only: result ignored */

    (void)system("ulimit -m -v");

    rr_addr = mmap((void *)(uintptr_t)i, len, PROT_READ|PROT_WRITE,
                   MAP_ANON|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
    /* Keep mmap()'s errno: printf() and system() below may overwrite it. */
    saved_errno = errno;

    printf("%s\n", cmd);
    (void)system(cmd);

    /* Print the result both as a pointer and as a decimal integer. */
    printf("rr_addr: %p, %llu \n", (void *)rr_addr,
           (unsigned long long)(uintptr_t)rr_addr);

    if (rr_addr == MAP_FAILED) {
        errno = saved_errno;
        perror("mmap error");
        return EXIT_FAILURE;
    }

    return 0;
}

The right answer is given by @caf: adding the MAP_NORESERVE flag to mmap solves this problem. The details of the reason are in caf's answer. Thanks a lot to caf and to everyone who kindly helped!

ericzma
  • 763
  • 3
  • 9
  • 23
  • 3
    Read the man page: "on failure -1, and errno is set". It may or may not tell you anything useful, but you ought to be printing it out before asking other people to investigate. – Tony Delroy Jan 26 '11 at 09:58
  • Why are you specifying the base address yourself? – sarnold Jan 26 '11 at 10:03
  • @Tony: Thanks! I printed the error message, it says "Cannot allocate memory". – ericzma Jan 26 '11 at 10:05
  • @sarnold: We want to write some code in assembly to read/write memory in specific range. I know it is a little strange for a normal application. – ericzma Jan 26 '11 at 10:07
  • 1
    @Zhiqiang Ma, aha! Thanks :) I wonder if this is the [Address Space Layout Randomization](http://en.wikipedia.org/wiki/Address_space_layout_randomization) giving you different 'free spaces' on each run? – sarnold Jan 26 '11 at 10:14
  • @sarnold: Nice guess! :) It is similar in some aspect. We just give the program a static memory range to use. The program can read/write freely in this specific range no matter what it uses the range to do. – ericzma Jan 26 '11 at 10:27
  • @sarnold: Actually, I was working on the DVM project by that time ( http://www.zhiqiangma.com/homepage/publication/ma-vee12-dvm.pdf ). We use mmap to support the unified and distributed memory of the DVM by reusing the page fault handling hardware and mechanism in the OS. – ericzma Nov 07 '12 at 12:37
  • In case anyone got here trying to `mmap()` the RX ring buffer of a socket... there appears to be a 4GB limit when configuring the RX ring with `setsockopt()`. This is a separate problem [for which I opened a question here](https://stackoverflow.com/questions/49859884/why-is-the-rx-ring-of-a-linux-raw-socket-limited-to-4gb-in-size). – rizard Apr 16 '18 at 14:40

5 Answers

9

If you do not actually have significantly more than 8G of swap configured, then that large mapping is likely to fail.

You can add the MAP_NORESERVE flag to mmap() to tell it not to reserve any swap space for the mapping up front.

caf
  • 233,326
  • 40
  • 323
  • 462
3

How much physical memory is there available? Linux has two distinct modes for address space allocation: Memory allocation on write (i.e. overcommit mode) or memory allocation on address space allocation. You can check by reading two files in procfs:

cat /proc/sys/vm/overcommit_memory
cat /proc/sys/vm/overcommit_ratio

If overcommit_memory is not 0, then every address space allocation must be backed by physical memory (RAM + swap space). If overcommit_memory is 0, then memory is overcommitted, i.e. the kernel will happily hand out address space, but the memory will only be allocated when data is written to the allocated address space. Even then, memory is not allocated for the full reserved address space, but only for those pages that are touched. This is kind of like booking a flight ticket: airlines usually sell more tickets than there are seats on a flight, expecting that not all booked passengers will actually show up. Now you may wonder what happens if all programs make use of the full space… Well, then something nasty kicks in: the Linux Out Of Memory Killer will wreak havoc on your system and very likely kill the processes you need the most, due to its arcane heuristics.

overcommit_ratio tells the kernel

  • in overcommit mode, the ratio to which physical memory may be overcommitted, i.e. how much more address space may be handed out than there is physical memory.

  • in non-overcommit-mode how much spare memory to keep

So maybe the overcommit mode just differs between the systems.

datenwolf
  • 159,371
  • 13
  • 185
  • 298
  • 1
    Linux VM accounting actually has three modes: Heuristic overcommit (0), Always overcommit (1) and Never overcommit (2). – caf Jan 26 '11 at 12:14
  • Thanks a lot! On both systems, the overcommit_ratio is 50 while the overcommit_memory is 0. I only have 2GB memory + 2GB swap. After adding MAP_NORESERVE flag as suggested by @caf, it can mmap 8GB memory. – ericzma Jan 26 '11 at 12:23
2

Just ran your code on Fedora 13 and it produces result 2.

Check errno when mmap() returns MAP_FAILED (-1). You can also stick the following line before and after mmap call to see if you've got space in the virtual address space of the process for a 4GB region:

system("pmap -x $$");

Update: The above actually prints the map of the child process. Correct code:

/* Format "pmap -x <pid>" for the calling process and run it with system(). */
char pmap_cmd[0x100];
snprintf(pmap_cmd, sizeof(pmap_cmd), "pmap -x %u", (unsigned)getpid());
system(pmap_cmd);
Maxim Egorushkin
  • 131,725
  • 17
  • 180
  • 271
  • @Maxim Thanks! I have added system("pmap -x $$") before and after mmap call. I have posted the updated result (update 2). I am not sure I use it in the right way. Please help me to take a look at it. – ericzma Jan 26 '11 at 10:18
  • 1
    Ooops, silly me; the `$$` gets the _shell_ pid. The `pmap` is dumping the wrong map. Replace the `system("pmap -x $$")` with something like `char pid[10]; sprintf(pid, "%i", getpid()); execlp("pmap",pid, (char *)NULL);` – sarnold Jan 26 '11 at 10:35
  • @sarnold: I have changed the code and printed the new result (Update 3). I generate the right command and execute it by calling system. The code you gave seems to be missing the '-x' parameter. I think the range (0x100000000 to 0x200000000) isn't being used, according to the output of pmap. – ericzma Jan 26 '11 at 10:50
  • @Maxim Yegorushkin: I have used a similar one (yours is better: safer ;) ), but thanks the same. I have also posted the output in "Update 3". – ericzma Jan 26 '11 at 10:53
1

Since you are trying to map at a specific address, the result will depend on your process's memory layout at the time you call mmap. The strategy for choosing the address at which the request is fulfilled is system dependent; the Linux man page describes the address only as a "hint".

So maybe in the first case there is simply not enough room in the virtual address space of your process to fulfill the request, since there is already another mapping in the way in that range.

A good idea to check if this is related to that would be to check if you succeed when you don't give the addr hint.

Jens Gustedt
  • 76,821
  • 6
  • 102
  • 177
  • Thanks for the suggestion! I change the addr to NULL in mmap call, but it still fails and gives the "Cannot allocate memory" error message. – ericzma Jan 26 '11 at 10:35
1

Maybe you are running into resource limits? Try adding system("ulimit -m -v"); to print out the amount of memory and address space that may be allocated.

EDIT: Well, I'm out of ideas. Sorry. After cleaning up the errors and warnings in the code, I have this source:

/* MAP_ANON is not part of ISO C/POSIX; ask glibc to expose it even when the
 * compiler is run in a strict -std=c11 style mode. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Ask the kernel for an 8 GiB (1UL << 33) private anonymous read/write
 * mapping, passing 17179869184 (0x400000000) as the requested address.
 * The process's memory map is dumped with pmap before and after the call.
 *
 * Returns 0 when the mapping succeeds, EXIT_FAILURE when mmap() fails
 * (the reason is reported with perror()).
 */
int main(void)
{
    uint64_t *rr_addr = NULL;
    uint64_t i = 17179869184;   /* == 0x400000000 */
    int saved_errno;

    /* %zu is the conversion for size_t. */
    printf("\nsizeof(size_t): %zu\n", sizeof(size_t));

    /* %llx expects unsigned long long, so convert explicitly. */
    printf("(uint64_t)0x100000000: %llx\n",
           (unsigned long long)(uint64_t)0x100000000);
    printf("1L << 33: %llx\n", (unsigned long long)(1L << 33));

    char cmd[32];

    /* Bounded formatting; getpid() is declared in <unistd.h>. */
    snprintf(cmd, sizeof cmd, "pmap -x %ld", (long)getpid());
    printf("%s\n", cmd);
    (void)system(cmd);              /* diagnostics only: result ignored */

    rr_addr = mmap((void *)(uintptr_t)i, (1UL << 33), PROT_READ|PROT_WRITE,
                   MAP_ANON|MAP_PRIVATE, -1, 0);
    /* Keep mmap()'s errno: printf() and system() below may overwrite it. */
    saved_errno = errno;

    printf("%s\n", cmd);
    (void)system(cmd);


    /* Print the result both as a pointer and as a decimal integer. */
    printf("rr_addr: %p, %llu \n", (void *)rr_addr,
           (unsigned long long)(uintptr_t)rr_addr);

    if (rr_addr == MAP_FAILED) {
        errno = saved_errno;
        perror("mmap error");
        return EXIT_FAILURE;
    }

    return 0;
}

and this output:

sizeof(size_t): 8
(uint64_t)0x100000000: 100000000
1L << 33: 200000000
pmap -x 23819
23819:   ./zhiqiang
Address           Kbytes     RSS   Dirty Mode   Mapping
0000000000400000       0       4       0 r-x--  zhiqiang
0000000000600000       0       4       4 r----  zhiqiang
0000000000601000       0       4       4 rw---  zhiqiang
00007f37b3c27000       0     260       0 r-x--  libc-2.12.1.so
00007f37b3da1000       0       0       0 -----  libc-2.12.1.so
00007f37b3fa0000       0      16      16 r----  libc-2.12.1.so
00007f37b3fa4000       0       4       4 rw---  libc-2.12.1.so
00007f37b3fa5000       0      12      12 rw---    [ anon ]
00007f37b3faa000       0     108       0 r-x--  ld-2.12.1.so
00007f37b41aa000       0      12      12 rw---    [ anon ]
00007f37b41c7000       0      12      12 rw---    [ anon ]
00007f37b41ca000       0       4       4 r----  ld-2.12.1.so
00007f37b41cb000       0       4       4 rw---  ld-2.12.1.so
00007f37b41cc000       0       4       4 rw---    [ anon ]
00007fff70cf8000       0      12      12 rw---    [ stack ]
00007fff70dff000       0       4       0 r-x--    [ anon ]
ffffffffff600000       0       0       0 r-x--    [ anon ]
----------------  ------  ------  ------
total kB            3912     464      88
pmap -x 23819
23819:   ./zhiqiang
Address           Kbytes     RSS   Dirty Mode   Mapping
0000000000400000       0       4       0 r-x--  zhiqiang
0000000000600000       0       4       4 r----  zhiqiang
0000000000601000       0       4       4 rw---  zhiqiang   
0000000400000000       0       0       0 rw---    [ anon ]
00007f37b3c27000       0     260       0 r-x--  libc-2.12.1.so
00007f37b3da1000       0       0       0 -----  libc-2.12.1.so
00007f37b3fa0000       0      16      16 r----  libc-2.12.1.so
00007f37b3fa4000       0       4       4 rw---  libc-2.12.1.so
00007f37b3fa5000       0      12      12 rw---    [ anon ]
00007f37b3faa000       0     108       0 r-x--  ld-2.12.1.so
00007f37b41aa000       0      12      12 rw---    [ anon ]
00007f37b41c7000       0      12      12 rw---    [ anon ]
00007f37b41ca000       0       4       4 r----  ld-2.12.1.so
00007f37b41cb000       0       4       4 rw---  ld-2.12.1.so
00007f37b41cc000       0       4       4 rw---    [ anon ]
00007fff70cf8000       0      12      12 rw---    [ stack ]
00007fff70dff000       0       4       0 r-x--    [ anon ]
ffffffffff600000       0       0       0 r-x--    [ anon ]
----------------  ------  ------  ------
total kB         8392520     464      88
rr_addr: 0x400000000, 17179869184

And details of my system:

Linux haig 2.6.35-24-generic #42-Ubuntu SMP Thu Dec 2 02:41:37 UTC 2010 x86_64 GNU/Linux
gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5)
GNU C Library (Ubuntu EGLIBC 2.12.1-0ubuntu10.1) stable release version 2.12.1, by Roland McGrath et al.
sarnold
  • 102,305
  • 22
  • 181
  • 238
  • Looks like it: ENOMEM No memory is available, or the process's maximum number of mappings would have been exceeded. – Maxim Egorushkin Jan 26 '11 at 11:02
  • I have added this command just before calling mmap. The output is shown in 'Update 4'. It shows 'unlimited' for both "max memory size" and "virtual memory". – ericzma Jan 26 '11 at 11:04
  • Thanks all the same for your help! I have also tried different systems with different versions of kernel/gcc/libc. Only on my laptop with Fedora 14 (system 1), this program fails. – ericzma Jan 26 '11 at 11:24
  • @Zhiqiang Ma, if @datenwolf doesn't have the right answer, I think it's time to file a bug report with Fedora :) – sarnold Jan 26 '11 at 11:30
  • caf's answer gives the right solution. mmap tries to reserve space for the 8GB of memory, which my computer doesn't have. Adding the MAP_NORESERVE flag avoids reserving the space. That's not a bug in Fedora; it is a bug in my code. But it's nice to have found it. – ericzma Jan 26 '11 at 12:27