Some practical advices:
- VLAs are in practice located on the space-limited stack, while
malloc()
and its friends allocates on the heap, that is likely to allow bigger allocations. Moreveover you have more control on that process, as malloc()
could return NULL
if it fails. In other words you have to be careful with VLA not-to-blow your stack in runtine.
- Not all compilers support VLA, e.g. Visual Studio. Moreover C11 marked them as optional feature and allows not to support them when
__STDC_NO_VLA__
macro is defined.
From my experience (numerical programs like finding prime numbers with trial division, Miller-Rabin etc.) I wouldn't say that VLAs are any faster than malloc()
. There is some overhead of malloc()
call of course, but what seems to be more important is data access efficiency.
Here is some quick & dirty comparison using GNU/Linux x86-64 and GCC compiler. Note that results may vary from platform to another or even compiler's version. You might use as some basic (though very far of being complete) data-access malloc()
vs VLA benchmark.
prime-trial-gen.c
:
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
bool isprime(int n);
int main(void)
{
FILE *fp = fopen("primes.txt", "w");
assert(fp);
fprintf(fp, "%d\n", 2);
for (int i = 3; i < 10000; i += 2)
if (isprime(i))
fprintf(fp, "%d\n", i);
fclose(fp);
return 0;
}
bool isprime(int n)
{
if (n % 2 == 0)
return false;
for (int i = 3; i * i <= n; i += 2)
if (n % i == 0)
return false;
return true;
}
Compile & run:
$ gcc -std=c99 -pedantic -Wall -W prime-trial-gen.c
$ ./a.out
Then here is second program, that take use of generated "primes dictionary":
prime-trial-test.c
:
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
bool isprime(int n, int pre_prime[], int num_pre_primes);
int get_num_lines(FILE *fp);
int main(void)
{
FILE *fp = fopen("primes.txt", "r");
assert(fp);
int num_lines = get_num_lines(fp);
rewind(fp);
#if WANT_VLA
int pre_prime[num_lines];
#else
int *pre_prime = malloc(num_lines * sizeof *pre_prime);
assert(pre_prime);
#endif
for (int i = 0; i < num_lines; i++)
assert(fscanf(fp, "%d", pre_prime + i));
fclose(fp);
/* NOTE: primes.txt holds primes <= 10 000 (10**4), thus we are safe upto 10**8 */
int num_primes = 1; // 2
for (int i = 3; i < 10 * 1000 * 1000; i += 2)
if (isprime(i, pre_prime, num_lines))
++num_primes;
printf("pi(10 000 000) = %d\n", num_primes);
#if !WANT_VLA
free(pre_prime);
#endif
return 0;
}
bool isprime(int n, int pre_prime[], int num_pre_primes)
{
for (int i = 0; i < num_pre_primes && pre_prime[i] * pre_prime[i] <= n; ++i)
if (n % pre_prime[i] == 0)
return false;
return true;
}
int get_num_lines(FILE *fp)
{
int ch, c = 0;
while ((ch = fgetc(fp)) != EOF)
if (ch == '\n')
++c;
return c;
}
Compile & run (malloc version):
$ gcc -O2 -std=c99 -pedantic -Wall -W prime-trial-test.c
$ time ./a.out
pi(10 000 000) = 664579
real 0m1.930s
user 0m1.903s
sys 0m0.013s
Compile & run (VLA version):
$ gcc -DWANT_VLA=1 -O2 -std=c99 -pedantic -Wall -W prime-trial-test.c
ime ./a.out
pi(10 000 000) = 664579
real 0m1.929s
user 0m1.907s
sys 0m0.007s
As you might check π(10**7)
is indeed 664,579
. Notice that both execution times are almost the same.