I am exploring how different implementations of simple loops in C99 auto-vectorize based upon the function signature.
Here is my code:
/* #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a) __assume_aligned(a,64)
#else
#define ASSUME_ALIGNED(a)
#endif
#ifndef ARRAY_RESTRICT
#define ARRAY_RESTRICT
#endif
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Undetermined size version */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* Static array versions */
void foo5(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo6(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < 2048; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
/* VLA versions */
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
if (c[i] > 0) {
a[i] = b[i];
} else {
a[i] = 0.0;
}
}
}
void foo8(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
ASSUME_ALIGNED(a);
ASSUME_ALIGNED(b);
ASSUME_ALIGNED(c);
PRAGMA_SIMD
for (int i = 0; i < n; ++i) {
a[i] = ((c[i] > 0) ? b[i] : 0.0);
}
}
When I compile with
$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
I see that the VLA cases are not auto-vectorized, but when I add the flag to assert no aliasing -fno-alias
, they are. Thus, I conclude that I should prescribe this in the source, so I attempt to do that by compiling with
$ icc -O3 -std=c99 -opt-report5 -mavx -DARRAY_RESTRICT=restrict -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
The compiler error output includes
foo.c(98): error: "restrict" is not allowed
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n],
const double ARRAY_RESTRICT c[n])
^
but as you can see, restrict is not allowed on my VLA arguments.
So my question is: is there no way to assert no aliasing of VLA in ISO C?
Note that I can assert no aliasing in the source code using pragmas - e.g. simd
, omp simd
, ivdep
etc. - and get the auto-vectorization that I want but these aren't ISO C.
In this context, ISO C means the most recent version of C, which of course is C11 as of the writing of this post.