I am using GCC's target_clones attribute to build run-time-dispatched SIMD versions of several functions. Some of these functions are declared static; others are used by other objects in the same static library, and those have declarations, carrying the target_clones attribute, in a header file. All of the objects build fine and are assembled into a static archive with ar. However, the final application link step fails when it includes the static archive, with undefined reference errors to the versioned symbols of the library's public functions.
UPDATE: Added a gcc bug report, in case that is what this is: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91664
I created a test application which illustrates this problem: https://github.com/elementgreen/fmv-test (it has three make targets). The first, "make works", compiles the test app in a single step directly from the .c files. The second, "make also_works", compiles each .c file into a .o object file and then links those with gcc -o. The third, "make borken", does not work and illustrates the problem: each .c file is compiled into a .o object file, a static archive is created from them with ar, and gcc is then used to link that static archive into the final application.
Here are the contents of the test application I put on github:
main.c:
#include <stdlib.h>
#include <string.h>
#include "fmv-test.h"
#define ARRAY_SIZE 100000
/*
 * Test driver: allocates a zero-filled array of ARRAY_SIZE doubles and
 * passes it to fmv_test().
 *
 * Returns 0 on success, or EXIT_FAILURE if the array cannot be allocated.
 */
int
main (int argc, char **argv)
{
  double *dArray;

  /* The command-line arguments are not used. */
  (void) argc;
  (void) argv;

  dArray = malloc (ARRAY_SIZE * sizeof *dArray);
  if (dArray == NULL)
    return EXIT_FAILURE;

  memset (dArray, 0, ARRAY_SIZE * sizeof *dArray);
  fmv_test (dArray, ARRAY_SIZE);

  free (dArray);
  return 0;
}
fmv-test.h:
#ifndef FMV_TEST_H
#define FMV_TEST_H

/*
 * Asks GCC to build one clone of the annotated function for each listed
 * x86 target, plus a "default" fallback.
 *
 * NOTE(review): a translation unit that only sees this attribute on a
 * declaration appears to emit its own resolver (main.o contains
 * fmv_test.resolver) -- confirm whether the attribute should be placed on
 * the definition only.
 */
#define SIMD_CLONE __attribute__ ((__target_clones__ ("avx2","avx","sse4.1","sse2","default")))

/*
 * Adds 1.0 in place to each of the first `size` elements of dArray and
 * returns the sum of the updated elements.
 */
double fmv_test (double *dArray, int size) SIMD_CLONE;

#endif /* FMV_TEST_H */
fmv-test.c:
#include "fmv-test.h"
static void internal_func (double *dArray, int size) SIMD_CLONE;
/*
 * Adds 1.0 in place to each of the first `size` elements of dArray (via
 * internal_func) and returns the sum of the updated elements.
 *
 * Returns 0.0 when size is zero or negative.
 */
double
fmv_test (double *dArray, int size)
{
  /* Must start at zero: the loop below only ever accumulates into it. */
  double result = 0.0;
  int i;

  internal_func (dArray, size);

  for (i = 0; i < size; i++)
    result += dArray[i];

  return result;
}
/*
 * Adds 1.0 in place to each of the first `size` elements of dArray.
 * Does nothing when size is zero or negative.
 */
static void
internal_func (double *dArray, int size)
{
  double *cursor;
  double *end;

  if (size <= 0)
    return;

  end = dArray + size;
  for (cursor = dArray; cursor != end; ++cursor)
    *cursor += 1.0;
}
Makefile:
# Every target below is a command name, not a file that the recipe produces.
.PHONY: works also_works borken

# Compile and link directly from the .c files in a single gcc invocation.
works:
	@echo "This works"
	gcc -o fmv-test fmv-test.c main.c

# Compile each .c file to an object file, then link the object files.
also_works:
	@echo "This also works"
	gcc -c fmv-test.c
	gcc -c main.c
	gcc -o fmv-test fmv-test.o main.o

# Compile each .c file to an object file, collect the objects in a static
# archive with ar, then link the archive. This is the failing case.
borken:
	@echo "This doesn't work"
	gcc -c fmv-test.c
	gcc -c main.c
	ar cr fmv-test.a fmv-test.o main.o
	gcc -o fmv-test-borken fmv-test.a
Linking a static archive with gcc function multi-versioning should work. Instead it fails with the following errors:
/usr/bin/ld: fmv-test.a(main.o): in function `fmv_test':
main.c:(.text.fmv_test.resolver[fmv_test.resolver]+0x1f): undefined reference to `fmv_test.avx2.0'
/usr/bin/ld: main.c:(.text.fmv_test.resolver[fmv_test.resolver]+0x3b): undefined reference to `fmv_test.avx.1'
/usr/bin/ld: main.c:(.text.fmv_test.resolver[fmv_test.resolver]+0x57): undefined reference to `fmv_test.sse4_1.2'
/usr/bin/ld: main.c:(.text.fmv_test.resolver[fmv_test.resolver]+0x71): undefined reference to `fmv_test.sse2.3'
/usr/bin/ld: main.c:(.text.fmv_test.resolver[fmv_test.resolver]+0x7a): undefined reference to `fmv_test.default.4'
collect2: error: ld returned 1 exit status
It seems the two object files end up with different numeric suffixes on the versioned symbols (for example fmv_test.avx2.4 in fmv-test.o versus fmv_test.avx2.0 in main.o), but these still manage to link up with the "also_works" target, so I don't think that is the problem (see the objdump output below). Is this a bug in gcc? I'm using version 8.3.0 on Ubuntu 19.04.
objdump -t fmv-test.o | grep fmv_test
0000000000000000 l F .text 0000000000000062 fmv_test.default.9
0000000000000239 l F .text 0000000000000062 fmv_test.avx2.4
000000000000029b l F .text 0000000000000062 fmv_test.avx.5
00000000000002fd l F .text 0000000000000062 fmv_test.sse4_1.6
000000000000035f l F .text 0000000000000062 fmv_test.sse2.7
0000000000000000 l d .text.fmv_test.resolver 0000000000000000 .text.fmv_test.resolver
00000000000003c1 g i .text 0000000000000080 internal_func._GLOBAL___fmv_test.ifunc
0000000000000000 w F .text.fmv_test.resolver 0000000000000080 fmv_test.resolver
0000000000000000 g i .text.fmv_test.resolver 0000000000000080 fmv_test
objdump -t main.o | grep fmv_test
0000000000000000 l d .text.fmv_test.resolver 0000000000000000 .text.fmv_test.resolver
0000000000000000 g i .text.fmv_test.resolver 0000000000000080 fmv_test
0000000000000000 w F .text.fmv_test.resolver 0000000000000080 fmv_test.resolver
0000000000000000 *UND* 0000000000000000 fmv_test.avx2.0
0000000000000000 *UND* 0000000000000000 fmv_test.avx.1
0000000000000000 *UND* 0000000000000000 fmv_test.sse4_1.2
0000000000000000 *UND* 0000000000000000 fmv_test.sse2.3
0000000000000000 *UND* 0000000000000000 fmv_test.default.4