Where can I find a reference for the AMD FMA 4 intrinsics?

Question

I am trying to modify a piece of code that uses SSE (128-bit) calls so that it uses the 256-bit FMA feature on the Bulldozer Opteron. I can't seem to find the intrinsics for these calls.

Some questions on this forum have used these intrinsics (ex: How to find the horizontal maximum in a 256-bit AVX vector )

I found this: http://msdn.microsoft.com/en-us/library/gg445140.aspx
and http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011/compiler_c/index.htm#intref_cls/common/intref_avx_fmadd_ps.htm

But I can't seem to find anything in the AMD developer docs.

The other question did not reference FMA instructions, only the AVX instruction set. Intel's AVX instruction set does not support FMA. — Jason R, Apr 05 '12 at 18:13
Yes, I understand that. I was just giving an example of what I could find. — powerrox, Apr 05 '12 at 21:26

score 3 · Accepted Answer · answered Apr 19 '12 at 11:52

You can find the intrinsics in the file fma4intrin.h. Here are the 256-bit intrinsics from this file, with the return types and some function attributes stripped. Each __builtin* function emits the FMA instruction that is part of its name. So if you want to find the intrinsic for a given instruction, look up the matching __builtin_ia32_<instructionname> call after the return and use the name of the surrounding wrapper function.

/* 256b Floating point multiply/add type instructions.  */
/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Casts __A, __B and __C to __v8sf and forwards them unchanged to
   __builtin_ia32_vfmaddps256, i.e. the vfmaddps instruction.
   NOTE(review): going by the instruction name this should compute
   __A * __B + __C per element -- confirm against the AMD manual.  */
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Casts __A, __B and __C to __v4df and forwards them unchanged to
   __builtin_ia32_vfmaddpd256, i.e. the vfmaddpd instruction.
   NOTE(review): going by the instruction name this should compute
   __A * __B + __C per element -- confirm against the AMD manual.  */
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Uses the same __builtin_ia32_vfmaddps256 builtin as the multiply-add
   form, but negates the third operand (__C) before the call.
   NOTE(review): presumably yields __A * __B - __C per element -- confirm
   against the AMD manual.  */
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Uses the same __builtin_ia32_vfmaddpd256 builtin as the multiply-add
   form, but negates the third operand (__C) before the call.
   NOTE(review): presumably yields __A * __B - __C per element -- confirm
   against the AMD manual.  */
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Calls __builtin_ia32_vfmaddps256 with the first operand (__A) negated;
   __B and __C are passed through unchanged.
   NOTE(review): presumably yields -(__A * __B) + __C per element --
   confirm against the AMD manual.  */
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Calls __builtin_ia32_vfmaddpd256 with the first operand (__A) negated;
   __B and __C are passed through unchanged.
   NOTE(review): presumably yields -(__A * __B) + __C per element --
   confirm against the AMD manual.  */
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Calls __builtin_ia32_vfmaddps256 with both the first operand (__A) and
   the third operand (__C) negated; __B is passed through unchanged.
   NOTE(review): presumably yields -(__A * __B) - __C per element --
   confirm against the AMD manual.  */
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Calls __builtin_ia32_vfmaddpd256 with both the first operand (__A) and
   the third operand (__C) negated; __B is passed through unchanged.
   NOTE(review): presumably yields -(__A * __B) - __C per element --
   confirm against the AMD manual.  */
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Casts __A, __B and __C to __v8sf and forwards them unchanged to
   __builtin_ia32_vfmaddsubps256, i.e. the vfmaddsubps instruction.
   NOTE(review): the name suggests __C is alternately added to and
   subtracted from __A * __B across elements; which elements add and
   which subtract is not shown here -- check the AMD manual.  */
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Casts __A, __B and __C to __v4df and forwards them unchanged to
   __builtin_ia32_vfmaddsubpd256, i.e. the vfmaddsubpd instruction.
   NOTE(review): the name suggests __C is alternately added to and
   subtracted from __A * __B across elements; which elements add and
   which subtract is not shown here -- check the AMD manual.  */
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256).
   Calls __builtin_ia32_vfmaddsubps256 with the third operand (__C)
   negated; __A and __B are passed through unchanged.
   NOTE(review): negating __C presumably swaps which elements are added
   and which are subtracted relative to the vfmaddsubps form -- check
   the AMD manual for the exact element assignment.  */
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}

/* 256-bit FMA4 intrinsic wrapper (return type and attributes omitted in
   this excerpt; the result is cast to __m256d).
   Calls __builtin_ia32_vfmaddsubpd256 with the third operand (__C)
   negated; __A and __B are passed through unchanged.
   NOTE(review): negating __C presumably swaps which elements are added
   and which are subtracted relative to the vfmaddsubpd form -- check
   the AMD manual for the exact element assignment.  */
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}

Yes, I already found the header file and have been using that and the MSDN link I provided above for reference. But I cannot seem to find any GCC (or AMD) documentation for these intrinsics. I also found the "AMD64 Architecture Programmer's Manual Volume 4: 128-bit and 256 bit media instructions", but that's not relevant to my question. — powerrox, May 02 '12 at 19:10

score 1 · Answer 2 · edited Oct 24 '14 at 10:23

1

You probably need this document: http://support.amd.com/TechDocs/43479.pdf. It contains all XOP and FMA4 intrinsics.

edited Oct 24 '14 at 10:23

Marc Glisse

7,550
2
30
53

answered Aug 08 '12 at 11:40

captain

815
14
26

It describes the instructions, but I can't see documentation of the intrinsics. – Marc Glisse Oct 24 '14 at 10:25

score 0 · Answer 3 · answered Oct 24 '14 at 09:38

0

Just for completeness - for the Microsoft version of the header above, use:

#include <immintrin.h>

answered Oct 24 '14 at 09:38

fies

53
7

Where can I find a reference for the AMD FMA 4 intrinsics?

3 Answers3