0

I am trying to reinterpret-cast from `__m256i` to `__m256` using the casting intrinsics; however, I observe that the underlying value (bits) changes after casting, which I did not expect. This is the code snippet:

    #include <iostream>
    #include <xmmintrin.h>
    #include <immintrin.h>
    #include <x86intrin.h>
    #include <inttypes.h>
    #include <iomanip>
    #include <bit>
    
    using namespace std;
    
    // Reproduction for the question: fills a __m256i with eight copies of
    // 0x3f0b0000, views it as __m256 through _mm256_castsi256_ps, and prints
    // lane 0 before and after the cast so the two bit patterns can be compared.
    int main()
    {
        // uint32 value representing the number 0.5430 in float
        // hexadecimal value is 0x3f0b0000

        __m256i input_uint32 = _mm256_set_epi32(
            1057685504U, 1057685504U, 1057685504U, 1057685504U,
            1057685504U, 1057685504U, 1057685504U, 1057685504U);
        
        // Take the low 128-bit half of the integer vector, then read its lane 0.
        // NOTE(review): _mm_extract_ps takes a __m128 as its first parameter,
        // but y1_128 is a __m128i -- whether this compiles depends on the
        // compiler/options in use.
        __m128i y1_128 = _mm256_extracti128_si256(input_uint32, 0);
        uint32_t u1 = _mm_extract_ps(y1_128, 0);

        // Prints 3f0b0000
        cout << hex << u1 << endl;              
    
        // Expression below seems to be doing casting and not reinterpret casting
        // NOTE(review): that impression comes from the extraction further down;
        // input_float32 and t1_128 still carry the same bit pattern as
        // input_uint32 (this can be checked by inspecting them in a debugger).
        __m256 input_float32 = _mm256_castsi256_ps(input_uint32);
    
        // NOTE(review): _mm256_extractf32x4_ps is an AVX-512 intrinsic, although
        // the compiler may optimize the extraction away.
        __m128 t1_128 = _mm256_extractf32x4_ps(input_float32, 0);

        // NOTE(review): _mm_extract_ps returns an int holding the lane's bits.
        // Initializing a float from that int is an implicit int -> float value
        // conversion, so f1 becomes the number 1057685504.0f rather than the
        // float whose bit pattern is 0x3f0b0000. This line, not the cast
        // intrinsic above, is where the bits change.
        float f1 = _mm_extract_ps(t1_128, 0);
        // Reads f1's object representation back as a uint32_t for printing.
        uint32_t f1_as_uint32 = reinterpret_cast<uint32_t &>(f1);

        // Prints 4e7c2c00 (instead of 3f0b0000)
        cout << hex << f1_as_uint32 << endl;
        // Prints 1.05769e+09 (it does cast instead of reinterpret cast)
        cout << f1 << endl;

        // By doing normal reinterpret casting
        // (scalar reference: reinterprets the same 32-bit pattern as a float)
        uint32_t data_uint32 = 1057685504U;
        float expected_result = reinterpret_cast<float &>(data_uint32);
        // Prints 0.5430
        cout << expected_result << endl;
    
        return 0;
    }

My understanding is that the intrinsic `_mm256_castsi256_ps` would act as a reinterpret_cast, but it seems to be performing a value-converting cast instead. Am I perhaps missing something? Is there another expression for reinterpret_cast?

I would appreciate any information you can provide on this issue.

Paul R
  • 208,748
  • 37
  • 389
  • 560
  • 2
    Note that [`_mm_extract_ps` confusingly returns an `int`](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps&ig_expand=3019). You then implicitly cast this int result to a float here: `float f1 = _mm_extract_ps(t1_128, 0)`. – Paul R Jun 10 '22 at 14:10
  • See also: https://stackoverflow.com/q/5526658/253056 – Paul R Jun 10 '22 at 14:13
  • 1
    What compiler/options are you using? `_mm256_extractf32x4_ps` is AVX512 (though your compiler may optimize it away) and `_mm_extract_ps` needs a `__m128` as first parameter. – chtz Jun 10 '22 at 14:13
  • 1
    Although this seems not to be the problem here, `reinterpret_cast` is generally unsafe. If possible, use `std::bit_cast` or, alternatively, `std::memcpy`. – chtz Jun 10 '22 at 14:15
  • 1
    If you single-step with a debugger and look at variable values, you'll see that `input_float32` and `t1_128` do indeed have the same bit-patterns as `input_uint32`. But, as Paul pointed out, `float f1 = _mm_extract_ps()` does implicit int->float conversion on that integer bit-pattern, like you told it to. Every function has a return type, so you were going to get implicit conversion in `uint32_t u1 = foo()` or `float f1 = foo()`. Intrinsics aren't magic; they obey normal C++ rules (except strict aliasing). – Peter Cordes Jun 10 '22 at 20:20

0 Answers0