How to convert 16-bit unsigned short to 8-bit unsigned char using scaling efficiently?

Question

I'm trying to convert 16-bit unsigned short data to 8-bit unsigned char using a scaling function. Currently I'm doing this by converting to float, scaling down, and then saturating to 8 bits. Is there a more efficient way to do this?

// Demo: scales eight 16-bit samples to 8 bits with a float factor, once with
// plain scalar code and once with SSE2, then prints both results side by side.
int _tmain(int argc, _TCHAR* argv[])
{
    float Scale=255.0/65535.0;

    USHORT sArr[8]={512,1024,2048,4096,8192,16384,32768,65535};
    BYTE bArr[8],bArrSSE[8];

    //Desired Conventional Method (the float -> BYTE cast truncates toward zero)
    for (int i = 0; i < 8; i++)
    {
        bArr[i]=(BYTE)(sArr[i]*Scale);
    }

    __m128  vf_scale = _mm_set1_ps(Scale);
    __m128i vi_zero = _mm_setzero_si128();

    __m128i vi_src = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&sArr[0]));

    // Zero-extend the eight 16-bit values to 32 bits and convert them to float.
    __m128 vf_Src_Lo=_mm_cvtepi32_ps(_mm_unpacklo_epi16(vi_src, vi_zero));
    __m128 vf_Src_Hi=_mm_cvtepi32_ps(_mm_unpackhi_epi16(vi_src, vi_zero));

    __m128 vf_Mul_Lo=_mm_mul_ps(vf_Src_Lo,vf_scale);
    __m128 vf_Mul_Hi=_mm_mul_ps(vf_Src_Hi,vf_scale);

    // Use the truncating conversion so the result matches the scalar cast.
    // Subtracting 0.5 and converting with _mm_cvtps_epi32 (round to nearest
    // even by default) is not equivalent: an exact product such as 255.0
    // becomes 254.5 and is rounded to 254 instead of 255.
    // The two packs saturate to signed 16 bit and then to unsigned 8 bit.
    __m128i v_dst_i = _mm_packus_epi16(_mm_packs_epi32(_mm_cvttps_epi32(vf_Mul_Lo), _mm_cvttps_epi32(vf_Mul_Hi)), vi_zero);
    _mm_storel_epi64((__m128i *)(&bArrSSE[0]), v_dst_i);

    for (int i = 0; i < 8; i++)
    {
        printf("ushort[%d]= %d     * %f = %.3f ,\tuChar[%d]= %d,\t SSE uChar[%d]= %d \n",i,sArr[i],Scale,(float)(sArr[i]*Scale),i,bArr[i],i,bArrSSE[i]);
    }

    return 0;
}

Please note that the scaling factor may need to be set to other values, e.g. 255.0/512.0, 255.0/1024.0 or 255.0/2048.0, so any solution should not be hard-coded for 255.0/65535.0.

Ignore the signedness in the debug view, it doesn't affect anything. The values are correct, just reinterpret the bits in your mind (or print them). — harold, Jan 20 '17 at 07:42
I see you deleted your answer - you really don't need to do this - although it's somewhat inefficient it works and it serves as a good starting point for other answers to improve upon. Please consider undeleting it. (I would write an answer myself, probably using `_mm_mulhi_epu16`, but I'm pretty busy today - maybe I'll put something together over the weekend.) — Paul R, Jan 20 '17 at 11:20

score 2 · Answer 1 · answered Jan 20 '17 at 11:23

If the ratio in your code is fixed, you can perform the scaling with the following algorithm:

Shift the high byte of each word into the lower one.
E.g. 0x200 -> 0x2, 0xff80 -> 0xff
Add an offset of -1 if the low byte was less than 0x80.
E.g. 0x200 -> Offset -1, 0xff80 -> Offset 0

The first part is easily achieved with _mm_srli_epi16

The second one is trickier, but it basically consists of taking bit 7 (the highest bit of the low byte) of each word, replicating it across the whole word, and then negating it.

I used another approach: I created a vector of words valued -1 by comparing a vector with itself for equality.
Then I isolated bit 7 of each source word and added it to the -1 words.

#include <stdio.h>
#include <emmintrin.h>

// Compares the scalar float scaling (255/65535) of eight 16-bit samples with
// an integer-only SSE2 approximation and prints both results as hex bytes.
int main(int argc, char* argv[])
{
    unsigned short input[8] = {512, 1024, 2048, 4096, 8192, 16384, 32768, 65535};
    unsigned char scalar_out[8], simd_out[16];
    float factor = 255.0/65535.0;
    int idx;

    // Reference result: multiply in float, truncate on the cast to a byte.
    for (idx = 0; idx != 8; ++idx)
    {
        scalar_out[idx] = (unsigned char)(input[idx] * factor);
    }

    // Load the eight source words.
    __m128i words = _mm_loadu_si128((__m128i const*)input);

    // Comparing a register with itself sets every bit, i.e. each word is -1.
    __m128i all_ones = _mm_cmpeq_epi8(words, words);

    // Shifting left by 8 and then right by 15 leaves bit 7 of each source
    // word (the top bit of its low byte) in bit 0.
    __m128i bit7 = _mm_srli_epi16(_mm_slli_epi16(words, 8), 15);

    // Per-word offset: -1 when the low byte is below 0x80, otherwise 0.
    __m128i offset = _mm_add_epi16(all_ones, bit7);

    // Move the high byte of each word into the low byte.
    __m128i high = _mm_srli_epi16(words, 8);

    // Apply the offset, then pack the words to bytes with unsigned
    // saturation (a word of -1 becomes 0).
    __m128i adjusted = _mm_add_epi16(high, offset);
    __m128i packed = _mm_packus_epi16(adjusted, adjusted);

    // Stores all 16 bytes; only the first 8 are printed below.
    _mm_storeu_si128((__m128i *)simd_out, packed);

    idx = 0;
    while (idx < 8)
    {
        printf("%02x %02x\n",   scalar_out[idx],simd_out[idx]);
        idx++;
    }

    return 0;
}

Paul R · Accepted Answer · 2017-01-20T14:37:10.613

Here is an implementation and test harness using _mm_mulhi_epu16 to perform a fixed point scaling operation.

scale_ref is your original scalar code, scale_1 is the floating point SSE implementation from your (currently deleted) answer, and scale_2 is my fixed point implementation.

I've factored out the various implementations into separate functions and also added a size parameter and a loop, so that they can be used for any size array (although currently n must be a multiple of 8 for the SSE implementations).

There is a compile-time flag, ROUND, which controls whether the fixed point implementation truncates (like your scalar code) or rounds (to nearest). Truncation is slightly faster.

Also note that scale is a run-time parameter, currently hard-coded to 255 (equivalent to 255.0/65535.0) in the test harness below, but it can be any reasonable value.

#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <emmintrin.h>
#include <xmmintrin.h>

#define ROUND 1     // use rounding rather than truncation

typedef uint16_t USHORT;
typedef uint8_t BYTE;

// Scalar reference implementation.
//
// Writes dest[i] = src[i] * scale / USHRT_MAX for i in [0, n), truncated
// toward zero and saturated to UCHAR_MAX.
//
//   src   - n input samples
//   dest  - n output bytes
//   scale - numerator of the scale factor scale / USHRT_MAX
//   n     - number of elements to convert
static void scale_ref(const USHORT *src, BYTE *dest, const USHORT scale, const size_t n)
{
    const float kScale = (float)scale / (float)USHRT_MAX;

    for (size_t i = 0; i < n; i++)
    {
        const float scaled = src[i] * kScale;

        // Converting a float that does not fit the destination integer type
        // is undefined behaviour, so saturate explicitly before the cast.
        dest[i] = (scaled >= (float)UCHAR_MAX) ? (BYTE)UCHAR_MAX : (BYTE)scaled;
    }
}

// Scales eight 16-bit values held in vi_src by vf_Scale in single precision
// and returns the eight result bytes in the low 64 bits of the return value.
// _mm_cvtps_epi32 rounds according to the current MXCSR rounding mode
// (round to nearest even by default); the two packs saturate to 0..255.
static __m128i scale_1_block(const __m128i vi_src, const __m128 vf_Scale)
{
    const __m128i vi_zero = _mm_setzero_si128();

    // Zero-extend to 32 bits, convert to float and apply the scale factor.
    const __m128 vf_Mul_Lo = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(vi_src, vi_zero)), vf_Scale);
    const __m128 vf_Mul_Hi = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(vi_src, vi_zero)), vf_Scale);

    // The products are never negative (unsigned input, non-negative scale),
    // so no sign fix-up is needed before the conversion.
    const __m128i vi_words = _mm_packs_epi32(_mm_cvtps_epi32(vf_Mul_Lo), _mm_cvtps_epi32(vf_Mul_Hi));

    return _mm_packus_epi16(vi_words, vi_zero);
}

// Floating point SSE2 implementation.
//
// Writes dest[i] = src[i] * scale / USHRT_MAX for i in [0, n), rounded by
// the float -> int conversion and saturated to 255.
//
//   src   - n input samples
//   dest  - n output bytes
//   scale - numerator of the scale factor scale / USHRT_MAX
//   n     - number of elements; any value is accepted, a trailing partial
//           group of fewer than 8 elements goes through a padded copy
static void scale_1(const USHORT *src, BYTE *dest, const USHORT scale, const size_t n)
{
    const __m128 vf_Scale = _mm_set1_ps((float)scale / (float)USHRT_MAX);
    const size_t n_vec = n - (n % 8);
    size_t i;

    for (i = 0; i < n_vec; i += 8)
    {
        const __m128i vi_src = _mm_loadu_si128((const __m128i *)&src[i]);
        _mm_storel_epi64((__m128i *)&dest[i], scale_1_block(vi_src, vf_Scale));
    }

    if (i < n)
    {
        // Run the tail through the same vector kernel using zero-padded
        // temporaries, so nothing is read or written beyond n elements.
        const size_t rem = n - i;
        USHORT tail_src[8] = {0};
        BYTE tail_dst[8];

        for (size_t k = 0; k < rem; k++)
        {
            tail_src[k] = src[i + k];
        }

        _mm_storel_epi64((__m128i *)tail_dst,
                         scale_1_block(_mm_loadu_si128((const __m128i *)tail_src), vf_Scale));

        for (size_t k = 0; k < rem; k++)
        {
            dest[i + k] = tail_dst[k];
        }
    }
}

// Fixed point SSE2 implementation based on "multiply high".
//
// Writes dest[i] = min(255, (src[i] * scale [+ 0x8000 if ROUND]) >> 16)
// for i in [0, n).
//
//   src   - n input samples
//   dest  - n output bytes
//   scale - 16.16 fixed point factor, i.e. the real scale times 65536
//           (255 for 255/65536, 32640 for 255/512, ...)
//   n     - number of elements; any value is accepted, a trailing partial
//           group of fewer than 8 elements is handled by a scalar loop
static void scale_2(const USHORT *src, BYTE *dest, const USHORT scale, const size_t n)
{
    const __m128i vk_scale = _mm_set1_epi16((short)scale);
    const __m128i vk_max = _mm_set1_epi16(UCHAR_MAX);
    const size_t n_vec = n - (n % 8);
    size_t i;

    for (i = 0; i < n_vec; i += 8)
    {
        const __m128i v_src = _mm_loadu_si128((const __m128i *)&src[i]);

        // Upper 16 bits of the 32-bit product: the truncated result.
        __m128i v = _mm_mulhi_epu16(v_src, vk_scale);
#if ROUND
        // Adding 0x8000 to the full product carries into the upper half
        // exactly when bit 15 of the lower half is set, so add that bit.
        v = _mm_add_epi16(v, _mm_srli_epi16(_mm_mullo_epi16(v_src, vk_scale), 15));
#endif
        // Unsigned clamp to 255: v - max(v - 255, 0). _mm_packus_epi16 reads
        // its input as signed, so without this a result of 0x8000 or more
        // would be packed to 0 instead of 255.
        v = _mm_sub_epi16(v, _mm_subs_epu16(v, vk_max));
        v = _mm_packus_epi16(v, v);
        _mm_storel_epi64((__m128i *)&dest[i], v);
    }

    // Scalar tail, same arithmetic as the vector path. The uint32_t cast
    // keeps the product out of (signed) int.
    for (; i < n; i++)
    {
        uint32_t product = (uint32_t)src[i] * scale;
#if ROUND
        product += 0x8000u;
#endif
        product >>= 16;
        dest[i] = (BYTE)(product > UCHAR_MAX ? UCHAR_MAX : product);
    }
}

// Test harness: runs the three implementations on the same eight samples
// and prints their results side by side.
int main(int argc, char* argv[])
{
    // An enum constant gives a true constant expression for the array sizes.
    // With "const size_t n" the arrays are variable length arrays in C, and
    // a variable length array may not have an initializer.
    enum { N = 8 };
    const size_t n = N;
    const USHORT scale = 255;

    USHORT src[N] = { 512, 1024, 2048, 4096, 8192, 16384, 32768, 65535 };
    BYTE dest_ref[N], dest_1[N], dest_2[N];

    (void)argc;
    (void)argv;

    scale_ref(src, dest_ref, scale, n);
    scale_1(src, dest_1, scale, n);
    scale_2(src, dest_2, scale, n);

    for (size_t i = 0; i < n; i++)
    {
        // The casts make the arguments match %u exactly.
        printf("src = %u, ref = %u, test_1 = %u, test_2 = %u\n", (unsigned)src[i], (unsigned)dest_ref[i], (unsigned)dest_1[i], (unsigned)dest_2[i]);
    }

    return 0;
}

Thank you for your answer! Could you please tell me How to convert the floating point scale value to int? i.e How 255.0/65535.0 = 255? — Balaji R, Jan 23 '17 at 09:37
@BalajiR: there is an implicit division by 65536 because we are using "multiply high", so the scaling factor just becomes 255. (The difference between `255.0/65535.0` and `255.0/65536.0` is not significant, given that your result data only has 8 bits of precision.) — Paul R, Jan 23 '17 at 11:01
Yes i understand that! But what if the Denominator is not constant? i.e 255.0/ 512.0, 255.0/ 1024.0, 255.0/ 2048.0? So how can i convert that to the Scale factor? I'm sorry if i didn't explained properly, My Numerator is constant, But the denominator only changes! — Balaji R, Jan 23 '17 at 11:16
OK - you didn't really make this requirement clear in the question, so for example @MargaretBloom's answer does not work for your newly-added use case. Anyway, for the above code, it's just simple arithmetic to generate the required scale factor, e.g. for your example of `255.0/512.0` it would be `255.0/512.0*65536 = 32640`. — Paul R, Jan 23 '17 at 11:25
Yes I'm sorry it was not clear in the question! Thank you for clarifying this! I'll leave the question as it is! — Balaji R, Jan 23 '17 at 11:39
OK - I've edited the question to make it a little clearer now. — Paul R, Jan 23 '17 at 12:23

score 0 · Answer 3 · edited May 23 '17 at 12:08

Ok found the solution with reference to this.

Here is my Solution:

// Demo: scalar float scaling of eight 16-bit samples to 8 bits next to an
// SSE2 version of the same computation; prints both results for comparison.
int _tmain(int argc, _TCHAR* argv[])
{
    float Scale=255.0/65535.0;

    USHORT sArr[8]={512,1024,2048,4096,8192,16384,32768,65535};
    BYTE bArr[8],bArrSSE[8];

    //Desired Conventional Method (the float -> BYTE cast truncates toward zero)
    for (int i = 0; i < 8; i++)
    {
        bArr[i]=(BYTE)(sArr[i]*Scale);
    }

    __m128  vf_scale = _mm_set1_ps(Scale);
    __m128i vi_zero = _mm_setzero_si128();

    __m128i vi_src = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&sArr[0]));

    // Interleaving with zero widens the unsigned 16-bit values to 32 bits,
    // so the converted floats and their products are never negative and no
    // absolute-value step is required.
    __m128 vf_Src_Lo=_mm_cvtepi32_ps(_mm_unpacklo_epi16(vi_src, vi_zero));
    __m128 vf_Src_Hi=_mm_cvtepi32_ps(_mm_unpackhi_epi16(vi_src, vi_zero));
    __m128 vf_Mul_Lo=_mm_mul_ps(vf_Src_Lo,vf_scale);
    __m128 vf_Mul_Hi=_mm_mul_ps(vf_Src_Hi,vf_scale);

    // Truncating conversion, so the SSE result matches the scalar cast above
    // (_mm_cvtps_epi32 would round to nearest, e.g. 512 -> 2 instead of 1).
    // The two packs saturate to signed 16 bit and then to unsigned 8 bit.
    __m128i v_dst_i = _mm_packus_epi16(_mm_packs_epi32(_mm_cvttps_epi32(vf_Mul_Lo), _mm_cvttps_epi32(vf_Mul_Hi)), vi_zero);
    _mm_storel_epi64((__m128i *)(&bArrSSE[0]), v_dst_i);

    for (int i = 0; i < 8; i++)
    {
        printf("ushort[%d]= %d     * %f = %.3f ,\tuChar[%d]= %d,\t SSE uChar[%d]= %d \n",i,sArr[i],Scale,(float)(sArr[i]*Scale),i,bArr[i],i,bArrSSE[i]);
    }

    return 0;
}

This is way more complicated and inefficient than it needs to be - you can just use fixed point arithmetic for scaling, without converting to/from floating point. — Paul R, Jan 20 '17 at 08:46
@Paul R Thank you for your comment! Could you please point me to some example of that method! — Balaji R, Jan 20 '17 at 08:56

How to convert 16-bit unsigned short to 8-bit unsigned char using scaling efficiently?

3 Answers