I'm trying to convert 16-bit unsigned short
data to 8-bit unsigned char
data using a scaling function. Currently I do this by converting to float, scaling down, and then saturating to 8 bits. Is there a more efficient way to do this?
// Demo: scales eight 16-bit samples down to 8 bits twice -- once with a plain
// scalar loop and once with SSE2 -- and prints both results side by side.
// Both paths truncate the scaled value toward zero and saturate at 255.
// Always returns 0; argc/argv are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    float Scale=255.0/65535.0;
    USHORT sArr[8]={512,1024,2048,4096,8192,16384,32768,65535};
    BYTE bArr[8],bArrSSE[8];

    //Desired Conventional Method
    for (int i = 0; i < 8; i++)
    {
        // Clamp before the cast: converting an out-of-range float to BYTE is
        // undefined, and the SSE path below saturates at 255 as well.
        float scaled = sArr[i]*Scale;
        bArr[i]=(BYTE)(scaled > 255.0f ? 255.0f : scaled);
    }

    // SSE2 method: widen 8 x u16 -> 2 x (4 x i32) -> float, scale, truncate, pack.
    __m128 vf_scale = _mm_set1_ps(Scale);
    __m128i vi_zero = _mm_setzero_si128();
    __m128i vi_src = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&sArr[0]));

    // Interleaving with zero zero-extends each 16-bit lane to 32 bits.
    __m128 vf_Src_Lo=_mm_cvtepi32_ps(_mm_unpacklo_epi16(vi_src, vi_zero));
    __m128 vf_Src_Hi=_mm_cvtepi32_ps(_mm_unpackhi_epi16(vi_src, vi_zero));

    __m128 vf_Mul_Lo=_mm_mul_ps(vf_Src_Lo,vf_scale);
    __m128 vf_Mul_Hi=_mm_mul_ps(vf_Src_Hi,vf_scale);

    // _mm_cvttps_epi32 truncates toward zero, exactly like the (BYTE) cast in
    // the scalar loop. Subtracting 0.5 and using the rounding conversion
    // (_mm_cvtps_epi32, round-to-nearest-even by default) is off by one when
    // the product is an exact odd integer, e.g. 255.0 - 0.5 = 254.5 -> 254.
    __m128i vi_Lo = _mm_cvttps_epi32(vf_Mul_Lo);
    __m128i vi_Hi = _mm_cvttps_epi32(vf_Mul_Hi);

    // i32 -> i16 with signed saturation, then i16 -> u8 with unsigned
    // saturation; the upper 8 bytes of the result are zero padding.
    __m128i v_dst_i = _mm_packus_epi16(_mm_packs_epi32(vi_Lo, vi_Hi), vi_zero);
    _mm_storel_epi64(reinterpret_cast<__m128i*>(&bArrSSE[0]), v_dst_i);

    for (int i = 0; i < 8; i++)
    {
        printf("ushort[%d]= %d * %f = %.3f ,\tuChar[%d]= %d,\t SSE uChar[%d]= %d \n",i,sArr[i],Scale,(float)(sArr[i]*Scale),i,bArr[i],i,bArrSSE[i]);
    }
    return 0;
}
Please note that the scaling factor may need to be set to other values, e.g. 255.0/512.0
, 255.0/1024.0
or 255.0/2048.0
, so any solution should not be hard-coded for 255.0/65535.0
.