I have a question if this is possible using SIMD instructions at all or if there could be some kind of Hybrid solution SIMD/Normal C# code solution to what I am trying to do or if it is not possible and will sacrifice to much speed of the SIMD instructions for it to be interesting.
My code counts how many 0,1,2,3 that exists in: inputArray
(Micro Optimization of a 4-bucket histogram of a large array or list). Result is shown in: countArray
The correct answer to this is: 3,9,1,3.
My question now is this:
As seen we also have passed along: indexArray
to the function. If we take 0
from inputArray
as an example where we have 3 occurrences. We can see in the indexArray
that those occurrences are at indexes: 9,43,5
I wonder if it is possible to add/collect this information in for example 4 buckets as well for each of the possible element values (0,1,2,3) in inputArray
?
In the below code part we can see where the indexes for example: val = 1
is found: 0,9,10
I don't know if it is possible to catch those indexes somehow that could map/get
the indexes from the indexArray
?
The main goal to for example: ´0´ is to get an array in the end like this:
int[] indexes0 = new int[3]; //But we dont know that it will be 3 beforehand
indexes0[0] = 9;
indexes0[1] = 43;
indexes0[2] = 5;
[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0]
// accumulate match counts into 4 vectors of 8-bit counters, one for each val
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match); // count -= 0 or -1
}
(Please note: I have a Piledriver CPU that can't run AVX2)
Complete Code (simplified for an array that's only one vector-width long):
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
private void button3_Click(object sender, EventArgs e)
{
//Create 16 indexes with numbers between: 0-3. The goal is to count how many of those occurences we have for the numbers: 0-3
byte[] v1 = new byte[16]; int[] v2 = new int[16];
v1[0] = 0; v2[0] = 9;
v1[1] = 1; v2[1] = 12;
v1[2] = 2; v2[2] = 55;
v1[3] = 3; v2[3] = 23;
v1[4] = 1; v2[4] = 568;
v1[5] = 1; v2[5] = 4;
v1[6] = 1; v2[6] = 6;
v1[7] = 1; v2[7] = 1008;
v1[8] = 1; v2[8] = 188800;
v1[9] = 0; v2[9] = 43;
v1[10] = 0; v2[10] = 5;
v1[11] = 3; v2[11] = 2;
v1[12] = 1; v2[12] = 1;
v1[13] = 1; v2[13] = 58;
v1[14] = 1; v2[14] = 8;
v1[15] = 3; v2[15] = 15;
/*---------------*/
ReadOnlySpan<byte> inputArray = v1;
ReadOnlySpan<int> indexArray = v2;
//Call function
countElements(inputArray, indexArray);
}
// simplified for inputArray.Count == 16 exactly, no looping or cleanup
// shows counts, doesn't find positions at all
private unsafe static void countElements(ReadOnlySpan<byte> inputArray, ReadOnlySpan<int> indexArray)
{
int[,] indexes0123 = new int[,] { }; //Store arrays for indexes found for the numbers: 0,1,2,3 in the "indexArray"
//Below starts the SIMD calculations!
int[] countArray = new int[4];
Span<Vector128<byte>> counts = stackalloc Vector128<byte>[4];
Span<Vector128<UInt64>> sum64 = stackalloc Vector128<UInt64>[4];
unsafe
{
fixed (byte* fixedInput = inputArray)
{
var v = Avx.LoadVector128(&fixedInput[0]);
for (byte val = 0; val < 4; val++)
{
//[1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0] -CompareEqual to: [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
var match = Avx.CompareEqual(v, Vector128.Create(val));
counts[val] = Avx.Subtract(counts[val], match);
}
//Here SumAbsoluteDifferences
for (int i3 = 0; i3 < 4; i3++)
{
sum64[i3] = Sse2.Add(sum64[i3], Sse2.SumAbsoluteDifferences(counts[i3], Vector128<byte>.Zero).AsUInt64()); //sum64: <2,0,0,0,3,0,0,0>
}
//UnpackHigh and get the lower element from the Vector128<UInt64>
for (int i3 = 0; i3 < 4; i3++)
{
Vector128<UInt64> upper = Sse2.UnpackHigh(sum64[i3], sum64[i3]).AsUInt64(); //3
countArray[i3] += Sse2.Add(sum64[i3], upper).ToScalar();
}
//We now know how many 0,1,2,3 we have of each: (But we also need to collect which indexes from "indexlist" each of these 4 buckets has)
//3,9,1,3
MessageBox.Show(countArray[0] + "," + countArray[1] + "," + countArray[2] + "," + countArray[3]);
}
}
}