I'm summing a bunch of harmonics together, each with a different phase and magnitude, using vectorization (SSE2 is the highest SIMD level available).
Here's my current attempt:
// Computes one output sample as the sum of `numHarmonics` sine partials and
// advances every partial's phase by one step. Partials are packed four per
// SIMD vector in mPhases4 / mMagnitudes4; phases are kept in [0, 1) and scaled
// by g2PIf (presumably 2*pi -- confirm) right before the sine is evaluated.
float output = 0.0f;
// Per-sample phase step of the fundamental, broadcast to all four lanes.
simd::float_4 freqFundamentalNormalized = freq * (1.0f / sampleRate);
// Harmonic numbers handled by the current vector: 1..4, then 5..8, and so on.
simd::float_4 harmonicIndex{1.0f, 2.0f, 3.0f, 4.0f};
simd::float_4 harmonicIncrement{4.0f, 4.0f, 4.0f, 4.0f};
// harmonics
const int numHarmonicsV4 = numHarmonics / 4;
const int numHarmonicsRemainder = numHarmonics - (numHarmonicsV4 * 4);
// A trailing, partially filled vector goes through the same loop body as the
// full vectors; only the number of lanes added to `output` differs.
const int numVectors = numHarmonicsV4 + (numHarmonicsRemainder > 0 ? 1 : 0);
for (int i = 0; i < numVectors; i++) {
    // Full vectors contribute all four lanes, the last partial one only the
    // lanes that hold real harmonics.
    const int activeLanes = (i < numHarmonicsV4) ? 4 : numHarmonicsRemainder;
    // signal
    simd::float_4 sineOutput4 = simd::sin(mPhases4[i] * g2PIf) * mMagnitudes4[i];
    // Lanes are added one by one, in order, so the floating-point
    // accumulation order of `output` stays fixed.
    for (int v = 0; v < activeLanes; v++) {
        output += sineOutput4[v];
    }
    // increments: advance each phase by (harmonic number * fundamental step),
    // then drop the integer part to wrap back into [0, 1).
    mPhases4[i] += harmonicIndex * freqFundamentalNormalized;
    mPhases4[i] -= simd::floor(mPhases4[i]);
    harmonicIndex += harmonicIncrement;
}
but:
- I think it can be optimized further, maybe with some math tricks or by saving work on some of the increments
- I don't like repeating the "same code" twice, once for the V4 part and once for the remainder (when the number of harmonics is not a multiple of 4): is there a way to apply a sort of "mask" to the last V4, for example by setting its unused magnitudes to 0? (That way it would perform the same operations in the same block, but the unused lanes would not contribute to the final output.)