C++ Code
void Mesh::RecalculateNormals()
{
for (int i = 0; i < indexCount; i += 3)
{
const int ia = indices[i];
const int ib = indices[i + 1];
const int ic = indices[i + 2];
const glm::vec3 e1 = glm::vec3(vert[ia].position) - glm::vec3(vert[ib].position);
const glm::vec3 e2 = glm::vec3(vert[ic].position) - glm::vec3(vert[ib].position);
const glm::vec3 no = cross(e1, e2);
vert[ia].normal += glm::vec4(no, 0.0);
vert[ib].normal += glm::vec4(no, 0.0);
vert[ic].normal += glm::vec4(no, 0.0);
}
for (int i = 0; i < vertexCount; i++)
vert[i].normal = glm::vec4(glm::normalize(glm::vec3(vert[i].normal)), 0.0f);
}
New Code with SIMD
// From : https://geometrian.com/programming/tutorials/cross-product/index.php
//
// Computes the 3D cross product vec0 x vec1 of two SSE vectors.
//
// The x, y and z components are expected in lanes 0, 1 and 2 respectively.
// Lane 3 of the result is vec0[3]*vec1[3] - vec0[3]*vec1[3], i.e. 0 whenever
// that product is finite.
[[nodiscard]] inline static __m128 cross_product(__m128 const& vec0, __m128 const& vec1) {
    // Lane rotations: kYZX turns (x, y, z, w) into (y, z, x, w),
    //                 kZXY turns (x, y, z, w) into (z, x, y, w).
    constexpr int kYZX = _MM_SHUFFLE(3, 0, 2, 1);
    constexpr int kZXY = _MM_SHUFFLE(3, 1, 0, 2);

    const __m128 a_yzx = _mm_shuffle_ps(vec0, vec0, kYZX);
    const __m128 b_zxy = _mm_shuffle_ps(vec1, vec1, kZXY);

    // (ay*bz, az*bx, ax*by, aw*bw): the positive terms of the cross product.
    const __m128 positive = _mm_mul_ps(a_yzx, b_zxy);

    // (ay*bx, az*by, ax*bz, aw*bw) rotated once more gives the negative terms
    // (az*by, ax*bz, ay*bx, aw*bw); rotating the product saves one shuffle
    // compared with rotating both operands.
    const __m128 unrotated = _mm_mul_ps(a_yzx, vec1);
    const __m128 negative = _mm_shuffle_ps(unrotated, unrotated, kYZX);

    return _mm_sub_ps(positive, negative);
}
// Scales a 4-component vector by the approximate reciprocal of its length.
//
// lpInput  - vector to normalize. All four components, including w, enter the
//            length computation.
// lpOutput - receives the scaled vector. It may refer to the same object as
//            lpInput, because the input is fully loaded before the store.
//
// The reciprocal square root comes from the SSE rsqrt instruction, which is an
// approximation rather than an exactly rounded result, so the output is only
// approximately unit length. A zero-length input produces non-finite output.
void normalize(const glm::vec4& lpInput, glm::vec4& lpOutput) {
    // Use explicit unaligned load/store instead of reinterpret_cast-ing the
    // glm::vec4 to __m128: nothing here guarantees the 16-byte alignment that
    // a __m128 access requires, and the cast also violates strict aliasing.
    const __m128 vInput = _mm_loadu_ps(&lpInput[0]);

    const __m128 vSquared = _mm_mul_ps(vInput, vInput);
    const __m128 vHalfSum = _mm_hadd_ps(vSquared, vSquared);
    // After the second horizontal add every lane holds (x*x + y*y) + (z*z + w*w).
    const __m128 vSum = _mm_hadd_ps(vHalfSum, vHalfSum);

    // The sum is already broadcast, so the packed rsqrt yields the scale factor
    // in every lane without a round trip through a scalar float.
    const __m128 vInvLength = _mm_rsqrt_ps(vSum);

    _mm_storeu_ps(&lpOutput[0], _mm_mul_ps(vInput, vInvLength));
}
void Mesh::RecalculateNormals()
{
float result[4];
glm::vec4 tmp;
for (int i = 0; i < indexCount; i += 3)
{
const int ia = indices[i];
const int ib = indices[i + 1];
const int ic = indices[i + 2];
__m128 iav = _mm_set_ps(vert[ia].position.x, vert[ia].position.y, vert[ia].position.z, 0.0f);
__m128 ibv = _mm_set_ps(vert[ib].position.x, vert[ib].position.y, vert[ib].position.z, 0.0f);
__m128 icv = _mm_set_ps(vert[ic].position.x, vert[ic].position.y, vert[ic].position.z, 0.0f);
__m128 e1i = _mm_sub_ps(iav, ibv);
__m128 e2i = _mm_sub_ps(icv, ibv);
//const glm::vec3 e1 = glm::vec3(vert[ia].position) - glm::vec3(vert[ib].position);
//const glm::vec3 e2 = glm::vec3(vert[ic].position) - glm::vec3(vert[ib].position);
//const glm::vec3 no = cross(e1, e2);
__m128 no = cross_product(e1i, e2i);
//vert[ia].normal += glm::vec4(no, 0.0);
//vert[ib].normal += glm::vec4(no, 0.0);
//vert[ic].normal += glm::vec4(no, 0.0);
_mm_storeu_ps(result, no);
tmp = glm::make_vec4(result);
vert[ia].normal += tmp;
vert[ib].normal += tmp;
vert[ic].normal += tmp;
}
for (int i = 0; i < vertexCount; i++)
vert[i].normal = glm::vec4(glm::normalize(glm::vec3(vert[i].normal)), 0.0f);
}
But it's not working. Can anyone please help me find the problem?
(I am very new to SIMD)