 | Code: -- C:/devel/sl/cool/linden_old/indra/newview/llface.cpp Wed Jul 29 00:25:54 2020 +++ C:/devel/sl/cool/linden/indra/newview/llface.cpp Fri Oct 30 19:34:35 2020 @@ -57,10 +57,16 @@ #include "llvoclouds.h" #include "llvopartgroup.h" #include "llvosky.h" #include "llvovolume.h" +#if defined(__AVX2__) +#include <immintrin.h> +#else +#include <xmmintrin.h> +#endif + #define LL_MAX_INDICES_COUNT 1000000 static LLStaticHashedString sTextureIndexIn("texture_index_in"); static LLStaticHashedString sColorIn("color_in"); @@ -1720,23 +1726,22 @@ if (!mat && do_bump) { bump_tc.reserve(num_vertices); } + + if (texgen == LLTextureEntry::TEX_GEN_PLANAR) { LLVector4a vec; for (S32 i = 0; i < num_vertices; ++i) { LLVector2 tc(vf.mTexCoords[i]); LLVector4a& norm = vf.mNormals[i]; LLVector4a& center = *(vf.mCenter); - if (texgen == LLTextureEntry::TEX_GEN_PLANAR) - { vec = vf.mPositions[i]; vec.mul(scalea); planarProjection(tc, norm, center, vec); - } if (tex_mode && mTextureMatrix) { LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); tmp = tmp * *mTextureMatrix; @@ -1753,10 +1758,164 @@ if (!mat && do_bump) { bump_tc.push_back(tc); } } + } + else if (tex_mode && mTextureMatrix) + { + for (S32 i = 0; i < num_vertices; ++i) + { + LLVector2 tc(vf.mTexCoords[i]); + + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + + *dst++ = tc; + + if (!mat && do_bump) + { + bump_tc.push_back(tc); + } + } + } + else + { + S32 i = 0; +#if defined(__AVX2__) + __m256 cos_vec = _mm256_set1_ps(cos_ang); + __m256 sin_vec = _mm256_set1_ps(sin_ang); + __m256 off = _mm256_set1_ps(-0.5f); + __m256 osoff = _mm256_set1_ps(os + 0.5f); + __m256 otoff = _mm256_set1_ps(ot + 0.5f); + __m256 ms_vec = _mm256_set1_ps(ms); + __m256 mt_vec = _mm256_set1_ps(mt); + + for (; i + 8 <= num_vertices; i += 8) + { + F32 sv[8]; + F32 tv[8]; + + sv[0] = vf.mTexCoords[i].mV[0]; + tv[0] = vf.mTexCoords[i].mV[1]; + sv[1] = vf.mTexCoords[i+1].mV[0]; + tv[1] = 
vf.mTexCoords[i+1].mV[1]; + sv[2] = vf.mTexCoords[i+2].mV[0]; + tv[2] = vf.mTexCoords[i+2].mV[1]; + sv[3] = vf.mTexCoords[i+3].mV[0]; + tv[3] = vf.mTexCoords[i+3].mV[1]; + sv[4] = vf.mTexCoords[i+4].mV[0]; + tv[4] = vf.mTexCoords[i+4].mV[1]; + sv[5] = vf.mTexCoords[i+5].mV[0]; + tv[5] = vf.mTexCoords[i+5].mV[1]; + sv[6] = vf.mTexCoords[i+6].mV[0]; + tv[6] = vf.mTexCoords[i+6].mV[1]; + sv[7] = vf.mTexCoords[i+7].mV[0]; + tv[7] = vf.mTexCoords[i+7].mV[1]; + + __m256 svv = _mm256_loadu_ps(sv); + __m256 tvv = _mm256_loadu_ps(tv); + // Texture transforms are done about the center of the face. + svv = _mm256_add_ps(svv, off); + tvv = _mm256_add_ps(tvv, off); + + // Transform the texture coordinates for this face. + __m256 coss = _mm256_mul_ps(svv, cos_vec); + __m256 sins = _mm256_mul_ps(svv, sin_vec); + svv = _mm256_fmadd_ps(tvv, sin_vec, coss); + tvv = _mm256_fmsub_ps(tvv, cos_vec, sins); + + // Then scale and offset + svv = _mm256_fmadd_ps(svv, ms_vec, osoff); + tvv = _mm256_fmadd_ps(tvv, mt_vec, otoff); + + _mm256_storeu_ps(sv, svv); + _mm256_storeu_ps(tv, tvv); + + for (S32 j = 0; j < 8; ++j) + { + LLVector2 tc(sv[j], tv[j]); + *dst++ = tc; + + if (!mat && do_bump) + { + bump_tc.push_back(tc); + } + } + } +#else + /* SSE2 Version, we have no FMA :-( */ + __m128 cos_vec = _mm_set1_ps(cos_ang); + __m128 sin_vec = _mm_set1_ps(sin_ang); + __m128 off = _mm_set1_ps(-0.5f); + __m128 osoff = _mm_set1_ps(os + 0.5f); + __m128 otoff = _mm_set1_ps(ot + 0.5f); + __m128 ms_vec = _mm_set1_ps(ms); + __m128 mt_vec = _mm_set1_ps(mt); + + for (; i + 4 <= num_vertices; i += 4) + { + F32 sv[4]; + F32 tv[4]; + + sv[0] = vf.mTexCoords[i].mV[0]; + tv[0] = vf.mTexCoords[i].mV[1]; + sv[1] = vf.mTexCoords[i + 1].mV[0]; + tv[1] = vf.mTexCoords[i + 1].mV[1]; + sv[2] = vf.mTexCoords[i + 2].mV[0]; + tv[2] = vf.mTexCoords[i + 2].mV[1]; + sv[3] = vf.mTexCoords[i + 3].mV[0]; + tv[3] = vf.mTexCoords[i + 3].mV[1]; + + __m128 svv = _mm_loadu_ps(sv); + __m128 tvv = _mm_loadu_ps(tv); + // Texture 
transforms are done about the center of the face. + svv = _mm_add_ps(svv, off); + tvv = _mm_add_ps(tvv, off); + + // Transform the texture coordinates for this face. + __m128 coss = _mm_mul_ps(svv, cos_vec); + __m128 sins = _mm_mul_ps(svv, sin_vec); + /* no fmadd/fmsub, so do it in two steps: s = t*sin + s*cos, t = t*cos - s*sin (must match the AVX2 path above) */ + svv = _mm_add_ps(_mm_mul_ps(tvv, sin_vec), coss); + tvv = _mm_sub_ps(_mm_mul_ps(tvv, cos_vec), sins); + + // Then scale and offset + svv = _mm_add_ps(_mm_mul_ps(svv, ms_vec), osoff); + tvv = _mm_add_ps(_mm_mul_ps(tvv, mt_vec), otoff); + + _mm_storeu_ps(sv, svv); + _mm_storeu_ps(tv, tvv); + + for (S32 j = 0; j < 4; ++j) + { + LLVector2 tc(sv[j], tv[j]); + *dst++ = tc; + + if (!mat && do_bump) + { + bump_tc.push_back(tc); + } + } + } +#endif + for (; i < num_vertices; ++i) + { + LLVector2 tc(vf.mTexCoords[i]); + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + + *dst++ = tc; + + if (!mat && do_bump) + { + bump_tc.push_back(tc); + } + } + } + } #if USE_MAP_RANGE if (map_range) {
|  |