// Copyright (C) 2007 Id Software, Inc.
//
|
||
|
|
||
|
#include "../precompiled.h"
|
||
|
#pragma hdrstop
|
||
|
|
||
|
#include "Simd_Generic.h"
|
||
|
#include "Simd_MMX.h"
|
||
|
#include "Simd_SSE.h"
|
||
|
#include "Simd_SSE2.h"
|
||
|
#include "Simd_SSE3.h"
|
||
|
|
||
|
|
||
|
//===============================================================
//
//	SSE3 implementation of idSIMDProcessor
//
//===============================================================
|
||
|
|
||
|
#ifdef ID_WIN_X86_ASM
|
||
|
|
||
|
#include "Simd_InstructionMacros.h"
|
||
|
|
||
|
// Bit mask that keeps the first three 32-bit lanes and zeroes the fourth.
// The TransformShadowVerts variants load it with movaps and apply it with andps.
ALIGN4_INIT4( unsigned long SIMD_SP_clearLast, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 );
|
||
|
// ( 0, 0, 0, 1 ) vector. The TransformShadowVerts variants or it in (orps) after
// masking with SIMD_SP_clearLast, which forces the fourth lane to 1.0f.
ALIGN4_INIT4( float SIMD_SP_lastOne, 0.0f, 0.0f, 0.0f, 1.0f );
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
SSE3_Dot
|
||
|
============
|
||
|
*/
|
||
|
void SSE3_Dot( const idVec4 &v1, const idVec4 &v2, float &result ) {
|
||
|
__asm {
|
||
|
mov esi, v1
|
||
|
mov edi, v2
|
||
|
mov ecx, result
|
||
|
movaps xmm0, [esi]
|
||
|
mulps xmm0, [edi]
|
||
|
_haddps( _xmm0, _xmm0 )
|
||
|
_haddps( _xmm0, _xmm0 )
|
||
|
movss [ecx], xmm0
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
SSE3_Dot4
|
||
|
============
|
||
|
*/
|
||
|
void SSE3_Dot4( const idVec4 v1[4], const idVec4 v2[4], float result[4] ) {
|
||
|
__asm {
|
||
|
mov esi, v1
|
||
|
mov edi, v2
|
||
|
mov ecx, result
|
||
|
movaps xmm0, [esi+0*16]
|
||
|
mulps xmm0, [edi+0*16]
|
||
|
movaps xmm1, [esi+1*16]
|
||
|
mulps xmm1, [edi+1*16]
|
||
|
movaps xmm2, [esi+2*16]
|
||
|
mulps xmm2, [edi+2*16]
|
||
|
movaps xmm3, [esi+3*16]
|
||
|
mulps xmm3, [edi+3*16]
|
||
|
_haddps( _xmm0, _xmm1 )
|
||
|
_haddps( _xmm2, _xmm3 )
|
||
|
_haddps( _xmm0, _xmm2 )
|
||
|
movaps [ecx], xmm0
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::GetName
|
||
|
============
|
||
|
*/
|
||
|
const char * idSIMD_SSE3::GetName( void ) const {
|
||
|
return "MMX & SSE & SSE2 & SSE3";
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformVerts
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *base, const jointWeight_t *weights, const int numWeights ) {
|
||
|
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, DRAWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
loopVert:
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
movaps xmm2, [esi]
|
||
|
add edx, JOINTWEIGHT_SIZE
|
||
|
movaps xmm0, xmm2
|
||
|
add esi, BASEVECTOR_SIZE
|
||
|
movaps xmm1, xmm2
|
||
|
|
||
|
mulps xmm0, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
mulps xmm1, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
mulps xmm2, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
cmp dword ptr [edx-JOINTWEIGHT_SIZE+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET], JOINTWEIGHT_SIZE
|
||
|
|
||
|
je doneWeight
|
||
|
|
||
|
loopWeight:
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
movaps xmm5, [esi]
|
||
|
add edx, JOINTWEIGHT_SIZE
|
||
|
movaps xmm3, xmm5
|
||
|
add esi, BASEVECTOR_SIZE
|
||
|
movaps xmm4, xmm5
|
||
|
|
||
|
mulps xmm3, [edi+ebx+ 0] // xmm3 = m0, m1, m2, t0
|
||
|
mulps xmm4, [edi+ebx+16] // xmm4 = m3, m4, m5, t1
|
||
|
mulps xmm5, [edi+ebx+32] // xmm5 = m6, m7, m8, t2
|
||
|
|
||
|
cmp dword ptr [edx-JOINTWEIGHT_SIZE+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET], JOINTWEIGHT_SIZE
|
||
|
|
||
|
addps xmm0, xmm3
|
||
|
addps xmm1, xmm4
|
||
|
addps xmm2, xmm5
|
||
|
|
||
|
jne loopWeight
|
||
|
|
||
|
doneWeight:
|
||
|
add eax, DRAWVERT_SIZE
|
||
|
|
||
|
_haddps( _xmm0, _xmm1 )
|
||
|
_haddps( _xmm2, _xmm0 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0], xmm2
|
||
|
|
||
|
pshufd xmm3, xmm2, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm3, xmm2
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+8], xmm3
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformShadowVerts
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformShadowVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idDrawVert *base, const jointWeight_t *weights, const int numWeights ) {
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, DRAWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
movaps xmm0, SIMD_SP_clearLast
|
||
|
movaps xmm1, SIMD_SP_lastOne
|
||
|
|
||
|
loopVert:
|
||
|
add esi, DRAWVERT_SIZE
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
|
||
|
movaps xmm3, [esi-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0]
|
||
|
andps xmm3, xmm0
|
||
|
orps xmm3, xmm1
|
||
|
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
mulps xmm4, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
mulps xmm5, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
add edx, dword ptr [edx+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET]
|
||
|
add eax, DRAWVERT_SIZE
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+8], xmm7
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformShadowVerts
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformShadowVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idDrawVert *base, const short *weights, const int numWeights ) {
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, DRAWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
movaps xmm0, SIMD_SP_clearLast
|
||
|
movaps xmm1, SIMD_SP_lastOne
|
||
|
|
||
|
loopVert:
|
||
|
add esi, DRAWVERT_SIZE
|
||
|
movzx ebx, word ptr [edx]
|
||
|
|
||
|
movaps xmm3, [esi-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0]
|
||
|
andps xmm3, xmm0
|
||
|
orps xmm3, xmm1
|
||
|
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
mulps xmm4, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
mulps xmm5, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
add edx, 2
|
||
|
add eax, DRAWVERT_SIZE
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+8], xmm7
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformShadowVerts
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformShadowVerts( shadowCache_t *verts, const int numVerts, const idJointMat *joints, const idDrawVert *base, const short *weights, const int numWeights ) {
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, SHADOWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
movaps xmm0, SIMD_SP_clearLast
|
||
|
movaps xmm1, SIMD_SP_lastOne
|
||
|
|
||
|
loopVert:
|
||
|
add esi, DRAWVERT_SIZE
|
||
|
movzx ebx, word ptr [edx]
|
||
|
|
||
|
movaps xmm3, [esi-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0]
|
||
|
andps xmm3, xmm0
|
||
|
orps xmm3, xmm1
|
||
|
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
mulps xmm4, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
mulps xmm5, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
add edx, 2
|
||
|
add eax, SHADOWVERT_SIZE
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-SHADOWVERT_SIZE+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-SHADOWVERT_SIZE+8], xmm7
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
#if !defined(SD_USE_DRAWVERT_SIZE_32)
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformVertsAndTangents
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformVertsAndTangents( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *base, const jointWeight_t *weights, const int numWeights ) {
|
||
|
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, DRAWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
loopVert:
|
||
|
movss xmm2, [edx+JOINTWEIGHT_WEIGHT_OFFSET]
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
shufps xmm2, xmm2, R_SHUFFLE_PS( 0, 0, 0, 0 )
|
||
|
add edx, JOINTWEIGHT_SIZE
|
||
|
movaps xmm0, xmm2
|
||
|
movaps xmm1, xmm2
|
||
|
|
||
|
mulps xmm0, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
mulps xmm1, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
mulps xmm2, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
cmp dword ptr [edx-JOINTWEIGHT_SIZE+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET], JOINTWEIGHT_SIZE
|
||
|
|
||
|
je doneWeight
|
||
|
|
||
|
loopWeight:
|
||
|
movss xmm5, [edx+JOINTWEIGHT_WEIGHT_OFFSET]
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
shufps xmm5, xmm5, R_SHUFFLE_PS( 0, 0, 0, 0 )
|
||
|
add edx, JOINTWEIGHT_SIZE
|
||
|
movaps xmm3, xmm5
|
||
|
movaps xmm4, xmm5
|
||
|
|
||
|
mulps xmm3, [edi+ebx+ 0] // xmm3 = m0, m1, m2, t0
|
||
|
mulps xmm4, [edi+ebx+16] // xmm4 = m3, m4, m5, t1
|
||
|
mulps xmm5, [edi+ebx+32] // xmm5 = m6, m7, m8, t2
|
||
|
|
||
|
cmp dword ptr [edx-JOINTWEIGHT_SIZE+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET], JOINTWEIGHT_SIZE
|
||
|
|
||
|
addps xmm0, xmm3
|
||
|
addps xmm1, xmm4
|
||
|
addps xmm2, xmm5
|
||
|
|
||
|
jne loopWeight
|
||
|
|
||
|
doneWeight:
|
||
|
add esi, 3*BASEVECTOR_SIZE
|
||
|
add eax, DRAWVERT_SIZE
|
||
|
|
||
|
// transform vertex
|
||
|
movaps xmm3, [esi-3*BASEVECTOR_SIZE]
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, xmm0
|
||
|
mulps xmm4, xmm1
|
||
|
mulps xmm5, xmm2
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+8], xmm7
|
||
|
|
||
|
// transform normal
|
||
|
movaps xmm3, [esi-2*BASEVECTOR_SIZE]
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, xmm0
|
||
|
mulps xmm4, xmm1
|
||
|
mulps xmm5, xmm2
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_NORMAL_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_NORMAL_OFFSET+8], xmm7
|
||
|
|
||
|
// transform first tangent
|
||
|
movaps xmm3, [esi-1*BASEVECTOR_SIZE]
|
||
|
|
||
|
mulps xmm0, xmm3
|
||
|
mulps xmm1, xmm3
|
||
|
mulps xmm2, xmm3
|
||
|
|
||
|
_haddps( _xmm0, _xmm1 )
|
||
|
_haddps( _xmm2, _xmm0 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_TANGENT_OFFSET+0], xmm2
|
||
|
|
||
|
pshufd xmm7, xmm2, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm2
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_TANGENT_OFFSET+8], xmm7
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
============
|
||
|
idSIMD_SSE3::TransformVertsAndTangentsFast
|
||
|
============
|
||
|
*/
|
||
|
void VPCALL idSIMD_SSE3::TransformVertsAndTangentsFast( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *base, const jointWeight_t *weights, const int numWeights ) {
|
||
|
|
||
|
assert_16_byte_aligned( joints );
|
||
|
assert_16_byte_aligned( base );
|
||
|
|
||
|
__asm
|
||
|
{
|
||
|
mov eax, numVerts
|
||
|
test eax, eax
|
||
|
jz done
|
||
|
imul eax, DRAWVERT_SIZE
|
||
|
|
||
|
mov ecx, verts
|
||
|
mov edx, weights
|
||
|
mov esi, base
|
||
|
mov edi, joints
|
||
|
|
||
|
add ecx, eax
|
||
|
neg eax
|
||
|
|
||
|
loopVert:
|
||
|
mov ebx, dword ptr [edx+JOINTWEIGHT_JOINTMATOFFSET_OFFSET]
|
||
|
|
||
|
add esi, 3*BASEVECTOR_SIZE
|
||
|
|
||
|
movaps xmm0, [edi+ebx+ 0] // xmm0 = m0, m1, m2, t0
|
||
|
movaps xmm1, [edi+ebx+16] // xmm1 = m3, m4, m5, t1
|
||
|
movaps xmm2, [edi+ebx+32] // xmm2 = m6, m7, m8, t2
|
||
|
|
||
|
add edx, dword ptr [edx+JOINTWEIGHT_NEXTVERTEXOFFSET_OFFSET]
|
||
|
|
||
|
add eax, DRAWVERT_SIZE
|
||
|
|
||
|
// transform vertex
|
||
|
movaps xmm3, [esi-3*BASEVECTOR_SIZE]
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, xmm0
|
||
|
mulps xmm4, xmm1
|
||
|
mulps xmm5, xmm2
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_XYZ_OFFSET+8], xmm7
|
||
|
|
||
|
// transform normal
|
||
|
movaps xmm3, [esi-2*BASEVECTOR_SIZE]
|
||
|
movaps xmm4, xmm3
|
||
|
movaps xmm5, xmm3
|
||
|
|
||
|
mulps xmm3, xmm0
|
||
|
mulps xmm4, xmm1
|
||
|
mulps xmm5, xmm2
|
||
|
|
||
|
_haddps( _xmm3, _xmm4 )
|
||
|
_haddps( _xmm5, _xmm3 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_NORMAL_OFFSET+0], xmm5
|
||
|
|
||
|
pshufd xmm7, xmm5, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm5
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_NORMAL_OFFSET+8], xmm7
|
||
|
|
||
|
// transform first tangent
|
||
|
movaps xmm3, [esi-1*BASEVECTOR_SIZE]
|
||
|
|
||
|
mulps xmm0, xmm3
|
||
|
mulps xmm1, xmm3
|
||
|
mulps xmm2, xmm3
|
||
|
|
||
|
_haddps( _xmm0, _xmm1 )
|
||
|
_haddps( _xmm2, _xmm0 )
|
||
|
|
||
|
movhps [ecx+eax-DRAWVERT_SIZE+DRAWVERT_TANGENT_OFFSET+0], xmm2
|
||
|
|
||
|
pshufd xmm7, xmm2, R_SHUFFLE_D( 1, 0, 2, 3 )
|
||
|
addss xmm7, xmm2
|
||
|
|
||
|
movss [ecx+eax-DRAWVERT_SIZE+DRAWVERT_TANGENT_OFFSET+8], xmm7
|
||
|
|
||
|
jl loopVert
|
||
|
done:
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#endif /* ID_WIN_X86_ASM */
|