mirror of
synced 2025-03-11 03:31:05 +00:00
Don't include the lazy precompiled.h everywhere, only what's required for the compilation unit. platform.h needs to be included instead to provide all essential defines and types. All includes use the relative path to the neo or the game specific root. Move all idlib related includes from idlib/Lib.h to precompiled.h. precompiled.h still exists for the MFC stuff in tools/. Add some missing header guards.
252 lines
15 KiB
252 lines
15 KiB
Doom 3 GPL Source Code
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
Doom 3 Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Doom 3 Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
#include "idlib/math/Simd_Generic.h"
AltiVec implementation of idSIMDProcessor
// Defines for enabling parts of the library
// Turns on/off the simple math routines (add, sub, div, etc)
// Turns on/off the dot routines
#define ENABLE_DOT
// Turns on/off the compare routines
// The MinMax routines introduce a couple of bugs. In the bathroom of the alphalabs2 map, the
// wrong surface appears in the mirror at times. They also introduce a noticeable delay when map
// data is loaded, such as when going through doors.
// Turns on/off MinMax routines
// Turns on/off Clamp routines
// Turns on/off XXX16 routines
// Turns on/off LowerTriangularSolve, LowerTriangularSolveTranspose, and MatX_LDLTFactor
// Turns on/off TracePointCull, DecalPointCull, and OverlayPointCull
// The ENABLE_CULL routines break the g_decals functionality; DecalPointCull is
// the likely suspect. Bullet holes do not appear on the walls when this optimization
// is enabled.
//#define ENABLE_CULL
// Turns on/off DeriveTriPlanes, DeriveTangents, DeriveUnsmoothedTangents, NormalizeTangents
// Turns on/off CreateTextureSpaceLightVectors, CreateShadowCache, CreateVertexProgramShadowCache
// Turns on/off the sound routines
// Turns on/off the stuff that isn't on elsewhere
// Currently: BlendJoints, TransformJoints, UntransformJoints, ConvertJointQuatsToJointMats, and
// ConvertJointMatsToJointQuats
// This assumes that the dest (and mixBuffer) array to the sound functions is aligned. If this is not true, we take a large
// performance hit from having to do unaligned stores
// This assumes that the vertexCache array to CreateShadowCache and CreateVertexProgramShadowCache is aligned. If it's not,
// then we take a big performance hit from unaligned stores.
// This turns on support for PPC intrinsics in the SIMD_AltiVec.cpp file. Right now it's only used for frsqrte. GCC
// supports these intrinsics but XLC does not.
// This assumes that the idDrawVert array that is used in DeriveUnsmoothedTangents is aligned. If it's not aligned,
// then we don't get any speedup
// Disable DRAWVERT_PADDED since we disabled the ENABLE_CULL optimizations and the default
// implementation does not allow for the extra padding.
// This assumes that idDrawVert has been padded by 4 bytes so that xyz always starts at an aligned
// address
// idSIMD_AltiVec derives from idSIMD_Generic and redeclares the virtual routines listed
// below. Routines that are commented out here are intentionally not redeclared (see the
// notes next to them).
// NOTE(review): the signatures presumably have to match the base-class virtuals exactly
// for the overrides to take effect -- confirm against idSIMD_Generic / idSIMDProcessor.
class idSIMD_AltiVec : public idSIMD_Generic {
// The declarations that follow are only compiled when both __GNUC__ and __ALTIVEC__ are defined.
#if defined(__GNUC__) && defined(__ALTIVEC__)
// Returns the name of this implementation as a C string.
virtual const char * VPCALL GetName( void ) const;
// Basic math, works for both aligned and unaligned data
// Each operation comes in two overloads: (constant, array) and (array, array).
virtual void VPCALL Add( float *dst, const float constant, const float *src, const int count );
virtual void VPCALL Add( float *dst, const float *src0, const float *src1, const int count );
virtual void VPCALL Sub( float *dst, const float constant, const float *src, const int count );
virtual void VPCALL Sub( float *dst, const float *src0, const float *src1, const int count );
virtual void VPCALL Mul( float *dst, const float constant, const float *src, const int count);
virtual void VPCALL Mul( float *dst, const float *src0, const float *src1, const int count );
virtual void VPCALL Div( float *dst, const float constant, const float *divisor, const int count );
virtual void VPCALL Div( float *dst, const float *src0, const float *src1, const int count );
virtual void VPCALL MulAdd( float *dst, const float constant, const float *src, const int count );
virtual void VPCALL MulAdd( float *dst, const float *src0, const float *src1, const int count );
virtual void VPCALL MulSub( float *dst, const float constant, const float *src, const int count );
virtual void VPCALL MulSub( float *dst, const float *src0, const float *src1, const int count );
// Dot products, expects data structures in contiguous memory
// Overloads cover a constant idVec3 or idPlane against arrays of idVec3, idPlane or
// idDrawVert, plus array-against-array forms.
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idVec3 *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idPlane *src, const int count );
virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count );
virtual void VPCALL Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count );
// This overload takes its output as a single float reference instead of an array.
virtual void VPCALL Dot( float &dot, const float *src1, const float *src2, const int count );
// Comparisons, works for both aligned and unaligned data
// Each comparison has a plain overload and one taking an additional bitNum argument.
// NOTE(review): bitNum presumably selects which bit of each dst byte receives the
// result -- confirm against the implementation.
virtual void VPCALL CmpGT( byte *dst, const float *src0, const float constant, const int count );
virtual void VPCALL CmpGT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
virtual void VPCALL CmpGE( byte *dst, const float *src0, const float constant, const int count );
virtual void VPCALL CmpGE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
virtual void VPCALL CmpLT( byte *dst, const float *src0, const float constant, const int count );
virtual void VPCALL CmpLT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
virtual void VPCALL CmpLE( byte *dst, const float *src0, const float constant, const int count );
virtual void VPCALL CmpLE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
// Min/Max. Expects data structures in contiguous memory
// Results are written through the min/max reference parameters.
virtual void VPCALL MinMax( float &min, float &max, const float *src, const int count );
virtual void VPCALL MinMax( idVec2 &min, idVec2 &max, const idVec2 *src, const int count );
virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idVec3 *src, const int count );
virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int count );
virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count );
// Clamp operations. Works for both aligned and unaligned data
virtual void VPCALL Clamp( float *dst, const float *src, const float min, const float max, const int count );
virtual void VPCALL ClampMin( float *dst, const float *src, const float min, const int count );
virtual void VPCALL ClampMax( float *dst, const float *src, const float max, const int count );
// These are already using memcpy and memset functions. Leaving default implementation
// virtual void VPCALL Memcpy( void *dst, const void *src, const int count );
// virtual void VPCALL Memset( void *dst, const int val, const int count );
// Operations that expect 16-byte aligned data and 16-byte padded memory (with zeros), generally faster
virtual void VPCALL Zero16( float *dst, const int count );
virtual void VPCALL Negate16( float *dst, const int count );
virtual void VPCALL Copy16( float *dst, const float *src, const int count );
virtual void VPCALL Add16( float *dst, const float *src1, const float *src2, const int count );
virtual void VPCALL Sub16( float *dst, const float *src1, const float *src2, const int count );
virtual void VPCALL Mul16( float *dst, const float *src1, const float constant, const int count );
virtual void VPCALL AddAssign16( float *dst, const float *src, const int count );
virtual void VPCALL SubAssign16( float *dst, const float *src, const int count );
virtual void VPCALL MulAssign16( float *dst, const float constant, const int count );
// Most of these deal with tiny matrices or vectors, generally not worth altivec'ing since
// the scalar code is already really fast
// virtual void VPCALL MatX_MultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_MultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_MultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_TransposeMultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_TransposeMultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_TransposeMultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
// virtual void VPCALL MatX_MultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
// virtual void VPCALL MatX_TransposeMultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
// idMatX routines that are redeclared here, unlike the commented-out multiplies above.
// MatX_LowerTriangularSolve declares a default argument (skip = 0) on a virtual function.
// NOTE(review): default arguments are bound by static type, so this default should stay
// in sync with the base-class declaration -- confirm.
virtual void VPCALL MatX_LowerTriangularSolve( const idMatX &L, float *x, const float *b, const int n, int skip = 0 );
virtual void VPCALL MatX_LowerTriangularSolveTranspose( const idMatX &L, float *x, const float *b, const int n );
virtual bool VPCALL MatX_LDLTFactor( idMatX &mat, idVecX &invDiag, const int n );
// Joint blending, joint quat/mat conversion, joint (un)transform and vertex transform routines.
virtual void VPCALL BlendJoints( idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints );
virtual void VPCALL ConvertJointQuatsToJointMats( idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints );
virtual void VPCALL ConvertJointMatsToJointQuats( idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints );
virtual void VPCALL TransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
virtual void VPCALL UntransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
virtual void VPCALL TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights );
// Point-cull routines: take idPlane and idDrawVert arrays and write per-vertex cullBits.
virtual void VPCALL TracePointCull( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts );
virtual void VPCALL DecalPointCull( byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts );
virtual void VPCALL OverlayPointCull( byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts );
// Triangle plane / tangent derivation and tangent normalization on idDrawVert arrays.
virtual void VPCALL DeriveTriPlanes( idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
virtual void VPCALL DeriveTangents( idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
virtual void VPCALL DeriveUnsmoothedTangents( idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts );
virtual void VPCALL NormalizeTangents( idDrawVert *verts, const int numVerts );
// Light vector, specular texture coordinate and shadow cache creation.
// The two shadow cache routines return an int.
// NOTE(review): presumably the number of vertices written to vertexCache -- confirm.
virtual void VPCALL CreateTextureSpaceLightVectors( idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
virtual void VPCALL CreateSpecularTextureCoords( idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
virtual int VPCALL CreateShadowCache( idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts );
virtual int VPCALL CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts );
// Sound upsampling and mixing routines, works for aligned and unaligned data
// The mixers take the previous and current volume arrays (lastV/currentV) sized 2 or 6.
virtual void VPCALL UpSamplePCMTo44kHz( float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels );
virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels );
virtual void VPCALL MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
virtual void VPCALL MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
virtual void VPCALL MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples );
// NOTE(review): the class's closing brace, its access specifier, and the #endif matching
// the #if above are not visible in this copy -- confirm against the original header.
#endif /* !__MATH_SIMD_ALTIVEC_H__ */