q3rally/engine/code/renderergl1/tr_altivec.c
zturtleman 96a9e2a9aa ioquake3 resync to revision 3511 from 3444.
This updates from SDL 2.0.4 to SDL 2.0.8.

Fix nullptr dereference in front of nullptr check in FS_CheckPak0
Fix undefined behaviour due to shifting signed in snd_mem.c
Fix shifting bits out of byte in tr_font.c
Fix shift into sign in cl_cin.c
Fix signed bit operations in MSG_ReadBits
Add missing address operator in cm_polylib.c
OpenGL1: Decay float[8] to float * in tr_marks.c
Avoid srcList[-1] in snd_openal.c
Fix the behaviour of CVAR_LATCH|CVAR_CHEAT cvars
Maximize cURL buffer size
Fix mouse grab after toggling fullscreen
Fix q3history buffer not cleared between mods and OOB-access
Revert "Removed "Color Depth" from q3_ui system settings, it didn't control anything."
Fix displayed color/depth/stencil bits values
Restore setting r_colorbits in q3_ui
Make setting r_stencilbits more consistent in Team Arena UI
Fix map list in Team Arena start server menu after entering SP menu
Support SDL audio devices that require float32 samples.
sdl_snd.c should just initialize SDL audio without checking SDL_WasInit().
There's no need to SDL_PauseAudio(1) before calling SDL_CloseAudio().
Added audio capture support to SDL backend.
Use the SDL2 audio device interface instead of the legacy 1.2 API.
Disable SDL audio capture until prebuilt SDL libraries are updated to 2.0.8.
Update SDL2 to 2.0.8
Add SDL 2.0.1 headers for macOS PPC
Make macOS Universal Bundle target 10.6 for x86 and x86_64
Fix possible bot goal state NULL pointer dereference
Fix uninitialized bot_goal_t fields
Remove unnecessary NULL pointer check in Cmd_RemoveCommand
Make UI_DrawProportionalString handle NULL string
Fix compiling against macOS system OpenAL and SDL2 frameworks
Fix array index in CanDamage() function - discovered by MARTY
Fix compiling Makefile (broke in macOS frameworks commit)
Fix clearing keys for control in Team Arena UI
Make s_useOpenAL be CVAR_LATCH
Improvements for dedicated camera followers (team follow1/2)
Fix not closing description.txt and fix path separator
Fix duplicate bots displayed in Team Arena ingame add bot menu
OpenGL2: Fix parsing specularScale in shaders
Don't allow SDL audio capture using pulseaudio
Isolate the Altivec code so non-Altivec PPC targets can use the same binary.
Limit -maltivec to specific source files on OpenBSD too (untested)
Use SDL 2.0.1 headers for macOS ppc64
Fix console offset while Team Arena voiceMenu is open
OpenGL2: Readd r_deluxeSpecular.
Fix client kicked as unpure when missing the latest cgame/ui pk3s
Don't create multiple windows when GL context creation fails
Require OpenGL 1.2 for GL_CLAMP_TO_EDGE
Fix Linux uninstaller requiring Bash
Fix Linux uninstaller redirecting stderr to stdout in preuninstall.sh
Reported by @illwieckz.
Fix in_restart causing fatal error while video is shutdown
Allow pkg-config binary to be overridden with PKG_CONFIG
Make testgun command without argument disable test gun model
Remove unused renderer_buffer variable
Don't upload 8 bit grayscale images as 16 bit luminance
OpenGL1: Use RE_UploadCinematic() instead of duplicate code
Don't load non-core GL functions for OpenGL 3.2 core context
Load OpenGL ES 2.0 function procs
Don't check fixed function GL extensions when using shader pipeline
OpenGL2: Fix world VAO cache drawing when glIndex_t is unsigned short
OpenGL2: Misc fixes and cleanup
Fix IQM root joint backlerp when joint number is more than 0
Improve IQM loading
Improve IQM CPU vertex skinning performance
OpenGL2: Add GPU vertex skinning for IQM models
2018-07-30 11:35:12 +00:00

414 lines
14 KiB
C

/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
/* This file is only compiled for PowerPC builds with Altivec support.
Altivec intrinsics need to be in a separate file, so that GCC's -maltivec
command-line option can enable them here while leaving us the option to
_not_ use it on other files, where the compiler might then generate Altivec
instructions for normal floating point, crashing on G3 (etc.) processors. */
#include "tr_local.h"
#if idppc_altivec
#if !defined(__APPLE__)
#include <altivec.h>
#endif
/*
===================
ProjectDlightTexture_altivec

Draws the dynamic-light texture over the surface currently held in tess.
For every dlight whose bit is set in tess.dlightBits this:
  1. computes, per vertex, the dlight texture coordinates, a set of clip
     bits and a modulated RGBA color (the color math uses Altivec),
  2. collects every triangle whose three vertices do not share a clip bit,
  3. draws the collected triangles with tr.dlightImage bound.

Returns immediately when the refdef has no dlights.
===================
*/
void ProjectDlightTexture_altivec( void ) {
	int		i, l;
	vec_t	origin0, origin1, origin2;
	float	texCoords0, texCoords1;
	vector float floatColorVec0, floatColorVec1;
	vector float modulateVec, colorVec, zero;
	vector short colorShort;
	vector signed int colorInt;
	vector unsigned char floatColorVecPerm, modulatePerm, colorChar;
	// selects the fourth byte of every 32-bit word (the alpha channel)
	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff);
	float	*texCoords;
	byte	*colors;
	byte	clipBits[SHADER_MAX_VERTEXES];
	float	texCoordsArray[SHADER_MAX_VERTEXES][2];
	byte	colorArray[SHADER_MAX_VERTEXES][4];
	glIndex_t	hitIndexes[SHADER_MAX_INDEXES];
	int		numIndexes;
	float	scale;
	float	radius;
	vec3_t	floatColor;
	float	modulate = 0.0f;

	if ( !backEnd.refdef.num_dlights ) {
		return;
	}

	// There has to be a better way to do this so that floatColor
	// and/or modulate are already 16-byte aligned.
	// Both are locals whose addresses never change, so the permute masks
	// that realign them can be built once, up front.
	floatColorVecPerm = vec_lvsl(0,(float *)floatColor);
	modulatePerm = vec_lvsl(0,(float *)&modulate);
	// replicate the first word of the mask so the single float is
	// broadcast into all four lanes by the later vec_perm
	modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0);
	zero = (vector float)vec_splat_s8(0);

	for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) {
		dlight_t	*dl;

		if ( !( tess.dlightBits & ( 1 << l ) ) ) {
			continue;	// this surface definitely doesn't have any of this light
		}
		texCoords = texCoordsArray[0];
		colors = colorArray[0];

		dl = &backEnd.refdef.dlights[l];
		origin0 = dl->transformed[0];
		origin1 = dl->transformed[1];
		origin2 = dl->transformed[2];
		radius = dl->radius;
		scale = 1.0f / radius;

		// light color scaled to 0..255, optionally (partly) desaturated
		if(r_greyscale->integer)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = floatColor[1] = floatColor[2] = luminance;
		}
		else if(r_greyscale->value)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value);
			floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value);
			floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value);
		}
		else
		{
			floatColor[0] = dl->color[0] * 255.0f;
			floatColor[1] = dl->color[1] * 255.0f;
			floatColor[2] = dl->color[2] * 255.0f;
		}

		// Unaligned load of the 12-byte color: fetch the 16-byte line holding
		// its first byte and the line holding its last byte (offset 11), then
		// permute across BOTH lines. Permuting the first line with itself
		// picks up wrong data whenever floatColor straddles a 16-byte boundary.
		floatColorVec0 = vec_ld(0, floatColor);
		floatColorVec1 = vec_ld(11, floatColor);
		floatColorVec0 = vec_perm(floatColorVec0,floatColorVec1,floatColorVecPerm);

		for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) {
			int		clip = 0;
			vec_t	dist0, dist1, dist2;

			dist0 = origin0 - tess.xyz[i][0];
			dist1 = origin1 - tess.xyz[i][1];
			dist2 = origin2 - tess.xyz[i][2];

			backEnd.pc.c_dlightVertexes++;

			texCoords0 = 0.5f + dist0 * scale;
			texCoords1 = 0.5f + dist1 * scale;

			if( !r_dlightBacks->integer &&
					// dist . tess.normal[i]
					( dist0 * tess.normal[i][0] +
					dist1 * tess.normal[i][1] +
					dist2 * tess.normal[i][2] ) < 0.0f ) {
				// vertex faces away from the light: set all six clip bits
				clip = 63;
			} else {
				if ( texCoords0 < 0.0f ) {
					clip |= 1;
				} else if ( texCoords0 > 1.0f ) {
					clip |= 2;
				}
				if ( texCoords1 < 0.0f ) {
					clip |= 4;
				} else if ( texCoords1 > 1.0f ) {
					clip |= 8;
				}
				texCoords[0] = texCoords0;
				texCoords[1] = texCoords1;

				// modulate the strength based on the height and color
				if ( dist2 > radius ) {
					clip |= 16;
					modulate = 0.0f;
				} else if ( dist2 < -radius ) {
					clip |= 32;
					modulate = 0.0f;
				} else {
					dist2 = Q_fabs(dist2);
					if ( dist2 < radius * 0.5f ) {
						modulate = 1.0f;
					} else {
						modulate = 2.0f * (radius - dist2) * scale;
					}
				}
			}
			clipBits[i] = clip;

			// broadcast modulate to all lanes, scale the color, then narrow
			// float -> int -> short -> unsigned byte (saturating at the end)
			modulateVec = vec_ld(0,(float *)&modulate);
			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);
			colorVec = vec_madd(floatColorVec0,modulateVec,zero);
			colorInt = vec_cts(colorVec,0);	// RGBx
			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx
			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx
			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
			// all four words are identical, so whichever element vec_ste picks is correct
			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color
		}

		// build a list of triangles that need light
		numIndexes = 0;
		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {
			int		a, b, c;

			a = tess.indexes[i];
			b = tess.indexes[i+1];
			c = tess.indexes[i+2];
			// a clip bit common to all three vertices rejects the triangle
			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {
				continue;	// not lighted
			}
			hitIndexes[numIndexes] = a;
			hitIndexes[numIndexes+1] = b;
			hitIndexes[numIndexes+2] = c;
			numIndexes += 3;
		}

		if ( !numIndexes ) {
			continue;
		}

		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );
		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );

		qglEnableClientState( GL_COLOR_ARRAY );
		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );

		GL_Bind( tr.dlightImage );
		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light
		// where they aren't rendered
		if ( dl->additive ) {
			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		else {
			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		R_DrawElements( numIndexes, hitIndexes );
		backEnd.pc.c_totalIndexes += numIndexes;
		backEnd.pc.c_dlightIndexes += numIndexes;
	}
}
void RB_CalcDiffuseColor_altivec( unsigned char *colors )
{
int i;
float *v, *normal;
trRefEntity_t *ent;
int ambientLightInt;
vec3_t lightDir;
int numVertexes;
vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff);
vector float ambientLightVec;
vector float directedLightVec;
vector float lightDirVec;
vector float normalVec0, normalVec1;
vector float incomingVec0, incomingVec1, incomingVec2;
vector float zero, jVec;
vector signed int jVecInt;
vector signed short jVecShort;
vector unsigned char jVecChar, normalPerm;
ent = backEnd.currentEntity;
ambientLightInt = ent->ambientLightInt;
// A lot of this could be simplified if we made sure
// entities light info was 16-byte aligned.
jVecChar = vec_lvsl(0, ent->ambientLight);
ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight);
jVec = vec_ld(11, (vector float *)ent->ambientLight);
ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar);
jVecChar = vec_lvsl(0, ent->directedLight);
directedLightVec = vec_ld(0,(vector float *)ent->directedLight);
jVec = vec_ld(11,(vector float *)ent->directedLight);
directedLightVec = vec_perm(directedLightVec,jVec,jVecChar);
jVecChar = vec_lvsl(0, ent->lightDir);
lightDirVec = vec_ld(0,(vector float *)ent->lightDir);
jVec = vec_ld(11,(vector float *)ent->lightDir);
lightDirVec = vec_perm(lightDirVec,jVec,jVecChar);
zero = (vector float)vec_splat_s8(0);
VectorCopy( ent->lightDir, lightDir );
v = tess.xyz[0];
normal = tess.normal[0];
normalPerm = vec_lvsl(0,normal);
numVertexes = tess.numVertexes;
for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) {
normalVec0 = vec_ld(0,(vector float *)normal);
normalVec1 = vec_ld(11,(vector float *)normal);
normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm);
incomingVec0 = vec_madd(normalVec0, lightDirVec, zero);
incomingVec1 = vec_sld(incomingVec0,incomingVec0,4);
incomingVec2 = vec_add(incomingVec0,incomingVec1);
incomingVec1 = vec_sld(incomingVec1,incomingVec1,4);
incomingVec2 = vec_add(incomingVec2,incomingVec1);
incomingVec0 = vec_splat(incomingVec2,0);
incomingVec0 = vec_max(incomingVec0,zero);
normalPerm = vec_lvsl(12,normal);
jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec);
jVecInt = vec_cts(jVec,0); // RGBx
jVecShort = vec_pack(jVecInt,jVecInt); // RGBxRGBx
jVecChar = vec_packsu(jVecShort,jVecShort); // RGBxRGBxRGBxRGBx
jVecChar = vec_sel(jVecChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255
vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]); // store color
}
}
/*
===================
LerpMeshVertexes_altivec

Decompresses the vertices of an MD3 surface for the current entity's frame
and appends them to tess.xyz / tess.normal starting at tess.numVertexes
(tess.numVertexes itself is not changed here).

Each source vertex is four shorts: x, y, z and a packed normal whose high
byte is the latitude and low byte the longitude, both used as indexes into
tr.sinTable.

surf     - surface whose vertex data starts at surf->ofsXyzNormals
backlerp - 0 copies the current frame only (position scaling done with
           Altivec); any other value blends e.oldframe (weight backlerp)
           with e.frame (weight 1 - backlerp) and then renormalizes the
           normals with VectorArrayNormalize.
===================
*/
void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp)
{
	short	*oldXyz, *newXyz, *oldNormals, *newNormals;
	float	*outXyz, *outNormal;
	float	oldXyzScale QALIGN(16);
	float	newXyzScale QALIGN(16);
	float	oldNormalScale QALIGN(16);
	float	newNormalScale QALIGN(16);
	int		vertNum;
	unsigned lat, lng;
	int		numVerts;

	outXyz = tess.xyz[tess.numVertexes];
	outNormal = tess.normal[tess.numVertexes];

	// 4 shorts per vertex; the packed normal is the fourth one
	newXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
		+ (backEnd.currentEntity->e.frame * surf->numVerts * 4);
	newNormals = newXyz + 3;

	newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp);
	newNormalScale = 1.0 - backlerp;

	numVerts = surf->numVerts;

	if ( backlerp == 0 ) {
		// Despite the "newNormals" prefix these vectors carry the whole
		// vertex record (x, y, z, packed normal).
		vector signed short newNormalsVec0;
		vector signed short newNormalsVec1;
		vector signed int newNormalsIntVec;
		vector float newNormalsFloatVec;
		vector float newXyzScaleVec;
		vector unsigned char newXyzScalePermute;
		vector unsigned char newNormalsLoadPermute;
		vector unsigned char newNormalsStorePermute;
		vector float zero;

		// Broadcast the scalar scale into all four lanes. The permute mask
		// is derived from the address actually being loaded (newXyzScale).
		newXyzScalePermute = vec_lvsl(0,(float *)&newXyzScale);
		newXyzScaleVec = *(vector float *)&newXyzScale;
		newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newXyzScalePermute);
		newXyzScaleVec = vec_splat(newXyzScaleVec,0);
		zero = (vector float)vec_splat_s8(0);
		//
		// just copy the vertexes
		//
		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			newXyz += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			// newXyz advances 8 bytes per vertex, so its alignment changes
			// from one iteration to the next; rebuild the masks each time
			newNormalsLoadPermute = vec_lvsl(0,newXyz);
			newNormalsStorePermute = vec_lvsr(0,outXyz);
			// Unaligned load of the 8-byte record: the second load targets
			// the line holding the record's last byte (offset 7), so no
			// line beyond the record itself is ever touched.
			newNormalsVec0 = vec_ld(0,newXyz);
			newNormalsVec1 = vec_ld(7,newXyz);
			newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute);
			// widen the first four shorts to ints, convert to float, scale
			newNormalsIntVec = vec_unpackh(newNormalsVec0);
			newNormalsFloatVec = vec_ctf(newNormalsIntVec,0);
			newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero);
			// rotate so each lane lines up with its destination for vec_ste
			newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute);
			//outXyz[0] = newXyz[0] * newXyzScale;
			//outXyz[1] = newXyz[1] * newXyzScale;
			//outXyz[2] = newXyz[2] * newXyzScale;

			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			// decode X as cos( lat ) * sin( long )
			// decode Y as sin( lat ) * sin( long )
			// decode Z as cos( long )

			outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			// store x, y and z one float at a time; the fourth lane is not written
			vec_ste(newNormalsFloatVec,0,outXyz);
			vec_ste(newNormalsFloatVec,4,outXyz);
			vec_ste(newNormalsFloatVec,8,outXyz);
		}
	} else {
		//
		// interpolate and copy the vertex and normal
		//
		oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
			+ (backEnd.currentEntity->e.oldframe * surf->numVerts * 4);
		oldNormals = oldXyz + 3;

		oldXyzScale = MD3_XYZ_SCALE * backlerp;
		oldNormalScale = backlerp;

		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			vec3_t uncompressedOldNormal, uncompressedNewNormal;

			// interpolate the xyz
			outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale;
			outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale;
			outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale;

			// FIXME: interpolate lat/long instead?
			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			// same byte -> table-index scaling as the non-lerped path
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);
			uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			lat = ( oldNormals[0] >> 8 ) & 0xff;
			lng = ( oldNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale;
			outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale;
			outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale;

			// normalization is done for the whole batch after the loop
		}
		VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts);
	}
}
#endif