ioq3/code/renderergl1/tr_altivec.c

415 lines
14 KiB
C

/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
/* This file is only compiled for PowerPC builds with Altivec support.
Altivec intrinsics need to be in a separate file, so GCC's -maltivec
command line can enable them, but give us the option to _not_ use that
on other files, where the compiler might then generate Altivec
instructions for normal floating point, crashing on G3 (etc) processors. */
#include "tr_local.h"
#if idppc_altivec
#if !defined(__APPLE__)
#include <altivec.h>
#endif
/*
===================
ProjectDlightTexture_altivec

AltiVec variant of the dynamic light projection pass.

For every dynamic light whose bit is set in tess.dlightBits this:
  - computes a texture coordinate pair, a clip mask and a modulated
    RGBA color for each vertex in tess,
  - collects the triangles whose three vertexes do not all share a
    clip bit,
  - draws those triangles with tr.dlightImage bound.

Returns immediately when there are no dynamic lights in the refdef.
===================
*/
void ProjectDlightTexture_altivec( void ) {
	int		i, l;
	vec_t	origin0, origin1, origin2;
	float	texCoords0, texCoords1;
	vector float floatColorVec0, floatColorVec1;
	vector float modulateVec, colorVec, zero;
	vector short colorShort;
	vector signed int colorInt;
	vector unsigned char floatColorVecPerm, modulatePerm, colorChar;
	// select mask: 0xff in the fourth byte of every word (the alpha slot)
	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff,
	                                           0x00, 0x00, 0x00, 0xff);
	float	*texCoords;
	byte	*colors;
	byte	clipBits[SHADER_MAX_VERTEXES];
	float	texCoordsArray[SHADER_MAX_VERTEXES][2];
	byte	colorArray[SHADER_MAX_VERTEXES][4];
	glIndex_t	hitIndexes[SHADER_MAX_INDEXES];
	int		numIndexes;
	float	scale;
	float	radius;
	vec3_t	floatColor;
	float	modulate = 0.0f;

	if ( !backEnd.refdef.num_dlights ) {
		return;
	}

	// There has to be a better way to do this so that floatColor
	// and/or modulate are already 16-byte aligned.
	floatColorVecPerm = vec_lvsl(0,(float *)floatColor);
	modulatePerm = vec_lvsl(0,(float *)&modulate);
	// replicate the first word of the permute so that every lane of the
	// permuted result picks up the single modulate float
	modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0);
	zero = (vector float)vec_splat_s8(0);

	for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) {
		dlight_t	*dl;

		if ( !( tess.dlightBits & ( 1 << l ) ) ) {
			continue;	// this surface definitely doesn't have any of this light
		}
		texCoords = texCoordsArray[0];
		colors = colorArray[0];

		dl = &backEnd.refdef.dlights[l];
		origin0 = dl->transformed[0];
		origin1 = dl->transformed[1];
		origin2 = dl->transformed[2];
		radius = dl->radius;
		scale = 1.0f / radius;

		// light color scaled by 255, optionally converted (fully or
		// partially) to greyscale according to r_greyscale
		if(r_greyscale->integer)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = floatColor[1] = floatColor[2] = luminance;
		}
		else if(r_greyscale->value)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value);
			floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value);
			floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value);
		}
		else
		{
			floatColor[0] = dl->color[0] * 255.0f;
			floatColor[1] = dl->color[1] * 255.0f;
			floatColor[2] = dl->color[2] * 255.0f;
		}

		// Unaligned load of the 12-byte color: fetch the 16-byte block that
		// holds its first byte and the block that holds its last byte
		// (offset 11), then shift the pair into place. Both blocks must be
		// handed to vec_perm, otherwise a color that straddles a 16-byte
		// boundary gets its trailing channels from the wrong bytes.
		floatColorVec0 = vec_ld(0, floatColor);
		floatColorVec1 = vec_ld(11, floatColor);
		floatColorVec0 = vec_perm(floatColorVec0,floatColorVec1,floatColorVecPerm);

		for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) {
			int		clip = 0;
			vec_t	dist0, dist1, dist2;

			dist0 = origin0 - tess.xyz[i][0];
			dist1 = origin1 - tess.xyz[i][1];
			dist2 = origin2 - tess.xyz[i][2];

			backEnd.pc.c_dlightVertexes++;

			texCoords0 = 0.5f + dist0 * scale;
			texCoords1 = 0.5f + dist1 * scale;

			if( !r_dlightBacks->integer &&
					// dist . tess.normal[i]
					( dist0 * tess.normal[i][0] +
					dist1 * tess.normal[i][1] +
					dist2 * tess.normal[i][2] ) < 0.0f ) {
				// facing away from the light: set every clip bit
				clip = 63;
			} else {
				if ( texCoords0 < 0.0f ) {
					clip |= 1;
				} else if ( texCoords0 > 1.0f ) {
					clip |= 2;
				}
				if ( texCoords1 < 0.0f ) {
					clip |= 4;
				} else if ( texCoords1 > 1.0f ) {
					clip |= 8;
				}
				texCoords[0] = texCoords0;
				texCoords[1] = texCoords1;

				// modulate the strength based on the height and color
				if ( dist2 > radius ) {
					clip |= 16;
					modulate = 0.0f;
				} else if ( dist2 < -radius ) {
					clip |= 32;
					modulate = 0.0f;
				} else {
					dist2 = Q_fabs(dist2);
					if ( dist2 < radius * 0.5f ) {
						modulate = 1.0f;
					} else {
						modulate = 2.0f * (radius - dist2) * scale;
					}
				}
			}
			clipBits[i] = clip;

			// modulate is a single 4-byte float, so it never straddles a
			// 16-byte block and one load is sufficient here
			modulateVec = vec_ld(0,(float *)&modulate);
			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);
			colorVec = vec_madd(floatColorVec0,modulateVec,zero);
			colorInt = vec_cts(colorVec,0);	// RGBx
			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx
			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx
			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
			// all four words hold the same RGBA, so whichever element the
			// single-element store selects is the right one
			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color
		}

		// build a list of triangles that need light
		numIndexes = 0;
		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {
			int		a, b, c;

			a = tess.indexes[i];
			b = tess.indexes[i+1];
			c = tess.indexes[i+2];
			// a clip bit shared by all three vertexes rejects the triangle
			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {
				continue;	// not lighted
			}
			hitIndexes[numIndexes] = a;
			hitIndexes[numIndexes+1] = b;
			hitIndexes[numIndexes+2] = c;
			numIndexes += 3;
		}

		if ( !numIndexes ) {
			continue;
		}

		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );
		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );

		qglEnableClientState( GL_COLOR_ARRAY );
		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );

		GL_Bind( tr.dlightImage );
		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light
		// where they aren't rendered
		if ( dl->additive ) {
			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		else {
			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		R_DrawElements( numIndexes, hitIndexes );
		backEnd.pc.c_totalIndexes += numIndexes;
		backEnd.pc.c_dlightIndexes += numIndexes;
	}
}
void RB_CalcDiffuseColor_altivec( unsigned char *colors )
{
int i;
float *v, *normal;
trRefEntity_t *ent;
int ambientLightInt;
vec3_t lightDir;
int numVertexes;
vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff,
0x00, 0x00, 0x00, 0xff);
vector float ambientLightVec;
vector float directedLightVec;
vector float lightDirVec;
vector float normalVec0, normalVec1;
vector float incomingVec0, incomingVec1, incomingVec2;
vector float zero, jVec;
vector signed int jVecInt;
vector signed short jVecShort;
vector unsigned char jVecChar, normalPerm;
ent = backEnd.currentEntity;
ambientLightInt = ent->ambientLightInt;
// A lot of this could be simplified if we made sure
// entities light info was 16-byte aligned.
jVecChar = vec_lvsl(0, ent->ambientLight);
ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight);
jVec = vec_ld(11, (vector float *)ent->ambientLight);
ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar);
jVecChar = vec_lvsl(0, ent->directedLight);
directedLightVec = vec_ld(0,(vector float *)ent->directedLight);
jVec = vec_ld(11,(vector float *)ent->directedLight);
directedLightVec = vec_perm(directedLightVec,jVec,jVecChar);
jVecChar = vec_lvsl(0, ent->lightDir);
lightDirVec = vec_ld(0,(vector float *)ent->lightDir);
jVec = vec_ld(11,(vector float *)ent->lightDir);
lightDirVec = vec_perm(lightDirVec,jVec,jVecChar);
zero = (vector float)vec_splat_s8(0);
VectorCopy( ent->lightDir, lightDir );
v = tess.xyz[0];
normal = tess.normal[0];
normalPerm = vec_lvsl(0,normal);
numVertexes = tess.numVertexes;
for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) {
normalVec0 = vec_ld(0,(vector float *)normal);
normalVec1 = vec_ld(11,(vector float *)normal);
normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm);
incomingVec0 = vec_madd(normalVec0, lightDirVec, zero);
incomingVec1 = vec_sld(incomingVec0,incomingVec0,4);
incomingVec2 = vec_add(incomingVec0,incomingVec1);
incomingVec1 = vec_sld(incomingVec1,incomingVec1,4);
incomingVec2 = vec_add(incomingVec2,incomingVec1);
incomingVec0 = vec_splat(incomingVec2,0);
incomingVec0 = vec_max(incomingVec0,zero);
normalPerm = vec_lvsl(12,normal);
jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec);
jVecInt = vec_cts(jVec,0); // RGBx
jVecShort = vec_pack(jVecInt,jVecInt); // RGBxRGBx
jVecChar = vec_packsu(jVecShort,jVecShort); // RGBxRGBxRGBxRGBx
jVecChar = vec_sel(jVecChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255
vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]); // store color
}
}
/*
===================
LerpMeshVertexes_altivec

Decompresses the vertexes of an MD3 surface into tess.xyz / tess.normal,
starting at index tess.numVertexes.

surf     - surface whose packed vertex data lives at surf->ofsXyzNormals;
           each vertex is four shorts (x, y, z, packed lat/long normal).
backlerp - weight of the old frame (backEnd.currentEntity->e.oldframe);
           the new frame (e.frame) is weighted by 1 - backlerp.

When backlerp is exactly 0 only the new frame is read and the position is
scaled with AltiVec. Otherwise positions and normals of both frames are
blended in scalar code and the resulting normals are renormalized with
VectorArrayNormalize.

Does not modify tess.numVertexes.
===================
*/
void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp)
{
	short	*oldXyz, *newXyz, *oldNormals, *newNormals;
	float	*outXyz, *outNormal;
	float	oldXyzScale QALIGN(16);
	float	newXyzScale QALIGN(16);
	float	oldNormalScale QALIGN(16);
	float	newNormalScale QALIGN(16);
	int		vertNum;
	unsigned lat, lng;
	int		numVerts;

	outXyz = tess.xyz[tess.numVertexes];
	outNormal = tess.normal[tess.numVertexes];

	// 4 shorts per vertex: x, y, z and the packed normal
	newXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
		+ (backEnd.currentEntity->e.frame * surf->numVerts * 4);
	newNormals = newXyz + 3;

	newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp);
	newNormalScale = 1.0 - backlerp;

	numVerts = surf->numVerts;

	if ( backlerp == 0 ) {
		vector signed short newNormalsVec0;
		vector signed short newNormalsVec1;
		vector signed int newNormalsIntVec;
		vector float newNormalsFloatVec;
		vector float newXyzScaleVec;
		vector unsigned char newNormalsLoadPermute;
		vector unsigned char newNormalsStorePermute;
		vector float zero;

		// Broadcast newXyzScale to all four lanes. The shift permute is
		// derived from the address of the scalar being loaded, so lane 0 is
		// correct even if the alignment request on newXyzScale is not
		// honoured by the compiler.
		newNormalsStorePermute = vec_lvsl(0,(float *)&newXyzScale);
		newXyzScaleVec = vec_ld(0,(float *)&newXyzScale);
		newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newNormalsStorePermute);
		newXyzScaleVec = vec_splat(newXyzScaleVec,0);
		zero = (vector float)vec_splat_s8(0);
		//
		// just copy the vertexes
		//
		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			newXyz += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			newNormalsLoadPermute = vec_lvsl(0,newXyz);
			newNormalsStorePermute = vec_lvsr(0,outXyz);

			// Unaligned load of the 8 bytes of this vertex. The second load
			// targets the block that holds the last needed byte (offset 7)
			// rather than blindly the following block, so nothing beyond
			// the vertex data is touched when it fits in a single block.
			newNormalsVec0 = vec_ld(0,newXyz);
			newNormalsVec1 = vec_ld(7,newXyz);
			newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute);
			// widen the first four shorts (x, y, z, packed normal) to ints
			newNormalsIntVec = vec_unpackh(newNormalsVec0);
			newNormalsFloatVec = vec_ctf(newNormalsIntVec,0);
			newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero);
			// rotate so each component sits in the lane the element stores
			// below will pick for its destination address
			newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute);
			//outXyz[0] = newXyz[0] * newXyzScale;
			//outXyz[1] = newXyz[1] * newXyzScale;
			//outXyz[2] = newXyz[2] * newXyzScale;

			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= (FUNCTABLE_SIZE/256);
			lng *= (FUNCTABLE_SIZE/256);

			// decode X as cos( lat ) * sin( long )
			// decode Y as sin( lat ) * sin( long )
			// decode Z as cos( long )

			outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			// store x, y and z one element at a time; the fourth lane
			// (the scaled packed normal) is never written
			vec_ste(newNormalsFloatVec,0,outXyz);
			vec_ste(newNormalsFloatVec,4,outXyz);
			vec_ste(newNormalsFloatVec,8,outXyz);
		}
	} else {
		//
		// interpolate and copy the vertex and normal
		//
		oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
			+ (backEnd.currentEntity->e.oldframe * surf->numVerts * 4);
		oldNormals = oldXyz + 3;

		oldXyzScale = MD3_XYZ_SCALE * backlerp;
		oldNormalScale = backlerp;

		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			vec3_t uncompressedOldNormal, uncompressedNewNormal;

			// interpolate the xyz
			outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale;
			outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale;
			outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale;

			// FIXME: interpolate lat/long instead?
			// NOTE(review): this path scales lat/lng by a literal 4 while
			// the non-interpolated path uses FUNCTABLE_SIZE/256 -- these
			// agree only if FUNCTABLE_SIZE is 1024; confirm.
			lat = ( newNormals[0] >> 8 ) & 0xff;
			lng = ( newNormals[0] & 0xff );
			lat *= 4;
			lng *= 4;
			uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			lat = ( oldNormals[0] >> 8 ) & 0xff;
			lng = ( oldNormals[0] & 0xff );
			lat *= 4;
			lng *= 4;

			uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
			uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng];
			uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];

			outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale;
			outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale;
			outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale;

			// VectorNormalize (outNormal);
		}
		// the blended normals are renormalized in one pass over the
		// vertexes just written
		VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts);
	}
}
#endif