/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.

This file is part of Quake III Arena source code.

Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
===========================================================================
*/

/* This file is only compiled for PowerPC builds with Altivec support.
   Altivec intrinsics need to be in a separate file, so GCC's -maltivec
   command line can enable them, but give us the option to _not_ use that
   on other files, where the compiler might then generate Altivec
   instructions for normal floating point, crashing on G3 (etc) processors. */

#include "tr_local.h"

#if idppc_altivec

#if !defined(__APPLE__)
#include <altivec.h>
#endif

/*
===================
ProjectDlightTexture_altivec

For every dynamic light flagged in tess.dlightBits, computes per-vertex
texture coordinates, a clip mask and a modulated light color, collects
the triangles whose three vertexes are not all clipped by a common
plane, and draws them with tr.dlightImage using an additive blend.

Reads the global tess / backEnd state; takes no arguments and returns
nothing.  Returns immediately when the refdef has no dlights.
===================
*/
void ProjectDlightTexture_altivec( void ) {
	int		i, l;
	vec_t	origin0, origin1, origin2;
	float	texCoords0, texCoords1;
	vector float floatColorVec0, floatColorVec1;
	vector float modulateVec, colorVec, zero;
	vector short colorShort;
	vector signed int colorInt;
	vector unsigned char floatColorVecPerm, modulatePerm, colorChar;
	// mask with 0xff in every fourth byte: used to force alpha to 255
	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff);
	float	*texCoords;
	byte	*colors;
	byte	clipBits[SHADER_MAX_VERTEXES];
	float	texCoordsArray[SHADER_MAX_VERTEXES][2];
	byte	colorArray[SHADER_MAX_VERTEXES][4];
	glIndex_t	hitIndexes[SHADER_MAX_INDEXES];
	int		numIndexes;
	float	scale;
	float	radius;
	vec3_t	floatColor;
	float	modulate = 0.0f;

	if ( !backEnd.refdef.num_dlights ) {
		return;
	}

	// There has to be a better way to do this so that floatColor
	//  and/or modulate are already 16-byte aligned.
	floatColorVecPerm = vec_lvsl(0,(float *)floatColor);
	modulatePerm = vec_lvsl(0,(float *)&modulate);
	// replicate the first four permute bytes so that the single float
	// "modulate" is broadcast to all four lanes by vec_perm
	modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0);
	zero = (vector float)vec_splat_s8(0);

	for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) {
		dlight_t	*dl;

		if ( !( tess.dlightBits & ( 1 << l ) ) ) {
			continue;	// this surface definitely doesn't have any of this light
		}
		texCoords = texCoordsArray[0];
		colors = colorArray[0];

		dl = &backEnd.refdef.dlights[l];
		origin0 = dl->transformed[0];
		origin1 = dl->transformed[1];
		origin2 = dl->transformed[2];
		radius = dl->radius;
		scale = 1.0f / radius;

		if(r_greyscale->integer)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = floatColor[1] = floatColor[2] = luminance;
		}
		else if(r_greyscale->value)
		{
			float luminance;

			luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f;
			floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value);
			floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value);
			floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value);
		}
		else
		{
			floatColor[0] = dl->color[0] * 255.0f;
			floatColor[1] = dl->color[1] * 255.0f;
			floatColor[2] = dl->color[2] * 255.0f;
		}

		// Unaligned 12-byte load: fetch the 16-byte block holding the first
		// byte and the block holding the last byte, then merge the two.
		// The merge must span BOTH blocks; permuting the first block with
		// itself yields wrong channels whenever floatColor straddles a
		// 16-byte boundary.
		floatColorVec0 = vec_ld(0, floatColor);
		floatColorVec1 = vec_ld(11, floatColor);
		floatColorVec0 = vec_perm(floatColorVec0,floatColorVec1,floatColorVecPerm);

		for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) {
			int		clip = 0;
			vec_t	dist0, dist1, dist2;

			dist0 = origin0 - tess.xyz[i][0];
			dist1 = origin1 - tess.xyz[i][1];
			dist2 = origin2 - tess.xyz[i][2];

			backEnd.pc.c_dlightVertexes++;

			texCoords0 = 0.5f + dist0 * scale;
			texCoords1 = 0.5f + dist1 * scale;

			if( !r_dlightBacks->integer &&
			    // dist . tess.normal[i]
			    ( dist0 * tess.normal[i][0] +
			      dist1 * tess.normal[i][1] +
			      dist2 * tess.normal[i][2] ) < 0.0f ) {
				clip = 63;
			} else {
				if ( texCoords0 < 0.0f ) {
					clip |= 1;
				} else if ( texCoords0 > 1.0f ) {
					clip |= 2;
				}
				if ( texCoords1 < 0.0f ) {
					clip |= 4;
				} else if ( texCoords1 > 1.0f ) {
					clip |= 8;
				}
				texCoords[0] = texCoords0;
				texCoords[1] = texCoords1;

				// modulate the strength based on the height and color
				if ( dist2 > radius ) {
					clip |= 16;
					modulate = 0.0f;
				} else if ( dist2 < -radius ) {
					clip |= 32;
					modulate = 0.0f;
				} else {
					dist2 = Q_fabs(dist2);
					if ( dist2 < radius * 0.5f ) {
						modulate = 1.0f;
					} else {
						modulate = 2.0f * (radius - dist2) * scale;
					}
				}
			}
			clipBits[i] = clip;

			modulateVec = vec_ld(0,(float *)&modulate);
			modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm);
			colorVec = vec_madd(floatColorVec0,modulateVec,zero);
			colorInt = vec_cts(colorVec,0);	// RGBx
			colorShort = vec_pack(colorInt,colorInt);		// RGBxRGBx
			colorChar = vec_packsu(colorShort,colorShort);	// RGBxRGBxRGBxRGBx
			colorChar = vec_sel(colorChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
			vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors);	// store color
		}

		// build a list of triangles that need light
		numIndexes = 0;
		for ( i = 0 ; i < tess.numIndexes ; i += 3 ) {
			int		a, b, c;

			a = tess.indexes[i];
			b = tess.indexes[i+1];
			c = tess.indexes[i+2];
			if ( clipBits[a] & clipBits[b] & clipBits[c] ) {
				continue;	// not lighted
			}
			hitIndexes[numIndexes] = a;
			hitIndexes[numIndexes+1] = b;
			hitIndexes[numIndexes+2] = c;
			numIndexes += 3;
		}

		if ( !numIndexes ) {
			continue;
		}

		qglEnableClientState( GL_TEXTURE_COORD_ARRAY );
		qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] );

		qglEnableClientState( GL_COLOR_ARRAY );
		qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray );

		GL_Bind( tr.dlightImage );
		// include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light
		// where they aren't rendered
		if ( dl->additive ) {
			GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		else {
			GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
		}
		R_DrawElements( numIndexes, hitIndexes );
		backEnd.pc.c_totalIndexes += numIndexes;
		backEnd.pc.c_dlightIndexes += numIndexes;
	}
}

/*
===================
RB_CalcDiffuseColor_altivec

Writes one 4-byte color per tess vertex into "colors":
  rgb   = ambientLight + max( normal . lightDir, 0 ) * directedLight
          (converted to integer and saturated to 0..255)
  alpha = 255
using the light values of backEnd.currentEntity.

colors: output buffer, must hold at least tess.numVertexes * 4 bytes.
===================
*/
void RB_CalcDiffuseColor_altivec( unsigned char *colors )
{
	int				i;
	float			*normal;
	trRefEntity_t	*ent;
	int				numVertexes;
	// mask with 0xff in every fourth byte: used to force alpha to 255
	vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff,
                                               0x00, 0x00, 0x00, 0xff);
	vector float ambientLightVec;
	vector float directedLightVec;
	vector float lightDirVec;
	vector float normalVec0, normalVec1;
	vector float incomingVec0, incomingVec1, incomingVec2;
	vector float zero, jVec;
	vector signed int jVecInt;
	vector signed short jVecShort;
	vector unsigned char jVecChar, normalPerm;

	ent = backEnd.currentEntity;

	// A lot of this could be simplified if we made sure
	// entities light info was 16-byte aligned.
	// Each of the three loads below is an unaligned 12-byte load:
	// two aligned loads merged through an lvsl-generated permute.
	jVecChar = vec_lvsl(0, ent->ambientLight);
	ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight);
	jVec = vec_ld(11, (vector float *)ent->ambientLight);
	ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar);

	jVecChar = vec_lvsl(0, ent->directedLight);
	directedLightVec = vec_ld(0,(vector float *)ent->directedLight);
	jVec = vec_ld(11,(vector float *)ent->directedLight);
	directedLightVec = vec_perm(directedLightVec,jVec,jVecChar);

	jVecChar = vec_lvsl(0, ent->lightDir);
	lightDirVec = vec_ld(0,(vector float *)ent->lightDir);
	jVec = vec_ld(11,(vector float *)ent->lightDir);
	lightDirVec = vec_perm(lightDirVec,jVec,jVecChar);

	zero = (vector float)vec_splat_s8(0);

	normal = tess.normal[0];

	numVertexes = tess.numVertexes;
	for (i = 0 ; i < numVertexes ; i++, normal += 4) {
		// The permute mask must describe the address actually being
		// loaded.  The pointer advances 16 bytes per vertex, so a mask
		// built for "normal + 12" (as was done at the end of the previous
		// iteration) does not match the next vertex's alignment.
		normalPerm = vec_lvsl(0,normal);
		normalVec0 = vec_ld(0,(vector float *)normal);
		normalVec1 = vec_ld(11,(vector float *)normal);
		normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm);

		// three-component dot product: rotate the products and sum, so
		// that element 0 ends up holding x + y + z
		incomingVec0 = vec_madd(normalVec0, lightDirVec, zero);
		incomingVec1 = vec_sld(incomingVec0,incomingVec0,4);
		incomingVec2 = vec_add(incomingVec0,incomingVec1);
		incomingVec1 = vec_sld(incomingVec1,incomingVec1,4);
		incomingVec2 = vec_add(incomingVec2,incomingVec1);
		incomingVec0 = vec_splat(incomingVec2,0);
		incomingVec0 = vec_max(incomingVec0,zero);

		jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec);
		jVecInt = vec_cts(jVec,0);	// RGBx
		jVecShort = vec_pack(jVecInt,jVecInt);		// RGBxRGBx
		jVecChar = vec_packsu(jVecShort,jVecShort);	// RGBxRGBxRGBxRGBx
		jVecChar = vec_sel(jVecChar,vSel,vSel);		// RGBARGBARGBARGBA replace alpha with 255
		vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]);	// store color
	}
}

/*
===================
DecodeLatLongNormal_altivec

Expands a packed normal (latitude in the high byte, longitude in the
low byte) into three floats using tr.sinTable.

packed: the 16-bit packed normal
out:    receives the three decoded components
===================
*/
static void DecodeLatLongNormal_altivec( short packed, float *out )
{
	unsigned lat, lng;

	lat = ( packed >> 8 ) & 0xff;
	lng = ( packed & 0xff );
	// rescale the 0..255 angles to sine table indexes
	lat *= (FUNCTABLE_SIZE/256);
	lng *= (FUNCTABLE_SIZE/256);

	// decode X as cos( lat ) * sin( long )
	// decode Y as sin( lat ) * sin( long )
	// decode Z as cos( long )

	out[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng];
	out[1] = tr.sinTable[lat] * tr.sinTable[lng];
	out[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK];
}

/*
===================
LerpMeshVertexes_altivec

Appends surf->numVerts vertexes and normals to tess.xyz / tess.normal,
starting at index tess.numVertexes.

surf:     the md3 surface whose packed xyz/normal data is read
backlerp: weight of backEnd.currentEntity->e.oldframe; the current
          frame (e.frame) is weighted by 1 - backlerp.  When backlerp
          is exactly 0 only the current frame is read (vectorized
          path) and the normals are written as decoded; otherwise both
          frames are blended and the resulting normals are passed
          through VectorArrayNormalize.
===================
*/
void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp)
{
	short	*newXyz, *newNormals;
	float	*outXyz, *outNormal;
	float	oldXyzScale QALIGN(16);
	float	newXyzScale QALIGN(16);
	float	oldNormalScale QALIGN(16);
	float	newNormalScale QALIGN(16);
	int		vertNum;
	int		numVerts;

	outXyz = tess.xyz[tess.numVertexes];
	outNormal = tess.normal[tess.numVertexes];

	// each packed vertex is four shorts: x, y, z, lat/long normal
	newXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
		+ (backEnd.currentEntity->e.frame * surf->numVerts * 4);
	newNormals = newXyz + 3;

	newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp);
	newNormalScale = 1.0 - backlerp;

	numVerts = surf->numVerts;

	if ( backlerp == 0 ) {
		vector signed short newNormalsVec0;
		vector signed short newNormalsVec1;
		vector signed int newNormalsIntVec;
		vector float newNormalsFloatVec;
		vector float newXyzScaleVec;
		vector unsigned char newNormalsLoadPermute;
		vector unsigned char newNormalsStorePermute;
		vector float zero;

		// broadcast newXyzScale to all four lanes
		newNormalsStorePermute = vec_lvsl(0,(float *)&newXyzScaleVec);
		newXyzScaleVec = *(vector float *)&newXyzScale;
		newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newNormalsStorePermute);
		newXyzScaleVec = vec_splat(newXyzScaleVec,0);
		zero = (vector float)vec_splat_s8(0);
		//
		// just copy the vertexes
		//
		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			newXyz += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			// permutes are rebuilt per vertex from the current pointers
			newNormalsLoadPermute = vec_lvsl(0,newXyz);
			newNormalsStorePermute = vec_lvsr(0,outXyz);
			newNormalsVec0 = vec_ld(0,newXyz);
			newNormalsVec1 = vec_ld(16,newXyz);
			newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute);
			// widen the first four shorts to int, convert to float, scale
			newNormalsIntVec = vec_unpackh(newNormalsVec0);
			newNormalsFloatVec = vec_ctf(newNormalsIntVec,0);
			newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero);
			newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute);

			DecodeLatLongNormal_altivec( newNormals[0], outNormal );

			// scalar equivalent: outXyz[k] = newXyz[k] * newXyzScale, k = 0..2
			vec_ste(newNormalsFloatVec,0,outXyz);
			vec_ste(newNormalsFloatVec,4,outXyz);
			vec_ste(newNormalsFloatVec,8,outXyz);
		}
	} else {
		short	*oldXyz, *oldNormals;

		//
		// interpolate and copy the vertex and normal
		//
		oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals)
			+ (backEnd.currentEntity->e.oldframe * surf->numVerts * 4);
		oldNormals = oldXyz + 3;

		oldXyzScale = MD3_XYZ_SCALE * backlerp;
		oldNormalScale = backlerp;

		for (vertNum=0 ; vertNum < numVerts ; vertNum++,
			oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4,
			outXyz += 4, outNormal += 4)
		{
			vec3_t uncompressedOldNormal, uncompressedNewNormal;

			// interpolate the xyz
			outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale;
			outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale;
			outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale;

			// FIXME: interpolate lat/long instead?
			DecodeLatLongNormal_altivec( newNormals[0], uncompressedNewNormal );
			DecodeLatLongNormal_altivec( oldNormals[0], uncompressedOldNormal );

			outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale;
			outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale;
			outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale;

//			VectorNormalize (outNormal);
		}
		// normalize all blended normals in one pass
		VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts);
	}
}

#endif