diff --git a/Makefile b/Makefile index e85e1b8c..9d9757a5 100644 --- a/Makefile +++ b/Makefile @@ -348,11 +348,11 @@ ifneq (,$(findstring "$(PLATFORM)", "linux" "gnu_kfreebsd" "kfreebsd-gnu" "gnu") HAVE_VM_COMPILED=true else ifeq ($(ARCH),ppc) - BASE_CFLAGS += -maltivec + ALTIVEC_CFLAGS = -maltivec HAVE_VM_COMPILED=true endif ifeq ($(ARCH),ppc64) - BASE_CFLAGS += -maltivec + ALTIVEC_CFLAGS = -maltivec HAVE_VM_COMPILED=true endif ifeq ($(ARCH),sparc) @@ -440,10 +440,12 @@ ifeq ($(PLATFORM),darwin) -DMAC_OS_X_VERSION_MIN_REQUIRED=$(MAC_OS_X_VERSION_MIN_REQUIRED) ifeq ($(ARCH),ppc) - BASE_CFLAGS += -arch ppc -faltivec + BASE_CFLAGS += -arch ppc + ALTIVEC_CFLAGS = -faltivec endif ifeq ($(ARCH),ppc64) - BASE_CFLAGS += -arch ppc64 -faltivec + BASE_CFLAGS += -arch ppc64 + ALTIVEC_CFLAGS = -faltivec endif ifeq ($(ARCH),x86) OPTIMIZEVM += -march=prescott -mfpmath=sse @@ -1189,11 +1191,21 @@ $(echo_cmd) "CC $<" $(Q)$(CC) $(NOTSHLIBCFLAGS) $(CFLAGS) $(CLIENT_CFLAGS) $(OPTIMIZE) -o $@ -c $< endef +define DO_CC_ALTIVEC +$(echo_cmd) "CC $<" +$(Q)$(CC) $(NOTSHLIBCFLAGS) $(CFLAGS) $(CLIENT_CFLAGS) $(OPTIMIZE) $(ALTIVEC_CFLAGS) -o $@ -c $< +endef + define DO_REF_CC $(echo_cmd) "REF_CC $<" $(Q)$(CC) $(SHLIBCFLAGS) $(CFLAGS) $(CLIENT_CFLAGS) $(OPTIMIZE) -o $@ -c $< endef +define DO_REF_CC_ALTIVEC +$(echo_cmd) "REF_CC $<" +$(Q)$(CC) $(SHLIBCFLAGS) $(CFLAGS) $(CLIENT_CFLAGS) $(OPTIMIZE) $(ALTIVEC_CFLAGS) -o $@ -c $< +endef + define DO_REF_STR $(echo_cmd) "REF_STR $<" $(Q)rm -f $@ @@ -1664,6 +1676,7 @@ Q3OBJ = \ $(B)/client/net_ip.o \ $(B)/client/huffman.o \ \ + $(B)/client/snd_altivec.o \ $(B)/client/snd_adpcm.o \ $(B)/client/snd_dma.o \ $(B)/client/snd_mem.o \ @@ -1817,6 +1830,7 @@ Q3R2STRINGOBJ = \ $(B)/renderergl2/glsl/tonemap_vp.o Q3ROBJ = \ + $(B)/renderergl1/tr_altivec.o \ $(B)/renderergl1/tr_animation.o \ $(B)/renderergl1/tr_backend.o \ $(B)/renderergl1/tr_bsp.o \ @@ -2627,6 +2641,9 @@ $(B)/client/%.o: $(ASMDIR)/%.s $(B)/client/%.o: $(ASMDIR)/%.c $(DO_CC) -march=k8 +$(B)/client/snd_altivec.o: $(CDIR)/snd_altivec.c + $(DO_CC_ALTIVEC) + $(B)/client/%.o: $(CDIR)/%.c $(DO_CC) @@ -2691,6 +2708,9 @@ $(B)/renderergl1/%.o: $(RCOMMONDIR)/%.c $(B)/renderergl1/%.o: $(RGL1DIR)/%.c $(DO_REF_CC) +$(B)/renderergl1/tr_altivec.o: $(RGL1DIR)/tr_altivec.c + $(DO_REF_CC_ALTIVEC) + $(B)/renderergl2/glsl/%.c: $(RGL2DIR)/glsl/%.glsl $(DO_REF_STR) diff --git a/code/client/snd_altivec.c b/code/client/snd_altivec.c new file mode 100644 index 00000000..e051dda9 --- /dev/null +++ b/code/client/snd_altivec.c @@ -0,0 +1,229 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +/* This file is only compiled for PowerPC builds with Altivec support. + Altivec intrinsics need to be in a separate file, so GCC's -maltivec + command line can enable them, but give us the option to _not_ use that + on other files, where the compiler might then generate Altivec + instructions for normal floating point, crashing on G3 (etc) processors. */ + +#include "client.h" +#include "snd_local.h" + +#if idppc_altivec + +#if !defined(__APPLE__) +#include +#endif + +void S_PaintChannelFrom16_altivec( portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE], int snd_vol, channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) { + int data, aoff, boff; + int leftvol, rightvol; + int i, j; + portable_samplepair_t *samp; + sndBuffer *chunk; + short *samples; + float ooff, fdata[2], fdiv, fleftvol, frightvol; + + if (sc->soundChannels <= 0) { + return; + } + + samp = &paintbuffer[ bufferOffset ]; + + if (ch->doppler) { + sampleOffset = sampleOffset*ch->oldDopplerScale; + } + + if ( sc->soundChannels == 2 ) { + sampleOffset *= sc->soundChannels; + + if ( sampleOffset & 1 ) { + sampleOffset &= ~1; + } + } + + chunk = sc->soundData; + while (sampleOffset>=SND_CHUNK_SIZE) { + chunk = chunk->next; + sampleOffset -= SND_CHUNK_SIZE; + if (!chunk) { + chunk = sc->soundData; + } + } + + if (!ch->doppler || ch->dopplerScale==1.0f) { + vector signed short volume_vec; + vector unsigned int volume_shift; + int vectorCount, samplesLeft, chunkSamplesLeft; + leftvol = ch->leftvol*snd_vol; + rightvol = ch->rightvol*snd_vol; + samples = chunk->sndChunk; + ((short *)&volume_vec)[0] = leftvol; + ((short *)&volume_vec)[1] = leftvol; + ((short *)&volume_vec)[4] = leftvol; + ((short *)&volume_vec)[5] = leftvol; + ((short *)&volume_vec)[2] = rightvol; + ((short *)&volume_vec)[3] = rightvol; + ((short *)&volume_vec)[6] = rightvol; + ((short *)&volume_vec)[7] = rightvol; + volume_shift = vec_splat_u32(8); + i = 0; + + while(i < count) { + /* Try to align destination to 16-byte boundary */ + while(i < count && (((unsigned long)&samp[i] & 0x1f) || ((count-i) < 8) || ((SND_CHUNK_SIZE - sampleOffset) < 8))) { + data = samples[sampleOffset++]; + samp[i].left += (data * leftvol)>>8; + + if ( sc->soundChannels == 2 ) { + data = samples[sampleOffset++]; + } + samp[i].right += (data * rightvol)>>8; + + if (sampleOffset == SND_CHUNK_SIZE) { + chunk = chunk->next; + samples = chunk->sndChunk; + sampleOffset = 0; + } + i++; + } + /* Destination is now aligned. Process as many 8-sample + chunks as we can before we run out of room from the current + sound chunk. We do 8 per loop to avoid extra source data reads. */ + samplesLeft = count - i; + chunkSamplesLeft = SND_CHUNK_SIZE - sampleOffset; + if(samplesLeft > chunkSamplesLeft) + samplesLeft = chunkSamplesLeft; + + vectorCount = samplesLeft / 8; + + if(vectorCount) + { + vector unsigned char tmp; + vector short s0, s1, sampleData0, sampleData1; + vector signed int merge0, merge1; + vector signed int d0, d1, d2, d3; + vector unsigned char samplePermute0 = + VECCONST_UINT8(0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7); + vector unsigned char samplePermute1 = + VECCONST_UINT8(8, 9, 12, 13, 8, 9, 12, 13, 10, 11, 14, 15, 10, 11, 14, 15); + vector unsigned char loadPermute0, loadPermute1; + + // Rather than permute the vectors after we load them to do the sample + // replication and rearrangement, we permute the alignment vector so + // we do everything in one step below and avoid data shuffling. + tmp = vec_lvsl(0,&samples[sampleOffset]); + loadPermute0 = vec_perm(tmp,tmp,samplePermute0); + loadPermute1 = vec_perm(tmp,tmp,samplePermute1); + + s0 = *(vector short *)&samples[sampleOffset]; + while(vectorCount) + { + /* Load up source (16-bit) sample data */ + s1 = *(vector short *)&samples[sampleOffset+7]; + + /* Load up destination sample data */ + d0 = *(vector signed int *)&samp[i]; + d1 = *(vector signed int *)&samp[i+2]; + d2 = *(vector signed int *)&samp[i+4]; + d3 = *(vector signed int *)&samp[i+6]; + + sampleData0 = vec_perm(s0,s1,loadPermute0); + sampleData1 = vec_perm(s0,s1,loadPermute1); + + merge0 = vec_mule(sampleData0,volume_vec); + merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ + + merge1 = vec_mulo(sampleData0,volume_vec); + merge1 = vec_sra(merge1,volume_shift); + + d0 = vec_add(merge0,d0); + d1 = vec_add(merge1,d1); + + merge0 = vec_mule(sampleData1,volume_vec); + merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ + + merge1 = vec_mulo(sampleData1,volume_vec); + merge1 = vec_sra(merge1,volume_shift); + + d2 = vec_add(merge0,d2); + d3 = vec_add(merge1,d3); + + /* Store destination sample data */ + *(vector signed int *)&samp[i] = d0; + *(vector signed int *)&samp[i+2] = d1; + *(vector signed int *)&samp[i+4] = d2; + *(vector signed int *)&samp[i+6] = d3; + + i += 8; + vectorCount--; + s0 = s1; + sampleOffset += 8; + } + if (sampleOffset == SND_CHUNK_SIZE) { + chunk = chunk->next; + samples = chunk->sndChunk; + sampleOffset = 0; + } + } + } + } else { + fleftvol = ch->leftvol*snd_vol; + frightvol = ch->rightvol*snd_vol; + + ooff = sampleOffset; + samples = chunk->sndChunk; + + for ( i=0 ; idopplerScale * sc->soundChannels; + boff = ooff; + fdata[0] = fdata[1] = 0; + for (j=aoff; jsoundChannels) { + if (j == SND_CHUNK_SIZE) { + chunk = chunk->next; + if (!chunk) { + chunk = sc->soundData; + } + samples = chunk->sndChunk; + ooff -= SND_CHUNK_SIZE; + } + if ( sc->soundChannels == 2 ) { + fdata[0] += samples[j&(SND_CHUNK_SIZE-1)]; + fdata[1] += samples[(j+1)&(SND_CHUNK_SIZE-1)]; + } else { + fdata[0] += samples[j&(SND_CHUNK_SIZE-1)]; + fdata[1] += samples[j&(SND_CHUNK_SIZE-1)]; + } + } + fdiv = 256 * (boff-aoff) / sc->soundChannels; + samp[i].left += (fdata[0] * fleftvol)/fdiv; + samp[i].right += (fdata[1] * frightvol)/fdiv; + } + } +} + + +#endif + diff --git a/code/client/snd_local.h b/code/client/snd_local.h index dc56f1bc..3b86d974 100644 --- a/code/client/snd_local.h +++ b/code/client/snd_local.h @@ -264,3 +264,7 @@ typedef enum typedef int srcHandle_t; qboolean S_AL_Init( soundInterface_t *si ); + +#ifdef idppc_altivec +void S_PaintChannelFrom16_altivec( portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE], int snd_vol, channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ); +#endif diff --git a/code/client/snd_mix.c b/code/client/snd_mix.c index 99fe03c5..af0ba44a 100644 --- a/code/client/snd_mix.c +++ b/code/client/snd_mix.c @@ -23,9 +23,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "client.h" #include "snd_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif static portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE]; static int snd_vol; @@ -241,197 +238,6 @@ CHANNEL MIXING =============================================================================== */ -#if idppc_altivec -static void S_PaintChannelFrom16_altivec( channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) { - int data, aoff, boff; - int leftvol, rightvol; - int i, j; - portable_samplepair_t *samp; - sndBuffer *chunk; - short *samples; - float ooff, fdata[2], fdiv, fleftvol, frightvol; - - if (sc->soundChannels <= 0) { - return; - } - - samp = &paintbuffer[ bufferOffset ]; - - if (ch->doppler) { - sampleOffset = sampleOffset*ch->oldDopplerScale; - } - - if ( sc->soundChannels == 2 ) { - sampleOffset *= sc->soundChannels; - - if ( sampleOffset & 1 ) { - sampleOffset &= ~1; - } - } - - chunk = sc->soundData; - while (sampleOffset>=SND_CHUNK_SIZE) { - chunk = chunk->next; - sampleOffset -= SND_CHUNK_SIZE; - if (!chunk) { - chunk = sc->soundData; - } - } - - if (!ch->doppler || ch->dopplerScale==1.0f) { - vector signed short volume_vec; - vector unsigned int volume_shift; - int vectorCount, samplesLeft, chunkSamplesLeft; - leftvol = ch->leftvol*snd_vol; - rightvol = ch->rightvol*snd_vol; - samples = chunk->sndChunk; - ((short *)&volume_vec)[0] = leftvol; - ((short *)&volume_vec)[1] = leftvol; - ((short *)&volume_vec)[4] = leftvol; - ((short *)&volume_vec)[5] = leftvol; - ((short *)&volume_vec)[2] = rightvol; - ((short *)&volume_vec)[3] = rightvol; - ((short *)&volume_vec)[6] = rightvol; - ((short *)&volume_vec)[7] = rightvol; - volume_shift = vec_splat_u32(8); - i = 0; - - while(i < count) { - /* Try to align destination to 16-byte boundary */ - while(i < count && (((unsigned long)&samp[i] & 0x1f) || ((count-i) < 8) || ((SND_CHUNK_SIZE - sampleOffset) < 8))) { - data = samples[sampleOffset++]; - samp[i].left += (data * leftvol)>>8; - - if ( sc->soundChannels == 2 ) { - data = samples[sampleOffset++]; - } - samp[i].right += (data * rightvol)>>8; - - if (sampleOffset == SND_CHUNK_SIZE) { - chunk = chunk->next; - samples = chunk->sndChunk; - sampleOffset = 0; - } - i++; - } - /* Destination is now aligned. Process as many 8-sample - chunks as we can before we run out of room from the current - sound chunk. We do 8 per loop to avoid extra source data reads. */ - samplesLeft = count - i; - chunkSamplesLeft = SND_CHUNK_SIZE - sampleOffset; - if(samplesLeft > chunkSamplesLeft) - samplesLeft = chunkSamplesLeft; - - vectorCount = samplesLeft / 8; - - if(vectorCount) - { - vector unsigned char tmp; - vector short s0, s1, sampleData0, sampleData1; - vector signed int merge0, merge1; - vector signed int d0, d1, d2, d3; - vector unsigned char samplePermute0 = - VECCONST_UINT8(0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7); - vector unsigned char samplePermute1 = - VECCONST_UINT8(8, 9, 12, 13, 8, 9, 12, 13, 10, 11, 14, 15, 10, 11, 14, 15); - vector unsigned char loadPermute0, loadPermute1; - - // Rather than permute the vectors after we load them to do the sample - // replication and rearrangement, we permute the alignment vector so - // we do everything in one step below and avoid data shuffling. - tmp = vec_lvsl(0,&samples[sampleOffset]); - loadPermute0 = vec_perm(tmp,tmp,samplePermute0); - loadPermute1 = vec_perm(tmp,tmp,samplePermute1); - - s0 = *(vector short *)&samples[sampleOffset]; - while(vectorCount) - { - /* Load up source (16-bit) sample data */ - s1 = *(vector short *)&samples[sampleOffset+7]; - - /* Load up destination sample data */ - d0 = *(vector signed int *)&samp[i]; - d1 = *(vector signed int *)&samp[i+2]; - d2 = *(vector signed int *)&samp[i+4]; - d3 = *(vector signed int *)&samp[i+6]; - - sampleData0 = vec_perm(s0,s1,loadPermute0); - sampleData1 = vec_perm(s0,s1,loadPermute1); - - merge0 = vec_mule(sampleData0,volume_vec); - merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ - - merge1 = vec_mulo(sampleData0,volume_vec); - merge1 = vec_sra(merge1,volume_shift); - - d0 = vec_add(merge0,d0); - d1 = vec_add(merge1,d1); - - merge0 = vec_mule(sampleData1,volume_vec); - merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */ - - merge1 = vec_mulo(sampleData1,volume_vec); - merge1 = vec_sra(merge1,volume_shift); - - d2 = vec_add(merge0,d2); - d3 = vec_add(merge1,d3); - - /* Store destination sample data */ - *(vector signed int *)&samp[i] = d0; - *(vector signed int *)&samp[i+2] = d1; - *(vector signed int *)&samp[i+4] = d2; - *(vector signed int *)&samp[i+6] = d3; - - i += 8; - vectorCount--; - s0 = s1; - sampleOffset += 8; - } - if (sampleOffset == SND_CHUNK_SIZE) { - chunk = chunk->next; - samples = chunk->sndChunk; - sampleOffset = 0; - } - } - } - } else { - fleftvol = ch->leftvol*snd_vol; - frightvol = ch->rightvol*snd_vol; - - ooff = sampleOffset; - samples = chunk->sndChunk; - - for ( i=0 ; idopplerScale * sc->soundChannels; - boff = ooff; - fdata[0] = fdata[1] = 0; - for (j=aoff; jsoundChannels) { - if (j == SND_CHUNK_SIZE) { - chunk = chunk->next; - if (!chunk) { - chunk = sc->soundData; - } - samples = chunk->sndChunk; - ooff -= SND_CHUNK_SIZE; - } - if ( sc->soundChannels == 2 ) { - fdata[0] += samples[j&(SND_CHUNK_SIZE-1)]; - fdata[1] += samples[(j+1)&(SND_CHUNK_SIZE-1)]; - } else { - fdata[0] += samples[j&(SND_CHUNK_SIZE-1)]; - fdata[1] += samples[j&(SND_CHUNK_SIZE-1)]; - } - } - fdiv = 256 * (boff-aoff) / sc->soundChannels; - samp[i].left += (fdata[0] * fleftvol)/fdiv; - samp[i].right += (fdata[1] * frightvol)/fdiv; - } - } -} -#endif - static void S_PaintChannelFrom16_scalar( channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) { int data, aoff, boff; int leftvol, rightvol; @@ -530,8 +336,8 @@ static void S_PaintChannelFrom16_scalar( channel_t *ch, const sfx_t *sc, int cou static void S_PaintChannelFrom16( channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) { #if idppc_altivec if (com_altivec->integer) { - // must be in a separate function or G3 systems will crash. - S_PaintChannelFrom16_altivec( ch, sc, count, sampleOffset, bufferOffset ); + // must be in a separate translation unit or G3 systems will crash. + S_PaintChannelFrom16_altivec( paintbuffer, snd_vol, ch, sc, count, sampleOffset, bufferOffset ); return; } #endif diff --git a/code/renderergl1/tr_altivec.c b/code/renderergl1/tr_altivec.c new file mode 100644 index 00000000..914aa448 --- /dev/null +++ b/code/renderergl1/tr_altivec.c @@ -0,0 +1,414 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +/* This file is only compiled for PowerPC builds with Altivec support. + Altivec intrinsics need to be in a separate file, so GCC's -maltivec + command line can enable them, but give us the option to _not_ use that + on other files, where the compiler might then generate Altivec + instructions for normal floating point, crashing on G3 (etc) processors. */ + +#include "tr_local.h" + +#if idppc_altivec + +#if !defined(__APPLE__) +#include +#endif + +void ProjectDlightTexture_altivec( void ) { + int i, l; + vec_t origin0, origin1, origin2; + float texCoords0, texCoords1; + vector float floatColorVec0, floatColorVec1; + vector float modulateVec, colorVec, zero; + vector short colorShort; + vector signed int colorInt; + vector unsigned char floatColorVecPerm, modulatePerm, colorChar; + vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff); + float *texCoords; + byte *colors; + byte clipBits[SHADER_MAX_VERTEXES]; + float texCoordsArray[SHADER_MAX_VERTEXES][2]; + byte colorArray[SHADER_MAX_VERTEXES][4]; + glIndex_t hitIndexes[SHADER_MAX_INDEXES]; + int numIndexes; + float scale; + float radius; + vec3_t floatColor; + float modulate = 0.0f; + + if ( !backEnd.refdef.num_dlights ) { + return; + } + + // There has to be a better way to do this so that floatColor + // and/or modulate are already 16-byte aligned. + floatColorVecPerm = vec_lvsl(0,(float *)floatColor); + modulatePerm = vec_lvsl(0,(float *)&modulate); + modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0); + zero = (vector float)vec_splat_s8(0); + + for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) { + dlight_t *dl; + + if ( !( tess.dlightBits & ( 1 << l ) ) ) { + continue; // this surface definitely doesn't have any of this light + } + texCoords = texCoordsArray[0]; + colors = colorArray[0]; + + dl = &backEnd.refdef.dlights[l]; + origin0 = dl->transformed[0]; + origin1 = dl->transformed[1]; + origin2 = dl->transformed[2]; + radius = dl->radius; + scale = 1.0f / radius; + + if(r_greyscale->integer) + { + float luminance; + + luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; + floatColor[0] = floatColor[1] = floatColor[2] = luminance; + } + else if(r_greyscale->value) + { + float luminance; + + luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; + floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value); + floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value); + floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value); + } + else + { + floatColor[0] = dl->color[0] * 255.0f; + floatColor[1] = dl->color[1] * 255.0f; + floatColor[2] = dl->color[2] * 255.0f; + } + floatColorVec0 = vec_ld(0, floatColor); + floatColorVec1 = vec_ld(11, floatColor); + floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm); + for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) { + int clip = 0; + vec_t dist0, dist1, dist2; + + dist0 = origin0 - tess.xyz[i][0]; + dist1 = origin1 - tess.xyz[i][1]; + dist2 = origin2 - tess.xyz[i][2]; + + backEnd.pc.c_dlightVertexes++; + + texCoords0 = 0.5f + dist0 * scale; + texCoords1 = 0.5f + dist1 * scale; + + if( !r_dlightBacks->integer && + // dist . tess.normal[i] + ( dist0 * tess.normal[i][0] + + dist1 * tess.normal[i][1] + + dist2 * tess.normal[i][2] ) < 0.0f ) { + clip = 63; + } else { + if ( texCoords0 < 0.0f ) { + clip |= 1; + } else if ( texCoords0 > 1.0f ) { + clip |= 2; + } + if ( texCoords1 < 0.0f ) { + clip |= 4; + } else if ( texCoords1 > 1.0f ) { + clip |= 8; + } + texCoords[0] = texCoords0; + texCoords[1] = texCoords1; + + // modulate the strength based on the height and color + if ( dist2 > radius ) { + clip |= 16; + modulate = 0.0f; + } else if ( dist2 < -radius ) { + clip |= 32; + modulate = 0.0f; + } else { + dist2 = Q_fabs(dist2); + if ( dist2 < radius * 0.5f ) { + modulate = 1.0f; + } else { + modulate = 2.0f * (radius - dist2) * scale; + } + } + } + clipBits[i] = clip; + + modulateVec = vec_ld(0,(float *)&modulate); + modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm); + colorVec = vec_madd(floatColorVec0,modulateVec,zero); + colorInt = vec_cts(colorVec,0); // RGBx + colorShort = vec_pack(colorInt,colorInt); // RGBxRGBx + colorChar = vec_packsu(colorShort,colorShort); // RGBxRGBxRGBxRGBx + colorChar = vec_sel(colorChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 + vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors); // store color + } + + // build a list of triangles that need light + numIndexes = 0; + for ( i = 0 ; i < tess.numIndexes ; i += 3 ) { + int a, b, c; + + a = tess.indexes[i]; + b = tess.indexes[i+1]; + c = tess.indexes[i+2]; + if ( clipBits[a] & clipBits[b] & clipBits[c] ) { + continue; // not lighted + } + hitIndexes[numIndexes] = a; + hitIndexes[numIndexes+1] = b; + hitIndexes[numIndexes+2] = c; + numIndexes += 3; + } + + if ( !numIndexes ) { + continue; + } + + qglEnableClientState( GL_TEXTURE_COORD_ARRAY ); + qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] ); + + qglEnableClientState( GL_COLOR_ARRAY ); + qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray ); + + GL_Bind( tr.dlightImage ); + // include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light + // where they aren't rendered + if ( dl->additive ) { + GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); + } + else { + GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); + } + R_DrawElements( numIndexes, hitIndexes ); + backEnd.pc.c_totalIndexes += numIndexes; + backEnd.pc.c_dlightIndexes += numIndexes; + } +} + +void RB_CalcDiffuseColor_altivec( unsigned char *colors ) +{ + int i; + float *v, *normal; + trRefEntity_t *ent; + int ambientLightInt; + vec3_t lightDir; + int numVertexes; + vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0xff); + vector float ambientLightVec; + vector float directedLightVec; + vector float lightDirVec; + vector float normalVec0, normalVec1; + vector float incomingVec0, incomingVec1, incomingVec2; + vector float zero, jVec; + vector signed int jVecInt; + vector signed short jVecShort; + vector unsigned char jVecChar, normalPerm; + ent = backEnd.currentEntity; + ambientLightInt = ent->ambientLightInt; + // A lot of this could be simplified if we made sure + // entities light info was 16-byte aligned. + jVecChar = vec_lvsl(0, ent->ambientLight); + ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight); + jVec = vec_ld(11, (vector float *)ent->ambientLight); + ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar); + + jVecChar = vec_lvsl(0, ent->directedLight); + directedLightVec = vec_ld(0,(vector float *)ent->directedLight); + jVec = vec_ld(11,(vector float *)ent->directedLight); + directedLightVec = vec_perm(directedLightVec,jVec,jVecChar); + + jVecChar = vec_lvsl(0, ent->lightDir); + lightDirVec = vec_ld(0,(vector float *)ent->lightDir); + jVec = vec_ld(11,(vector float *)ent->lightDir); + lightDirVec = vec_perm(lightDirVec,jVec,jVecChar); + + zero = (vector float)vec_splat_s8(0); + VectorCopy( ent->lightDir, lightDir ); + + v = tess.xyz[0]; + normal = tess.normal[0]; + + normalPerm = vec_lvsl(0,normal); + numVertexes = tess.numVertexes; + for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) { + normalVec0 = vec_ld(0,(vector float *)normal); + normalVec1 = vec_ld(11,(vector float *)normal); + normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm); + incomingVec0 = vec_madd(normalVec0, lightDirVec, zero); + incomingVec1 = vec_sld(incomingVec0,incomingVec0,4); + incomingVec2 = vec_add(incomingVec0,incomingVec1); + incomingVec1 = vec_sld(incomingVec1,incomingVec1,4); + incomingVec2 = vec_add(incomingVec2,incomingVec1); + incomingVec0 = vec_splat(incomingVec2,0); + incomingVec0 = vec_max(incomingVec0,zero); + normalPerm = vec_lvsl(12,normal); + jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec); + jVecInt = vec_cts(jVec,0); // RGBx + jVecShort = vec_pack(jVecInt,jVecInt); // RGBxRGBx + jVecChar = vec_packsu(jVecShort,jVecShort); // RGBxRGBxRGBxRGBx + jVecChar = vec_sel(jVecChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 + vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]); // store color + } +} + +void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp) +{ + short *oldXyz, *newXyz, *oldNormals, *newNormals; + float *outXyz, *outNormal; + float oldXyzScale QALIGN(16); + float newXyzScale QALIGN(16); + float oldNormalScale QALIGN(16); + float newNormalScale QALIGN(16); + int vertNum; + unsigned lat, lng; + int numVerts; + + outXyz = tess.xyz[tess.numVertexes]; + outNormal = tess.normal[tess.numVertexes]; + + newXyz = (short *)((byte *)surf + surf->ofsXyzNormals) + + (backEnd.currentEntity->e.frame * surf->numVerts * 4); + newNormals = newXyz + 3; + + newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp); + newNormalScale = 1.0 - backlerp; + + numVerts = surf->numVerts; + + if ( backlerp == 0 ) { + vector signed short newNormalsVec0; + vector signed short newNormalsVec1; + vector signed int newNormalsIntVec; + vector float newNormalsFloatVec; + vector float newXyzScaleVec; + vector unsigned char newNormalsLoadPermute; + vector unsigned char newNormalsStorePermute; + vector float zero; + + newNormalsStorePermute = vec_lvsl(0,(float *)&newXyzScaleVec); + newXyzScaleVec = *(vector float *)&newXyzScale; + newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newNormalsStorePermute); + newXyzScaleVec = vec_splat(newXyzScaleVec,0); + newNormalsLoadPermute = vec_lvsl(0,newXyz); + newNormalsStorePermute = vec_lvsr(0,outXyz); + zero = (vector float)vec_splat_s8(0); + // + // just copy the vertexes + // + for (vertNum=0 ; vertNum < numVerts ; vertNum++, + newXyz += 4, newNormals += 4, + outXyz += 4, outNormal += 4) + { + newNormalsLoadPermute = vec_lvsl(0,newXyz); + newNormalsStorePermute = vec_lvsr(0,outXyz); + newNormalsVec0 = vec_ld(0,newXyz); + newNormalsVec1 = vec_ld(16,newXyz); + newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute); + newNormalsIntVec = vec_unpackh(newNormalsVec0); + newNormalsFloatVec = vec_ctf(newNormalsIntVec,0); + newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero); + newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute); + //outXyz[0] = newXyz[0] * newXyzScale; + //outXyz[1] = newXyz[1] * newXyzScale; + //outXyz[2] = newXyz[2] * newXyzScale; + + lat = ( newNormals[0] >> 8 ) & 0xff; + lng = ( newNormals[0] & 0xff ); + lat *= (FUNCTABLE_SIZE/256); + lng *= (FUNCTABLE_SIZE/256); + + // decode X as cos( lat ) * sin( long ) + // decode Y as sin( lat ) * sin( long ) + // decode Z as cos( long ) + + outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; + outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; + outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; + + vec_ste(newNormalsFloatVec,0,outXyz); + vec_ste(newNormalsFloatVec,4,outXyz); + vec_ste(newNormalsFloatVec,8,outXyz); + } + } else { + // + // interpolate and copy the vertex and normal + // + oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals) + + (backEnd.currentEntity->e.oldframe * surf->numVerts * 4); + oldNormals = oldXyz + 3; + + oldXyzScale = MD3_XYZ_SCALE * backlerp; + oldNormalScale = backlerp; + + for (vertNum=0 ; vertNum < numVerts ; vertNum++, + oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4, + outXyz += 4, outNormal += 4) + { + vec3_t uncompressedOldNormal, uncompressedNewNormal; + + // interpolate the xyz + outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale; + outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale; + outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale; + + // FIXME: interpolate lat/long instead? + lat = ( newNormals[0] >> 8 ) & 0xff; + lng = ( newNormals[0] & 0xff ); + lat *= 4; + lng *= 4; + uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; + uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; + uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; + + lat = ( oldNormals[0] >> 8 ) & 0xff; + lng = ( oldNormals[0] & 0xff ); + lat *= 4; + lng *= 4; + + uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; + uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; + uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; + + outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale; + outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale; + outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale; + +// VectorNormalize (outNormal); + } + VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts); + } +} + +#endif diff --git a/code/renderergl1/tr_local.h b/code/renderergl1/tr_local.h index b7789eae..bbb513af 100644 --- a/code/renderergl1/tr_local.h +++ b/code/renderergl1/tr_local.h @@ -1583,5 +1583,13 @@ size_t RE_SaveJPGToBuffer(byte *buffer, size_t bufSize, int quality, void RE_TakeVideoFrame( int width, int height, byte *captureBuffer, byte *encodeBuffer, qboolean motionJpeg ); +void R_DrawElements( int numIndexes, const glIndex_t *indexes ); +void VectorArrayNormalize( vec4_t *normals, unsigned int count ); + +#ifdef idppc_altivec +void LerpMeshVertexes_altivec( md3Surface_t *surf, float backlerp ); +void ProjectDlightTexture_altivec( void ); +void RB_CalcDiffuseColor_altivec( unsigned char *colors ); +#endif #endif //TR_LOCAL_H diff --git a/code/renderergl1/tr_shade.c b/code/renderergl1/tr_shade.c index 19310006..7ea78d13 100644 --- a/code/renderergl1/tr_shade.c +++ b/code/renderergl1/tr_shade.c @@ -22,9 +22,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // tr_shade.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif /* @@ -163,7 +160,7 @@ instead of using the single glDrawElements call that may be inefficient without compiled vertex arrays. ================== */ -static void R_DrawElements( int numIndexes, const glIndex_t *indexes ) { +void R_DrawElements( int numIndexes, const glIndex_t *indexes ) { int primitives; primitives = r_primitives->integer; @@ -407,189 +404,6 @@ ProjectDlightTexture Perform dynamic lighting with another rendering pass =================== */ -#if idppc_altivec -static void ProjectDlightTexture_altivec( void ) { - int i, l; - vec_t origin0, origin1, origin2; - float texCoords0, texCoords1; - vector float floatColorVec0, floatColorVec1; - vector float modulateVec, colorVec, zero; - vector short colorShort; - vector signed int colorInt; - vector unsigned char floatColorVecPerm, modulatePerm, colorChar; - vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff); - float *texCoords; - byte *colors; - byte clipBits[SHADER_MAX_VERTEXES]; - float texCoordsArray[SHADER_MAX_VERTEXES][2]; - byte colorArray[SHADER_MAX_VERTEXES][4]; - glIndex_t hitIndexes[SHADER_MAX_INDEXES]; - int numIndexes; - float scale; - float radius; - vec3_t floatColor; - float modulate = 0.0f; - - if ( !backEnd.refdef.num_dlights ) { - return; - } - - // There has to be a better way to do this so that floatColor - // and/or modulate are already 16-byte aligned. - floatColorVecPerm = vec_lvsl(0,(float *)floatColor); - modulatePerm = vec_lvsl(0,(float *)&modulate); - modulatePerm = (vector unsigned char)vec_splat((vector unsigned int)modulatePerm,0); - zero = (vector float)vec_splat_s8(0); - - for ( l = 0 ; l < backEnd.refdef.num_dlights ; l++ ) { - dlight_t *dl; - - if ( !( tess.dlightBits & ( 1 << l ) ) ) { - continue; // this surface definitely doesn't have any of this light - } - texCoords = texCoordsArray[0]; - colors = colorArray[0]; - - dl = &backEnd.refdef.dlights[l]; - origin0 = dl->transformed[0]; - origin1 = dl->transformed[1]; - origin2 = dl->transformed[2]; - radius = dl->radius; - scale = 1.0f / radius; - - if(r_greyscale->integer) - { - float luminance; - - luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; - floatColor[0] = floatColor[1] = floatColor[2] = luminance; - } - else if(r_greyscale->value) - { - float luminance; - - luminance = LUMA(dl->color[0], dl->color[1], dl->color[2]) * 255.0f; - floatColor[0] = LERP(dl->color[0] * 255.0f, luminance, r_greyscale->value); - floatColor[1] = LERP(dl->color[1] * 255.0f, luminance, r_greyscale->value); - floatColor[2] = LERP(dl->color[2] * 255.0f, luminance, r_greyscale->value); - } - else - { - floatColor[0] = dl->color[0] * 255.0f; - floatColor[1] = dl->color[1] * 255.0f; - floatColor[2] = dl->color[2] * 255.0f; - } - floatColorVec0 = vec_ld(0, floatColor); - floatColorVec1 = vec_ld(11, floatColor); - floatColorVec0 = vec_perm(floatColorVec0,floatColorVec0,floatColorVecPerm); - for ( i = 0 ; i < tess.numVertexes ; i++, texCoords += 2, colors += 4 ) { - int clip = 0; - vec_t dist0, dist1, dist2; - - dist0 = origin0 - tess.xyz[i][0]; - dist1 = origin1 - tess.xyz[i][1]; - dist2 = origin2 - tess.xyz[i][2]; - - backEnd.pc.c_dlightVertexes++; - - texCoords0 = 0.5f + dist0 * scale; - texCoords1 = 0.5f + dist1 * scale; - - if( !r_dlightBacks->integer && - // dist . tess.normal[i] - ( dist0 * tess.normal[i][0] + - dist1 * tess.normal[i][1] + - dist2 * tess.normal[i][2] ) < 0.0f ) { - clip = 63; - } else { - if ( texCoords0 < 0.0f ) { - clip |= 1; - } else if ( texCoords0 > 1.0f ) { - clip |= 2; - } - if ( texCoords1 < 0.0f ) { - clip |= 4; - } else if ( texCoords1 > 1.0f ) { - clip |= 8; - } - texCoords[0] = texCoords0; - texCoords[1] = texCoords1; - - // modulate the strength based on the height and color - if ( dist2 > radius ) { - clip |= 16; - modulate = 0.0f; - } else if ( dist2 < -radius ) { - clip |= 32; - modulate = 0.0f; - } else { - dist2 = Q_fabs(dist2); - if ( dist2 < radius * 0.5f ) { - modulate = 1.0f; - } else { - modulate = 2.0f * (radius - dist2) * scale; - } - } - } - clipBits[i] = clip; - - modulateVec = vec_ld(0,(float *)&modulate); - modulateVec = vec_perm(modulateVec,modulateVec,modulatePerm); - colorVec = vec_madd(floatColorVec0,modulateVec,zero); - colorInt = vec_cts(colorVec,0); // RGBx - colorShort = vec_pack(colorInt,colorInt); // RGBxRGBx - colorChar = vec_packsu(colorShort,colorShort); // RGBxRGBxRGBxRGBx - colorChar = vec_sel(colorChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 - vec_ste((vector unsigned int)colorChar,0,(unsigned int *)colors); // store color - } - - // build a list of triangles that need light - numIndexes = 0; - for ( i = 0 ; i < tess.numIndexes ; i += 3 ) { - int a, b, c; - - a = tess.indexes[i]; - b = tess.indexes[i+1]; - c = tess.indexes[i+2]; - if ( clipBits[a] & clipBits[b] & clipBits[c] ) { - continue; // not lighted - } - hitIndexes[numIndexes] = a; - hitIndexes[numIndexes+1] = b; - hitIndexes[numIndexes+2] = c; - numIndexes += 3; - } - - if ( !numIndexes ) { - continue; - } - - qglEnableClientState( GL_TEXTURE_COORD_ARRAY ); - qglTexCoordPointer( 2, GL_FLOAT, 0, texCoordsArray[0] ); - - qglEnableClientState( GL_COLOR_ARRAY ); - qglColorPointer( 4, GL_UNSIGNED_BYTE, 0, colorArray ); - - GL_Bind( tr.dlightImage ); - // include GLS_DEPTHFUNC_EQUAL so alpha tested surfaces don't add light - // where they aren't rendered - if ( dl->additive ) { - GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); - } - else { - GL_State( GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); - } - R_DrawElements( numIndexes, hitIndexes ); - backEnd.pc.c_totalIndexes += numIndexes; - backEnd.pc.c_dlightIndexes += numIndexes; - } -} -#endif - - static void ProjectDlightTexture_scalar( void ) { int i, l; vec3_t origin; @@ -745,7 +559,7 @@ static void ProjectDlightTexture_scalar( void ) { static void ProjectDlightTexture( void ) { #if idppc_altivec if (com_altivec->integer) { - // must be in a separate function or G3 systems will crash. + // must be in a separate translation unit or G3 systems will crash. ProjectDlightTexture_altivec(); return; } diff --git a/code/renderergl1/tr_shade_calc.c b/code/renderergl1/tr_shade_calc.c index 511ebb4a..455424ed 100644 --- a/code/renderergl1/tr_shade_calc.c +++ b/code/renderergl1/tr_shade_calc.c @@ -22,9 +22,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // tr_shade_calc.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif #define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ ( (int64_t) ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) @@ -1082,77 +1079,6 @@ void RB_CalcSpecularAlpha( unsigned char *alphas ) { ** ** The basic vertex lighting calc */ -#if idppc_altivec -static void RB_CalcDiffuseColor_altivec( unsigned char *colors ) -{ - int i; - float *v, *normal; - trRefEntity_t *ent; - int ambientLightInt; - vec3_t lightDir; - int numVertexes; - vector unsigned char vSel = VECCONST_UINT8(0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff, - 0x00, 0x00, 0x00, 0xff); - vector float ambientLightVec; - vector float directedLightVec; - vector float lightDirVec; - vector float normalVec0, normalVec1; - vector float incomingVec0, incomingVec1, incomingVec2; - vector float zero, jVec; - vector signed int jVecInt; - vector signed short jVecShort; - vector unsigned char jVecChar, normalPerm; - ent = backEnd.currentEntity; - ambientLightInt = ent->ambientLightInt; - // A lot of this could be simplified if we made sure - // entities light info was 16-byte aligned. - jVecChar = vec_lvsl(0, ent->ambientLight); - ambientLightVec = vec_ld(0, (vector float *)ent->ambientLight); - jVec = vec_ld(11, (vector float *)ent->ambientLight); - ambientLightVec = vec_perm(ambientLightVec,jVec,jVecChar); - - jVecChar = vec_lvsl(0, ent->directedLight); - directedLightVec = vec_ld(0,(vector float *)ent->directedLight); - jVec = vec_ld(11,(vector float *)ent->directedLight); - directedLightVec = vec_perm(directedLightVec,jVec,jVecChar); - - jVecChar = vec_lvsl(0, ent->lightDir); - lightDirVec = vec_ld(0,(vector float *)ent->lightDir); - jVec = vec_ld(11,(vector float *)ent->lightDir); - lightDirVec = vec_perm(lightDirVec,jVec,jVecChar); - - zero = (vector float)vec_splat_s8(0); - VectorCopy( ent->lightDir, lightDir ); - - v = tess.xyz[0]; - normal = tess.normal[0]; - - normalPerm = vec_lvsl(0,normal); - numVertexes = tess.numVertexes; - for (i = 0 ; i < numVertexes ; i++, v += 4, normal += 4) { - normalVec0 = vec_ld(0,(vector float *)normal); - normalVec1 = vec_ld(11,(vector float *)normal); - normalVec0 = vec_perm(normalVec0,normalVec1,normalPerm); - incomingVec0 = vec_madd(normalVec0, lightDirVec, zero); - incomingVec1 = vec_sld(incomingVec0,incomingVec0,4); - incomingVec2 = vec_add(incomingVec0,incomingVec1); - incomingVec1 = vec_sld(incomingVec1,incomingVec1,4); - incomingVec2 = vec_add(incomingVec2,incomingVec1); - incomingVec0 = vec_splat(incomingVec2,0); - incomingVec0 = vec_max(incomingVec0,zero); - normalPerm = vec_lvsl(12,normal); - jVec = vec_madd(incomingVec0, directedLightVec, ambientLightVec); - jVecInt = vec_cts(jVec,0); // RGBx - jVecShort = vec_pack(jVecInt,jVecInt); // RGBxRGBx - jVecChar = vec_packsu(jVecShort,jVecShort); // RGBxRGBxRGBxRGBx - jVecChar = vec_sel(jVecChar,vSel,vSel); // RGBARGBARGBARGBA replace alpha with 255 - vec_ste((vector unsigned int)jVecChar,0,(unsigned int *)&colors[i*4]); // store color - } -} -#endif - static void RB_CalcDiffuseColor_scalar( unsigned char *colors ) { int i, j; @@ -1206,7 +1132,7 @@ void RB_CalcDiffuseColor( unsigned char *colors ) { #if idppc_altivec if (com_altivec->integer) { - // must be in a separate function or G3 systems will crash. + // must be in a separate translation unit or G3 systems will crash. RB_CalcDiffuseColor_altivec( colors ); return; } diff --git a/code/renderergl1/tr_surface.c b/code/renderergl1/tr_surface.c index 0eb1e5c8..41e72738 100644 --- a/code/renderergl1/tr_surface.c +++ b/code/renderergl1/tr_surface.c @@ -21,9 +21,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // tr_surf.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif /* @@ -557,7 +554,7 @@ static void RB_SurfaceLightningBolt( void ) { * The inputs to this routing seem to always be close to length = 1.0 (about 0.6 to 2.0) * This means that we don't have to worry about zero length or enormously long vectors. */ -static void VectorArrayNormalize(vec4_t *normals, unsigned int count) +void VectorArrayNormalize(vec4_t *normals, unsigned int count) { // assert(count); @@ -612,136 +609,6 @@ static void VectorArrayNormalize(vec4_t *normals, unsigned int count) /* ** LerpMeshVertexes */ -#if idppc_altivec -static void LerpMeshVertexes_altivec(md3Surface_t *surf, float backlerp) -{ - short *oldXyz, *newXyz, *oldNormals, *newNormals; - float *outXyz, *outNormal; - float oldXyzScale QALIGN(16); - float newXyzScale QALIGN(16); - float oldNormalScale QALIGN(16); - float newNormalScale QALIGN(16); - int vertNum; - unsigned lat, lng; - int numVerts; - - outXyz = tess.xyz[tess.numVertexes]; - outNormal = tess.normal[tess.numVertexes]; - - newXyz = (short *)((byte *)surf + surf->ofsXyzNormals) - + (backEnd.currentEntity->e.frame * surf->numVerts * 4); - newNormals = newXyz + 3; - - newXyzScale = MD3_XYZ_SCALE * (1.0 - backlerp); - newNormalScale = 1.0 - backlerp; - - numVerts = surf->numVerts; - - if ( backlerp == 0 ) { - vector signed short newNormalsVec0; - vector signed short newNormalsVec1; - vector signed int newNormalsIntVec; - vector float newNormalsFloatVec; - vector float newXyzScaleVec; - vector unsigned char newNormalsLoadPermute; - vector unsigned char newNormalsStorePermute; - vector float zero; - - newNormalsStorePermute = vec_lvsl(0,(float *)&newXyzScaleVec); - newXyzScaleVec = *(vector float *)&newXyzScale; - newXyzScaleVec = vec_perm(newXyzScaleVec,newXyzScaleVec,newNormalsStorePermute); - newXyzScaleVec = vec_splat(newXyzScaleVec,0); - newNormalsLoadPermute = vec_lvsl(0,newXyz); - newNormalsStorePermute = vec_lvsr(0,outXyz); - zero = (vector float)vec_splat_s8(0); - // - // just copy the vertexes - // - for (vertNum=0 ; vertNum < numVerts ; vertNum++, - newXyz += 4, newNormals += 4, - outXyz += 4, outNormal += 4) - { - newNormalsLoadPermute = vec_lvsl(0,newXyz); - newNormalsStorePermute = vec_lvsr(0,outXyz); - newNormalsVec0 = vec_ld(0,newXyz); - newNormalsVec1 = vec_ld(16,newXyz); - newNormalsVec0 = vec_perm(newNormalsVec0,newNormalsVec1,newNormalsLoadPermute); - newNormalsIntVec = vec_unpackh(newNormalsVec0); - newNormalsFloatVec = vec_ctf(newNormalsIntVec,0); - newNormalsFloatVec = vec_madd(newNormalsFloatVec,newXyzScaleVec,zero); - newNormalsFloatVec = vec_perm(newNormalsFloatVec,newNormalsFloatVec,newNormalsStorePermute); - //outXyz[0] = newXyz[0] * newXyzScale; - //outXyz[1] = newXyz[1] * newXyzScale; - //outXyz[2] = newXyz[2] * newXyzScale; - - lat = ( newNormals[0] >> 8 ) & 0xff; - lng = ( newNormals[0] & 0xff ); - lat *= (FUNCTABLE_SIZE/256); - lng *= (FUNCTABLE_SIZE/256); - - // decode X as cos( lat ) * sin( long ) - // decode Y as sin( lat ) * sin( long ) - // decode Z as cos( long ) - - outNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; - outNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; - outNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; - - vec_ste(newNormalsFloatVec,0,outXyz); - vec_ste(newNormalsFloatVec,4,outXyz); - vec_ste(newNormalsFloatVec,8,outXyz); - } - } else { - // - // interpolate and copy the vertex and normal - // - oldXyz = (short *)((byte *)surf + surf->ofsXyzNormals) - + (backEnd.currentEntity->e.oldframe * surf->numVerts * 4); - oldNormals = oldXyz + 3; - - oldXyzScale = MD3_XYZ_SCALE * backlerp; - oldNormalScale = backlerp; - - for (vertNum=0 ; vertNum < numVerts ; vertNum++, - oldXyz += 4, newXyz += 4, oldNormals += 4, newNormals += 4, - outXyz += 4, outNormal += 4) - { - vec3_t uncompressedOldNormal, uncompressedNewNormal; - - // interpolate the xyz - outXyz[0] = oldXyz[0] * oldXyzScale + newXyz[0] * newXyzScale; - outXyz[1] = oldXyz[1] * oldXyzScale + newXyz[1] * newXyzScale; - outXyz[2] = oldXyz[2] * oldXyzScale + newXyz[2] * newXyzScale; - - // FIXME: interpolate lat/long instead? - lat = ( newNormals[0] >> 8 ) & 0xff; - lng = ( newNormals[0] & 0xff ); - lat *= 4; - lng *= 4; - uncompressedNewNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; - uncompressedNewNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; - uncompressedNewNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; - - lat = ( oldNormals[0] >> 8 ) & 0xff; - lng = ( oldNormals[0] & 0xff ); - lat *= 4; - lng *= 4; - - uncompressedOldNormal[0] = tr.sinTable[(lat+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK] * tr.sinTable[lng]; - uncompressedOldNormal[1] = tr.sinTable[lat] * tr.sinTable[lng]; - uncompressedOldNormal[2] = tr.sinTable[(lng+(FUNCTABLE_SIZE/4))&FUNCTABLE_MASK]; - - outNormal[0] = uncompressedOldNormal[0] * oldNormalScale + uncompressedNewNormal[0] * newNormalScale; - outNormal[1] = uncompressedOldNormal[1] * oldNormalScale + uncompressedNewNormal[1] * newNormalScale; - outNormal[2] = uncompressedOldNormal[2] * oldNormalScale + uncompressedNewNormal[2] * newNormalScale; - -// VectorNormalize (outNormal); - } - VectorArrayNormalize((vec4_t *)tess.normal[tess.numVertexes], numVerts); - } -} -#endif - static void LerpMeshVertexes_scalar(md3Surface_t *surf, float backlerp) { short *oldXyz, *newXyz, *oldNormals, *newNormals; @@ -844,7 +711,7 @@ static void LerpMeshVertexes(md3Surface_t *surf, float backlerp) { #if idppc_altivec if (com_altivec->integer) { - // must be in a separate function or G3 systems will crash. + // must be in a separate translation unit or G3 systems will crash. LerpMeshVertexes_altivec( surf, backlerp ); return; } diff --git a/code/renderergl2/tr_shade.c b/code/renderergl2/tr_shade.c index 778a74c4..04eaa755 100644 --- a/code/renderergl2/tr_shade.c +++ b/code/renderergl2/tr_shade.c @@ -22,9 +22,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // tr_shade.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif /* diff --git a/code/renderergl2/tr_shade_calc.c b/code/renderergl2/tr_shade_calc.c index 80e1a383..15cfb9c6 100644 --- a/code/renderergl2/tr_shade_calc.c +++ b/code/renderergl2/tr_shade_calc.c @@ -22,9 +22,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // tr_shade_calc.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif #define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ ( (int64_t) ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) diff --git a/code/renderergl2/tr_surface.c b/code/renderergl2/tr_surface.c index 5b6b933e..66e894ce 100644 --- a/code/renderergl2/tr_surface.c +++ b/code/renderergl2/tr_surface.c @@ -21,9 +21,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // tr_surf.c #include "tr_local.h" -#if idppc_altivec && !defined(__APPLE__) -#include -#endif /*