/*
Copyright (C) 1997-2001 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
#include "r_local.h"

/*
** Vectors that R_DrawParticle takes dot products against: the particle
** origin relative to r_origin is dotted with r_pright, r_pup and r_ppn
** to produce transformed[0], transformed[1] and transformed[2].
*/
vec3_t r_pright, r_pup, r_ppn;

/*
** Translucency levels stored in partparms.level.  R_DrawParticle maps
** PARTICLE_33 to BlendParticle33, PARTICLE_66 to BlendParticle66 and
** anything else to BlendParticle100.
*/
#define PARTICLE_33     0
#define PARTICLE_66     1
#define PARTICLE_OPAQUE 2

/*
** Parameters of the particle currently being drawn.  R_DrawParticle takes
** no arguments; its caller fills in the file-scope partparms instead.
*/
typedef struct
{
	// particle to draw
	particle_t *particle;
	// one of the PARTICLE_* levels above
	int         level;
	// the caller copies this from particle->color
	int         color;
} partparms_t;

static partparms_t partparms;

#if id386 && !defined __linux__

// Address the asm R_DrawParticle reads a dword from as a prefetch.  The
// caller sets it to the next particle, or to the start of the particle
// array when drawing the last one.
static unsigned s_prefetch_address;

/*
** BlendParticleXX
**
** Register-based helpers called from the asm R_DrawParticle.  They are
** declared naked, so each one ends with its own explicit ret.
**
** Inputs:
** EAX = color
** EDI = pdest
**
** Scratch:
** EBX = scratch (dstcolor)
** EBP = scratch
**
** Outputs:
** none in registers; the resulting byte is stored to [EDI].
** AL is overwritten in BlendParticle33/66, and BlendParticle66 also
** shifts EAX left by 8, so the caller must not expect EAX to survive.
*/
__declspec(naked) void BlendParticle33( void )
{
	//	return vid.alphamap[color + dstcolor*256];
	__asm mov ebp, vid.alphamap
	__asm xor ebx, ebx

	// ebx = dstcolor (the byte currently at pdest), zero-extended
	__asm mov bl,  byte ptr [edi]
	// ebx = dstcolor*256
	__asm shl ebx, 8

	// ebp = &vid.alphamap[color + dstcolor*256]; all 32 bits of EAX are added
	__asm add ebp, ebx
	__asm add ebp, eax

	// look up the blended value and write it back to pdest
	__asm mov al,  byte ptr [ebp]

	__asm mov byte ptr [edi], al

	__asm ret
}

__declspec(naked) void BlendParticle66( void )
{
	//	return vid.alphamap[pcolor*256 + dstcolor];
	__asm mov ebp, vid.alphamap
	__asm xor ebx, ebx

	// eax = color*256 (this destroys the original color value in EAX)
	__asm shl eax,  8
	// ebx = dstcolor (the byte currently at pdest), zero-extended
	__asm mov bl,   byte ptr [edi]

	// ebp = &vid.alphamap[color*256 + dstcolor]
	__asm add ebp, ebx
	__asm add ebp, eax

	// look up the blended value and write it back to pdest
	__asm mov al,  byte ptr [ebp]

	__asm mov byte ptr [edi], al

	__asm ret
}

__declspec(naked) void BlendParticle100( void )
{
	// no table lookup: store the low byte of color straight to pdest
	__asm mov byte ptr [edi], al
	__asm ret
}

/*
** R_DrawParticle (asm version)
**
** Since we use __declspec( naked ) we don't have a stack frame
** that we can use.
Since I want to reserve EBP anyway, I tossed ** all the important variables into statics. This routine isn't ** meant to be re-entrant, so this shouldn't cause any problems ** other than a slightly higher global memory footprint. ** */ __declspec(naked) void R_DrawParticle( void ) { static vec3_t local, transformed; static float zi; static int u, v, tmp; static short izi; static int ebpsave; static byte (*blendfunc)(void); /* ** must be memvars since x86 can't load constants ** directly. I guess I could use fld1, but that ** actually costs one more clock than fld [one]! */ static float particle_z_clip = PARTICLE_Z_CLIP; static float one = 1.0F; static float point_five = 0.5F; static float eight_thousand_hex = 0x8000; /* ** save trashed variables */ __asm mov ebpsave, ebp __asm push esi __asm push edi /* ** transform the particle */ // VectorSubtract (pparticle->origin, r_origin, local); __asm mov esi, partparms.particle __asm fld dword ptr [esi+0] ; p_o.x __asm fsub dword ptr [r_origin+0] ; p_o.x-r_o.x __asm fld dword ptr [esi+4] ; p_o.y | p_o.x-r_o.x __asm fsub dword ptr [r_origin+4] ; p_o.y-r_o.y | p_o.x-r_o.x __asm fld dword ptr [esi+8] ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x __asm fsub dword ptr [r_origin+8] ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x __asm fxch st(2) ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z __asm fstp dword ptr [local+0] ; p_o.y-r_o.y | p_o.z-r_o.z __asm fstp dword ptr [local+4] ; p_o.z-r_o.z __asm fstp dword ptr [local+8] ; (empty) // transformed[0] = DotProduct(local, r_pright); // transformed[1] = DotProduct(local, r_pup); // transformed[2] = DotProduct(local, r_ppn); __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_pright+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_pright+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_pright+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; 
l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+0] ; (empty) __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_pup+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_pup+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_pup+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+4] ; (empty) __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_ppn+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_ppn+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_ppn+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+8] ; (empty) /* ** make sure that the transformed particle is not in front of ** the particle Z clip plane. We can do the comparison in ** integer space since we know the sign of one of the inputs ** and can figure out the sign of the other easily enough. 
*/ // if (transformed[2] < PARTICLE_Z_CLIP) // return; __asm mov eax, dword ptr [transformed+8] __asm and eax, eax __asm js end __asm cmp eax, particle_z_clip __asm jl end /* ** project the point by initiating the 1/z calc */ // zi = 1.0 / transformed[2]; __asm fld one __asm fdiv dword ptr [transformed+8] /* ** bind the blend function pointer to the appropriate blender ** while we're dividing */ //if ( level == PARTICLE_33 ) // blendparticle = BlendParticle33; //else if ( level == PARTICLE_66 ) // blendparticle = BlendParticle66; //else // blendparticle = BlendParticle100; __asm cmp partparms.level, PARTICLE_66 __asm je blendfunc_66 __asm jl blendfunc_33 __asm lea ebx, BlendParticle100 __asm jmp done_selecting_blend_func blendfunc_33: __asm lea ebx, BlendParticle33 __asm jmp done_selecting_blend_func blendfunc_66: __asm lea ebx, BlendParticle66 done_selecting_blend_func: __asm mov blendfunc, ebx // prefetch the next particle __asm mov ebp, s_prefetch_address __asm mov ebp, [ebp] // finish the above divide __asm fstp zi // u = (int)(xcenter + zi * transformed[0] + 0.5); // v = (int)(ycenter - zi * transformed[1] + 0.5); __asm fld zi ; zi __asm fmul dword ptr [transformed+0] ; zi * transformed[0] __asm fld zi ; zi | zi * transformed[0] __asm fmul dword ptr [transformed+4] ; zi * transformed[1] | zi * transformed[0] __asm fxch st(1) ; zi * transformed[0] | zi * transformed[1] __asm fadd xcenter ; xcenter + zi * transformed[0] | zi * transformed[1] __asm fxch st(1) ; zi * transformed[1] | xcenter + zi * transformed[0] __asm fld ycenter ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0] __asm fsubrp st(1), st(0) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] __asm fxch st(1) ; xcenter + zi * transformed[0] | ycenter + zi * transformed[1] __asm fadd point_five ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1] __asm fxch st(1) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 __asm fadd point_five ; 
ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 __asm fxch st(1) ; u | v __asm fistp dword ptr [u] ; v __asm fistp dword ptr [v] ; (empty) /* ** clip out the particle */ // if ((v > d_vrectbottom_particle) || // (u > d_vrectright_particle) || // (v < d_vrecty) || // (u < d_vrectx)) // { // return; // } __asm mov ebx, u __asm mov ecx, v __asm cmp ecx, d_vrectbottom_particle __asm jg end __asm cmp ecx, d_vrecty __asm jl end __asm cmp ebx, d_vrectright_particle __asm jg end __asm cmp ebx, d_vrectx __asm jl end /* ** compute addresses of zbuffer, framebuffer, and ** compute the Z-buffer reference value. ** ** EBX = U ** ECX = V ** ** Outputs: ** ESI = Z-buffer address ** EDI = framebuffer address */ // ESI = d_pzbuffer + (d_zwidth * v) + u; __asm mov esi, d_pzbuffer ; esi = d_pzbuffer __asm mov eax, d_zwidth ; eax = d_zwidth __asm mul ecx ; eax = d_zwidth*v __asm add eax, ebx ; eax = d_zwidth*v+u __asm shl eax, 1 ; eax = 2*(d_zwidth*v+u) __asm add esi, eax ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u ) // initiate // izi = (int)(zi * 0x8000); __asm fld zi __asm fmul eight_thousand_hex // EDI = pdest = d_viewbuffer + d_scantable[v] + u; __asm lea edi, [d_scantable+ecx*4] __asm mov edi, [edi] __asm add edi, d_viewbuffer __asm add edi, ebx // complete // izi = (int)(zi * 0x8000); __asm fistp tmp __asm mov eax, tmp __asm mov izi, ax /* ** determine the screen area covered by the particle, ** which also means clamping to a min and max */ // pix = izi >> d_pix_shift; __asm xor edx, edx __asm mov dx, izi __asm mov ecx, d_pix_shift __asm shr dx, cl // if (pix < d_pix_min) // pix = d_pix_min; __asm cmp edx, d_pix_min __asm jge check_pix_max __asm mov edx, d_pix_min __asm jmp skip_pix_clamp // else if (pix > d_pix_max) // pix = d_pix_max; check_pix_max: __asm cmp edx, d_pix_max __asm jle skip_pix_clamp __asm mov edx, d_pix_max skip_pix_clamp: /* ** render the appropriate pixels ** ** ECX = count (used for inner loop) ** EDX = count (used for 
outer loop) ** ESI = zbuffer ** EDI = framebuffer */ __asm mov ecx, edx __asm cmp ecx, 1 __asm ja over over: /* ** at this point: ** ** ECX = count */ __asm push ecx __asm push edi __asm push esi top_of_pix_vert_loop: top_of_pix_horiz_loop: // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) // { // for (i=0 ; icolor; int i, izi, pix, count, u, v; byte (*blendparticle)( int, int ); /* ** transform the particle */ VectorSubtract (pparticle->origin, r_origin, local); transformed[0] = DotProduct(local, r_pright); transformed[1] = DotProduct(local, r_pup); transformed[2] = DotProduct(local, r_ppn); if (transformed[2] < PARTICLE_Z_CLIP) return; /* ** bind the blend function pointer to the appropriate blender */ if ( level == PARTICLE_33 ) blendparticle = BlendParticle33; else if ( level == PARTICLE_66 ) blendparticle = BlendParticle66; else blendparticle = BlendParticle100; /* ** project the point */ // FIXME: preadjust xcenter and ycenter zi = 1.0 / transformed[2]; u = (int)(xcenter + zi * transformed[0] + 0.5); v = (int)(ycenter - zi * transformed[1] + 0.5); if ((v > d_vrectbottom_particle) || (u > d_vrectright_particle) || (v < d_vrecty) || (u < d_vrectx)) { return; } /* ** compute addresses of zbuffer, framebuffer, and ** compute the Z-buffer reference value. */ pz = d_pzbuffer + (d_zwidth * v) + u; pdest = d_viewbuffer + d_scantable[v] + u; izi = (int)(zi * 0x8000); /* ** determine the screen area covered by the particle, ** which also means clamping to a min and max */ pix = izi >> d_pix_shift; if (pix < d_pix_min) pix = d_pix_min; else if (pix > d_pix_max) pix = d_pix_max; /* ** render the appropriate pixels */ count = pix; switch (level) { case PARTICLE_33 : for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) { //FIXME--do it in blocks of 8? 
for (i=0 ; ialpha > 0.66 ) partparms.level = PARTICLE_OPAQUE; else if ( p->alpha > 0.33 ) partparms.level = PARTICLE_66; else partparms.level = PARTICLE_33; partparms.particle = p; partparms.color = p->color; #if id386 && !defined __linux__ if ( i < r_newrefdef.num_particles-1 ) s_prefetch_address = ( unsigned int ) ( p + 1 ); else s_prefetch_address = ( unsigned int ) r_newrefdef.particles; #endif R_DrawParticle(); } #if id386 && !defined __linux__ __asm fldcw word ptr [fpu_chop_cw] #endif }