client (svga) almost links now. just gotta fix up net_chan.c and model.c wrt SERVERONLY

This commit is contained in:
Bill Currie 2000-05-11 13:30:04 +00:00
parent ee16312bb8
commit 44deb188ec
23 changed files with 9188 additions and 10 deletions

View file

@ -4,10 +4,10 @@ SV_name=qw-server
SV_libs=
CL_SVGA_name=qw-client-svga
SV_libs=
CL_SVGA_libs=-lvga
CL_X11_name=qw-client-x11
SV_libs=
CL_X11_libs=
DIRECTORIES=
vpath %.a $(patsubst @%,%,$(DIRECTORIES)) /usr/lib
@ -65,6 +65,8 @@ SV_dependencies = $(patsubst %,%.d,$(basename $(SV_sources)))
SV_objects = $(patsubst %.d,%.o,$(SV_dependencies))
CL_sources=\
cl_cmd.c \
cl_cvar.c \
cl_demo.c \
cl_ents.c \
cl_input.c \
@ -127,7 +129,25 @@ CL_sources=\
zone.c \
cd_linux.c \
sys_linux.c \
snd_linux.c
snd_linux.c \
d_copy.S \
d_draw.S \
d_draw16.S \
d_parta.S \
d_polysa.S \
d_scana.S \
d_spr8.S \
d_varsa.S \
math.S \
r_aclipa.S \
r_aliasa.S \
r_drawa.S \
r_edgea.S \
r_varsa.S \
snd_mixa.S \
surf16.S \
surf8.S \
sys_dosa.S
CL_dependencies = $(patsubst %,%.d,$(basename $(CL_sources)))
CL_objects = $(patsubst %.d,%.o,$(CL_dependencies))

View file

@ -29,7 +29,7 @@ Cvar_Info(cvar_t *var)
if (cls.state >= ca_connected)
{
MSG_WriteByte (&cls.netchan.message, clc_stringcmd);
SZ_Print (&cls.netchan.message, va("setinfo \"%s\" \"%s\"\n", var->name, string));
SZ_Print (&cls.netchan.message, va("setinfo \"%s\" \"%s\"\n", var->name, var->string));
}
}
}

View file

@ -595,10 +595,6 @@ char *Cmd_CompleteCommand (char *partial)
return NULL;
}
void Cmd_ForwardToServer (void)
{
}
/*
============
Cmd_ExecuteString

149
source/d_copy.S Normal file
View file

@ -0,0 +1,149 @@
//
// d_copy.s
// x86 assembly-language screen copying code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
.data
LCopyWidth: .long 0
LBlockSrcStep: .long 0
LBlockDestStep: .long 0
LSrcDelta: .long 0
LDestDelta: .long 0
#define bufptr 4+16
// copies 16 rows per plane at a pop; idea is that 16*512 = 8k, and since
// no Mode X mode is wider than 360, all the data should fit in the cache for
// the passes for the next 3 planes
.text
.globl C(VGA_UpdatePlanarScreen)
C(VGA_UpdatePlanarScreen):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
movl C(VGA_bufferrowbytes),%eax
shll $1,%eax
movl %eax,LBlockSrcStep
movl C(VGA_rowbytes),%eax
shll $1,%eax
movl %eax,LBlockDestStep
movl $0x3C4,%edx
movb $2,%al
outb %al,%dx // point the SC to the Map Mask
incl %edx
movl bufptr(%esp),%esi
movl C(VGA_pagebase),%edi
movl C(VGA_height),%ebp
shrl $1,%ebp
movl C(VGA_width),%ecx
movl C(VGA_bufferrowbytes),%eax
subl %ecx,%eax
movl %eax,LSrcDelta
movl C(VGA_rowbytes),%eax
shll $2,%eax
subl %ecx,%eax
movl %eax,LDestDelta
shrl $4,%ecx
movl %ecx,LCopyWidth
LRowLoop:
movb $1,%al
LPlaneLoop:
outb %al,%dx
movb $2,%ah
pushl %esi
pushl %edi
LRowSetLoop:
movl LCopyWidth,%ecx
LColumnLoop:
movb 12(%esi),%bh
movb 8(%esi),%bl
shll $16,%ebx
movb 4(%esi),%bh
movb (%esi),%bl
movl %ebx,(%edi)
addl $16,%esi
addl $4,%edi
decl %ecx
jnz LColumnLoop
addl LDestDelta,%edi
addl LSrcDelta,%esi
decb %ah
jnz LRowSetLoop
popl %edi
popl %esi
incl %esi
shlb $1,%al
cmpb $16,%al
jnz LPlaneLoop
subl $4,%esi
addl LBlockSrcStep,%esi
addl LBlockDestStep,%edi
decl %ebp
jnz LRowLoop
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
#define srcptr 4+16
#define destptr 8+16
#define width 12+16
#define height 16+16
#define srcrowbytes 20+16
#define destrowbytes 24+16
.globl C(VGA_UpdateLinearScreen)
C(VGA_UpdateLinearScreen):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
cld
movl srcptr(%esp),%esi
movl destptr(%esp),%edi
movl width(%esp),%ebx
movl srcrowbytes(%esp),%eax
subl %ebx,%eax
movl destrowbytes(%esp),%edx
subl %ebx,%edx
shrl $2,%ebx
movl height(%esp),%ebp
LLRowLoop:
movl %ebx,%ecx
rep/movsl (%esi),(%edi)
addl %eax,%esi
addl %edx,%edi
decl %ebp
jnz LLRowLoop
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret

1037
source/d_draw.S Normal file

File diff suppressed because it is too large Load diff

974
source/d_draw16.S Normal file
View file

@ -0,0 +1,974 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// d_draw16.s
// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
// subdivision.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with no transparency and
// 16-pixel subdivision.
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//----------------------------------------------------------------------
.data
.text
// out-of-line, rarely-needed clamping code
LClampHigh0:
movl C(bbextents),%esi
jmp LClampReentry0
LClampHighOrLow0:
jg LClampHigh0
xorl %esi,%esi
jmp LClampReentry0
LClampHigh1:
movl C(bbextentt),%edx
jmp LClampReentry1
LClampHighOrLow1:
jg LClampHigh1
xorl %edx,%edx
jmp LClampReentry1
LClampLow2:
movl $4096,%ebp
jmp LClampReentry2
LClampHigh2:
movl C(bbextents),%ebp
jmp LClampReentry2
LClampLow3:
movl $4096,%ecx
jmp LClampReentry3
LClampHigh3:
movl C(bbextentt),%ecx
jmp LClampReentry3
LClampLow4:
movl $4096,%eax
jmp LClampReentry4
LClampHigh4:
movl C(bbextents),%eax
jmp LClampReentry4
LClampLow5:
movl $4096,%ebx
jmp LClampReentry5
LClampHigh5:
movl C(bbextentt),%ebx
jmp LClampReentry5
#define pspans 4+16
.align 4
.globl C(D_DrawSpans16)
C(D_DrawSpans16):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
//
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
flds C(d_sdivzstepu)
fmuls fp_16
movl C(cacheblock),%edx
flds C(d_tdivzstepu)
fmuls fp_16
movl pspans(%esp),%ebx // point to the first span descriptor
flds C(d_zistepu)
fmuls fp_16
movl %edx,pbase // pbase = cacheblock
fstps zi16stepu
fstps tdivz16stepu
fstps sdivz16stepu
LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
fildl espan_t_v(%ebx)
fildl espan_t_u(%ebx)
fld %st(1) // dv | du | dv
fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
fld %st(1) // du | dv*d_sdivzstepv | du | dv
fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
// dv*d_sdivzstepv | du | dv
fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
// dv*d_sdivzstepv | du | dv
faddp %st(0),%st(2) // du*d_tdivzstepu |
// du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
// du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
// dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
// du*d_sdivzstepu; stays in %st(2) at end
fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
// s/z
fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
// du*d_tdivzstepu | du | s/z
fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
// du*d_tdivzstepu | du | s/z
faddp %st(0),%st(2) // dv*d_zistepv |
// dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
// dv*d_zistepv | s/z
fmuls C(d_zistepu) // du*d_zistepu |
// dv*d_tdivzstepv + du*d_tdivzstepu |
// dv*d_zistepv | s/z
fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
// du*d_zistepu | dv*d_zistepv | s/z
fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
// du*d_tdivzstepu; stays in %st(1) at end
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
// du*d_zistepu; stays in %st(0) at end
// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
//
// point %edi to the first pixel in the span
//
movl C(d_viewbuffer),%ecx
movl espan_t_v(%ebx),%eax
movl %ebx,pspantemp // preserve spans pointer
movl C(tadjust),%edx
movl C(sadjust),%esi
movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
addl %ecx,%edi
movl espan_t_u(%ebx),%ecx
addl %ecx,%edi // pdest = &pdestspan[scans->u];
movl espan_t_count(%ebx),%ecx
//
// now start the FDIV for the end of the span
//
cmpl $16,%ecx
ja LSetupNotLast1
decl %ecx
jz LCleanup1 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fildl spancountminus1
flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
// C(d_tdivzstepu)*scm1
fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
// C(d_tdivzstepu)*scm1
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
faddp %st(0),%st(3)
flds fp_64k
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight1
LCleanup1:
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
jmp LFDIVInFlight1
.align 4
LSetupNotLast1:
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fadds zi16stepu
fxch %st(2)
fadds sdivz16stepu
fxch %st(2)
flds tdivz16stepu
faddp %st(0),%st(2)
flds fp_64k
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight1:
addl s,%esi
addl t,%edx
movl C(bbextents),%ebx
movl C(bbextentt),%ebp
cmpl %ebx,%esi
ja LClampHighOrLow0
LClampReentry0:
movl %esi,s
movl pbase,%ebx
shll $16,%esi
cmpl %ebp,%edx
movl %esi,sfracf
ja LClampHighOrLow1
LClampReentry1:
movl %edx,t
movl s,%esi // sfrac = scans->sfrac;
shll $16,%edx
movl t,%eax // tfrac = scans->tfrac;
sarl $16,%esi
movl %edx,tfracf
//
// calculate the texture starting address
//
sarl $16,%eax
movl C(cachewidth),%edx
imull %edx,%eax // (tfrac >> 16) * cachewidth
addl %ebx,%esi
addl %eax,%esi // psource = pbase + (sfrac >> 16) +
// ((tfrac >> 16) * cachewidth);
//
// determine whether last span or not
//
cmpl $16,%ecx
jna LLastSegment
//
// not the last segment; do full 16-wide segment
//
LNotLastSegment:
//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//
// pick up after the FDIV that was left in flight previously
fld %st(0) // duplicate it
fmul %st(4),%st(0) // s = s/z * z
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movl snext,%eax
movl tnext,%edx
movb (%esi),%bl // get first source texel
subl $16,%ecx // count off this segments' pixels
movl C(sadjust),%ebp
movl %ecx,counttemp // remember count of remaining pixels
movl C(tadjust),%ecx
movb %bl,(%edi) // store first dest pixel
addl %eax,%ebp
addl %edx,%ecx
movl C(bbextents),%eax
movl C(bbextentt),%edx
cmpl $4096,%ebp
jl LClampLow2
cmpl %eax,%ebp
ja LClampHigh2
LClampReentry2:
cmpl $4096,%ecx
jl LClampLow3
cmpl %edx,%ecx
ja LClampHigh3
LClampReentry3:
movl %ebp,snext
movl %ecx,tnext
subl s,%ebp
subl t,%ecx
//
// set up advancetable
//
movl %ecx,%eax
movl %ebp,%edx
sarl $20,%eax // tstep >>= 16;
jz LZero
sarl $20,%edx // sstep >>= 16;
movl C(cachewidth),%ebx
imull %ebx,%eax
jmp LSetUp1
LZero:
sarl $20,%edx // sstep >>= 16;
movl C(cachewidth),%ebx
LSetUp1:
addl %edx,%eax // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%edx
movl %eax,advancetable+4 // advance base in t
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $12,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
shll $12,%ecx // left-justify tstep fractional part
movl %eax,advancetable // advance extra in t
movl %ecx,tstep
addl %ecx,%edx // advance tfrac fractional part by tstep frac
sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
addl %ebp,%ebx // advance sfrac fractional part by sstep frac
adcl advancetable+4(,%ecx,4),%esi // point to next source texel
addl tstep,%edx
sbbl %ecx,%ecx
movb (%esi),%al
addl %ebp,%ebx
movb %al,1(%edi)
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,2(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,3(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,4(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,5(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,6(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,7(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
//
// start FDIV for end of next segment in flight, so it can overlap
//
movl counttemp,%ecx
cmpl $16,%ecx // more than one segment after this?
ja LSetupNotLast2 // yes
decl %ecx
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
fildl spancountminus1
flds C(d_zistepu) // C(d_zistepu) | spancountminus1
fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
flds fp_64k // 64k | C(d_sdivzstepu)*scm1
fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
faddp %st(0),%st(4) // 64k
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight2
.align 4
LSetupNotLast2:
fadds zi16stepu
fxch %st(2)
fadds sdivz16stepu
fxch %st(2)
flds tdivz16stepu
faddp %st(0),%st(2)
flds fp_64k
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight2:
movl %ecx,counttemp
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,8(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,9(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,10(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,11(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,12(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,13(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,14(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl $16,%edi
movl %edx,tfracf
movl snext,%edx
movl %ebx,sfracf
movl tnext,%ebx
movl %edx,s
movl %ebx,t
movl counttemp,%ecx // retrieve count
//
// determine whether last span or not
//
cmpl $16,%ecx // are there multiple segments remaining?
movb %al,-1(%edi)
ja LNotLastSegment // yes
//
// last segment of scan
//
LLastSegment:
//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
testl %ecx,%ecx
jz LNoSteps // just draw the last pixel and we're done
// pick up after the FDIV that was left in flight previously
fld %st(0) // duplicate it
fmul %st(4),%st(0) // s = s/z * z
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movb (%esi),%al // load first texel in segment
movl C(tadjust),%ebx
movb %al,(%edi) // store first pixel in segment
movl C(sadjust),%eax
addl snext,%eax
addl tnext,%ebx
movl C(bbextents),%ebp
movl C(bbextentt),%edx
cmpl $4096,%eax
jl LClampLow4
cmpl %ebp,%eax
ja LClampHigh4
LClampReentry4:
movl %eax,snext
cmpl $4096,%ebx
jl LClampLow5
cmpl %edx,%ebx
ja LClampHigh5
LClampReentry5:
cmpl $1,%ecx // don't bother
je LOnlyOneStep // if two pixels in segment, there's only one step,
// of the segment length
subl s,%eax
subl t,%ebx
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
addl %ebx,%ebx // reciprocal yields 16.48
imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
// (spancount-1)
movl %edx,%ebp
movl %ebx,%eax
imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
// (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
movl entryvec_table_16(,%ecx,4),%ebx
movl %edx,%eax
movl %ebx,jumptemp // entry point into code for RET later
movl %ebp,%ecx
sarl $16,%edx // tstep >>= 16;
movl C(cachewidth),%ebx
sarl $16,%ecx // sstep >>= 16;
imull %ebx,%edx
addl %ecx,%edx // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%ecx
movl %edx,advancetable+4 // advance base in t
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $16,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
shll $16,%eax // left-justify tstep fractional part
movl %edx,advancetable // advance extra in t
movl %eax,tstep
movl %ecx,%edx
addl %eax,%edx
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
jmp *jumptemp // jump to the number-of-pixels handler
//----------------------------------------
LNoSteps:
movb (%esi),%al // load first texel in segment
subl $15,%edi // adjust for hardwired offset
jmp LEndSpan
LOnlyOneStep:
subl s,%eax
subl t,%ebx
movl %eax,%ebp
movl %ebx,%edx
jmp LSetEntryvec
//----------------------------------------
.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
.globl Entry14_16, Entry15_16, Entry16_16
Entry2_16:
subl $14,%edi // adjust for hardwired offsets
movb (%esi),%al
jmp LEntry2_16
//----------------------------------------
Entry3_16:
subl $13,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
jmp LEntry3_16
//----------------------------------------
Entry4_16:
subl $12,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry4_16
//----------------------------------------
Entry5_16:
subl $11,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry5_16
//----------------------------------------
Entry6_16:
subl $10,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry6_16
//----------------------------------------
Entry7_16:
subl $9,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry7_16
//----------------------------------------
Entry8_16:
subl $8,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry8_16
//----------------------------------------
Entry9_16:
subl $7,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry9_16
//----------------------------------------
Entry10_16:
subl $6,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry10_16
//----------------------------------------
Entry11_16:
subl $5,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry11_16
//----------------------------------------
Entry12_16:
subl $4,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry12_16
//----------------------------------------
Entry13_16:
subl $3,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry13_16
//----------------------------------------
Entry14_16:
subl $2,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry14_16
//----------------------------------------
Entry15_16:
decl %edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
jmp LEntry15_16
//----------------------------------------
Entry16_16:
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
sbbl %ecx,%ecx
movb %al,1(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry15_16:
sbbl %ecx,%ecx
movb %al,2(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry14_16:
sbbl %ecx,%ecx
movb %al,3(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry13_16:
sbbl %ecx,%ecx
movb %al,4(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry12_16:
sbbl %ecx,%ecx
movb %al,5(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry11_16:
sbbl %ecx,%ecx
movb %al,6(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry10_16:
sbbl %ecx,%ecx
movb %al,7(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry9_16:
sbbl %ecx,%ecx
movb %al,8(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry8_16:
sbbl %ecx,%ecx
movb %al,9(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry7_16:
sbbl %ecx,%ecx
movb %al,10(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry6_16:
sbbl %ecx,%ecx
movb %al,11(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry5_16:
sbbl %ecx,%ecx
movb %al,12(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
LEntry4_16:
sbbl %ecx,%ecx
movb %al,13(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
LEntry3_16:
movb %al,14(%edi)
movb (%esi),%al
LEntry2_16:
LEndSpan:
//
// clear s/z, t/z, 1/z from FP stack
//
fstp %st(0)
fstp %st(0)
fstp %st(0)
movl pspantemp,%ebx // restore spans pointer
movl espan_t_pnext(%ebx),%ebx // point to next span
testl %ebx,%ebx // any more spans?
movb %al,15(%edi)
jnz LSpanLoop // more spans
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
#endif // id386

477
source/d_parta.S Normal file
View file

@ -0,0 +1,477 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"
#if id386
//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------
//FIXME: comments, full optimization
//----------------------------------------------------------------------
// 8-bpp particle queueing code.
//----------------------------------------------------------------------
.text
#define P 12+4
.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
pushl %ebp // preserve caller's stack frame
pushl %edi // preserve register variables
pushl %ebx
movl P(%esp),%edi
// FIXME: better FP overlap in general here
// transform point
// VectorSubtract (p->org, r_origin, local);
flds C(r_origin)
fsubrs pt_org(%edi)
flds pt_org+4(%edi)
fsubs C(r_origin)+4
flds pt_org+8(%edi)
fsubs C(r_origin)+8
fxch %st(2) // local[0] | local[1] | local[2]
// transformed[2] = DotProduct(local, r_ppn);
flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
// local[1] | local[2]
fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
// local[2]
faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
fld %st(0) // z | z | local[0] | local[1] |
// local[2]
fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
// if (transformed[2] < PARTICLE_Z_CLIP)
// return;
fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
fxch %st(3) // local[2] | local[0] | local[1] | 1/z
flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
// local[1] | 1/z
fnstsw %ax
testb $1,%ah
jnz LPop6AndDone
// transformed[1] = DotProduct(local, r_pup);
fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
// local[0] | local[1] | 1/z
fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
// local[1] | 1/z
fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
// local[1] | 1/z
faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
// local[1] | 1/z
faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
// transformed[0] = DotProduct(local, r_pright);
fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
faddp %st(0),%st(1) // x | y | 1/z
fxch %st(1) // y | x | 1/z
// project the point
fmul %st(2),%st(0) // y/z | x | 1/z
fxch %st(1) // x | y/z | 1/z
fmul %st(2),%st(0) // x/z | y/z | 1/z
fxch %st(1) // y/z | x/z | 1/z
fsubrs C(ycenter) // v | x/z | 1/z
fxch %st(1) // x/z | v | 1/z
fadds C(xcenter) // u | v | 1/z
// FIXME: preadjust xcenter and ycenter
fxch %st(1) // v | u | 1/z
fadds float_point5 // v | u | 1/z
fxch %st(1) // u | v | 1/z
fadds float_point5 // u | v | 1/z
fxch %st(2) // 1/z | v | u
fmuls DP_32768 // 1/z * 0x8000 | v | u
fxch %st(2) // u | v | 1/z * 0x8000
// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
fistpl DP_u // v | 1/z * 0x8000
fistpl DP_v // 1/z * 0x8000
movl DP_u,%eax
movl DP_v,%edx
// if ((v > d_vrectbottom_particle) ||
// (u > d_vrectright_particle) ||
// (v < d_vrecty) ||
// (u < d_vrectx))
// {
// continue;
// }
movl C(d_vrectbottom_particle),%ebx
movl C(d_vrectright_particle),%ecx
cmpl %ebx,%edx
jg LPop1AndDone
cmpl %ecx,%eax
jg LPop1AndDone
movl C(d_vrecty),%ebx
movl C(d_vrectx),%ecx
cmpl %ebx,%edx
jl LPop1AndDone
cmpl %ecx,%eax
jl LPop1AndDone
flds pt_color(%edi) // color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
fistpl DP_Color // 1/z * 0x8000
movl C(d_viewbuffer),%ebx
addl %eax,%ebx
movl C(d_scantable)(,%edx,4),%edi // point to the pixel
imull C(d_zrowbytes),%edx // point to the z pixel
leal (%edx,%eax,2),%edx
movl C(d_pzbuffer),%eax
fistpl izi
addl %ebx,%edi
addl %eax,%edx
// pix = izi >> d_pix_shift;
movl izi,%eax
movl C(d_pix_shift),%ecx
shrl %cl,%eax
movl izi,%ebp
// if (pix < d_pix_min)
// pix = d_pix_min;
// else if (pix > d_pix_max)
// pix = d_pix_max;
movl C(d_pix_min),%ebx
movl C(d_pix_max),%ecx
cmpl %ebx,%eax
jnl LTestPixMax
movl %ebx,%eax
jmp LTestDone
LTestPixMax:
cmpl %ecx,%eax
jng LTestDone
movl %ecx,%eax
LTestDone:
movb DP_Color,%ch
movl C(d_y_aspect_shift),%ebx
testl %ebx,%ebx
jnz LDefault
cmpl $4,%eax
ja LDefault
jmp DP_EntryTable-4(,%eax,4)
// 1x1
.globl DP_1x1
DP_1x1:
cmpw %bp,(%edx) // just one pixel to do
jg LDone
movw %bp,(%edx)
movb %ch,(%edi)
jmp LDone
// 2x2
.globl DP_2x2
DP_2x2:
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
cmpw %bp,(%edx)
jg L2x2_1
movw %bp,(%edx)
movb %ch,(%edi)
L2x2_1:
cmpw %bp,2(%edx)
jg L2x2_2
movw %bp,2(%edx)
movb %ch,1(%edi)
L2x2_2:
cmpw %bp,(%edx,%esi,1)
jg L2x2_3
movw %bp,(%edx,%esi,1)
movb %ch,(%edi,%ebx,1)
L2x2_3:
cmpw %bp,2(%edx,%esi,1)
jg L2x2_4
movw %bp,2(%edx,%esi,1)
movb %ch,1(%edi,%ebx,1)
L2x2_4:
popl %esi
jmp LDone
// 3x3
.globl DP_3x3
DP_3x3:
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
cmpw %bp,(%edx)
jg L3x3_1
movw %bp,(%edx)
movb %ch,(%edi)
L3x3_1:
cmpw %bp,2(%edx)
jg L3x3_2
movw %bp,2(%edx)
movb %ch,1(%edi)
L3x3_2:
cmpw %bp,4(%edx)
jg L3x3_3
movw %bp,4(%edx)
movb %ch,2(%edi)
L3x3_3:
cmpw %bp,(%edx,%esi,1)
jg L3x3_4
movw %bp,(%edx,%esi,1)
movb %ch,(%edi,%ebx,1)
L3x3_4:
cmpw %bp,2(%edx,%esi,1)
jg L3x3_5
movw %bp,2(%edx,%esi,1)
movb %ch,1(%edi,%ebx,1)
L3x3_5:
cmpw %bp,4(%edx,%esi,1)
jg L3x3_6
movw %bp,4(%edx,%esi,1)
movb %ch,2(%edi,%ebx,1)
L3x3_6:
cmpw %bp,(%edx,%esi,2)
jg L3x3_7
movw %bp,(%edx,%esi,2)
movb %ch,(%edi,%ebx,2)
L3x3_7:
cmpw %bp,2(%edx,%esi,2)
jg L3x3_8
movw %bp,2(%edx,%esi,2)
movb %ch,1(%edi,%ebx,2)
L3x3_8:
cmpw %bp,4(%edx,%esi,2)
jg L3x3_9
movw %bp,4(%edx,%esi,2)
movb %ch,2(%edi,%ebx,2)
L3x3_9:
popl %esi
jmp LDone
// 4x4
.globl DP_4x4
DP_4x4:
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
cmpw %bp,(%edx)
jg L4x4_1
movw %bp,(%edx)
movb %ch,(%edi)
L4x4_1:
cmpw %bp,2(%edx)
jg L4x4_2
movw %bp,2(%edx)
movb %ch,1(%edi)
L4x4_2:
cmpw %bp,4(%edx)
jg L4x4_3
movw %bp,4(%edx)
movb %ch,2(%edi)
L4x4_3:
cmpw %bp,6(%edx)
jg L4x4_4
movw %bp,6(%edx)
movb %ch,3(%edi)
L4x4_4:
cmpw %bp,(%edx,%esi,1)
jg L4x4_5
movw %bp,(%edx,%esi,1)
movb %ch,(%edi,%ebx,1)
L4x4_5:
cmpw %bp,2(%edx,%esi,1)
jg L4x4_6
movw %bp,2(%edx,%esi,1)
movb %ch,1(%edi,%ebx,1)
L4x4_6:
cmpw %bp,4(%edx,%esi,1)
jg L4x4_7
movw %bp,4(%edx,%esi,1)
movb %ch,2(%edi,%ebx,1)
L4x4_7:
cmpw %bp,6(%edx,%esi,1)
jg L4x4_8
movw %bp,6(%edx,%esi,1)
movb %ch,3(%edi,%ebx,1)
L4x4_8:
leal (%edx,%esi,2),%edx
leal (%edi,%ebx,2),%edi
cmpw %bp,(%edx)
jg L4x4_9
movw %bp,(%edx)
movb %ch,(%edi)
L4x4_9:
cmpw %bp,2(%edx)
jg L4x4_10
movw %bp,2(%edx)
movb %ch,1(%edi)
L4x4_10:
cmpw %bp,4(%edx)
jg L4x4_11
movw %bp,4(%edx)
movb %ch,2(%edi)
L4x4_11:
cmpw %bp,6(%edx)
jg L4x4_12
movw %bp,6(%edx)
movb %ch,3(%edi)
L4x4_12:
cmpw %bp,(%edx,%esi,1)
jg L4x4_13
movw %bp,(%edx,%esi,1)
movb %ch,(%edi,%ebx,1)
L4x4_13:
cmpw %bp,2(%edx,%esi,1)
jg L4x4_14
movw %bp,2(%edx,%esi,1)
movb %ch,1(%edi,%ebx,1)
L4x4_14:
cmpw %bp,4(%edx,%esi,1)
jg L4x4_15
movw %bp,4(%edx,%esi,1)
movb %ch,2(%edi,%ebx,1)
L4x4_15:
cmpw %bp,6(%edx,%esi,1)
jg L4x4_16
movw %bp,6(%edx,%esi,1)
movb %ch,3(%edi,%ebx,1)
L4x4_16:
popl %esi
jmp LDone
// default case, handling any size particle
LDefault:
// count = pix << d_y_aspect_shift;
movl %eax,%ebx
movl %eax,DP_Pix
movb C(d_y_aspect_shift),%cl
shll %cl,%ebx
// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// for (i=0 ; i<pix ; i++)
// {
// if (pz[i] <= izi)
// {
// pz[i] = izi;
// pdest[i] = color;
// }
// }
// }
LGenRowLoop:
movl DP_Pix,%eax
LGenColLoop:
cmpw %bp,-2(%edx,%eax,2)
jg LGSkip
movw %bp,-2(%edx,%eax,2)
movb %ch,-1(%edi,%eax,1)
LGSkip:
decl %eax // --pix
jnz LGenColLoop
addl C(d_zrowbytes),%edx
addl C(screenwidth),%edi
decl %ebx // --count
jnz LGenRowLoop
LDone:
popl %ebx // restore register variables
popl %edi
popl %ebp // restore the caller's stack frame
ret
LPop6AndDone:
fstp %st(0)
fstp %st(0)
fstp %st(0)
fstp %st(0)
fstp %st(0)
LPop1AndDone:
fstp %st(0)
jmp LDone
#endif // id386

1744
source/d_polysa.S Normal file

File diff suppressed because it is too large Load diff

89
source/d_scana.S Normal file
View file

@ -0,0 +1,89 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// d_scana.s
// x86 assembly-language turbulent texture mapping code
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
.data
.text
//----------------------------------------------------------------------
// turbulent texture mapping code
//----------------------------------------------------------------------
.align 4
.globl C(D_DrawTurbulent8Span)
C(D_DrawTurbulent8Span):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl C(r_turb_s),%esi
movl C(r_turb_t),%ecx
movl C(r_turb_pdest),%edi
movl C(r_turb_spancount),%ebx
Llp:
movl %ecx,%eax
movl %esi,%edx
sarl $16,%eax
movl C(r_turb_turb),%ebp
sarl $16,%edx
andl $(CYCLE-1),%eax
andl $(CYCLE-1),%edx
movl (%ebp,%eax,4),%eax
movl (%ebp,%edx,4),%edx
addl %esi,%eax
sarl $16,%eax
addl %ecx,%edx
sarl $16,%edx
andl $(TURB_TEX_SIZE-1),%eax
andl $(TURB_TEX_SIZE-1),%edx
shll $6,%edx
movl C(r_turb_pbase),%ebp
addl %eax,%edx
incl %edi
addl C(r_turb_sstep),%esi
addl C(r_turb_tstep),%ecx
movb (%ebp,%edx,1),%dl
decl %ebx
movb %dl,-1(%edi)
jnz Llp
movl %edi,C(r_turb_pdest)
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif // id386

900
source/d_spr8.S Normal file
View file

@ -0,0 +1,900 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// d_spr8.s
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#if id386
//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with transparency.
//----------------------------------------------------------------------
.text
// out-of-line, rarely-needed clamping code
LClampHigh0:
movl C(bbextents),%esi
jmp LClampReentry0
LClampHighOrLow0:
jg LClampHigh0
xorl %esi,%esi
jmp LClampReentry0
LClampHigh1:
movl C(bbextentt),%edx
jmp LClampReentry1
LClampHighOrLow1:
jg LClampHigh1
xorl %edx,%edx
jmp LClampReentry1
LClampLow2:
movl $2048,%ebp
jmp LClampReentry2
LClampHigh2:
movl C(bbextents),%ebp
jmp LClampReentry2
LClampLow3:
movl $2048,%ecx
jmp LClampReentry3
LClampHigh3:
movl C(bbextentt),%ecx
jmp LClampReentry3
LClampLow4:
movl $2048,%eax
jmp LClampReentry4
LClampHigh4:
movl C(bbextents),%eax
jmp LClampReentry4
LClampLow5:
movl $2048,%ebx
jmp LClampReentry5
LClampHigh5:
movl C(bbextentt),%ebx
jmp LClampReentry5
#define pspans 4+16
.align 4
.globl C(D_SpriteDrawSpans)
C(D_SpriteDrawSpans):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers, and 1/z step in 0.32 fixed-point
//
// FIXME: any overlap from rearranging?
flds C(d_sdivzstepu)
fmuls fp_8
movl C(cacheblock),%edx
flds C(d_tdivzstepu)
fmuls fp_8
movl pspans(%esp),%ebx // point to the first span descriptor
flds C(d_zistepu)
fmuls fp_8
movl %edx,pbase // pbase = cacheblock
flds C(d_zistepu)
fmuls fp_64kx64k
fxch %st(3)
fstps sdivz8stepu
fstps zi8stepu
fstps tdivz8stepu
fistpl izistep
movl izistep,%eax
rorl $16,%eax // put upper 16 bits in low word
movl sspan_t_count(%ebx),%ecx
movl %eax,izistep
cmpl $0,%ecx
jle LNextSpan
LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
fildl sspan_t_v(%ebx)
fildl sspan_t_u(%ebx)
fld %st(1) // dv | du | dv
fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
fld %st(1) // du | dv*d_sdivzstepv | du | dv
fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
// dv*d_sdivzstepv | du | dv
fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
// dv*d_sdivzstepv | du | dv
faddp %st(0),%st(2) // du*d_tdivzstepu |
// du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
// du*d_sdivzstepu + dv*d_sdivzstepv |
// du*d_tdivzstepu | du | dv
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
// dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
// du*d_sdivzstepu; stays in %st(2) at end
fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
// s/z
fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
// du*d_tdivzstepu | du | s/z
fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
// du*d_tdivzstepu | du | s/z
faddp %st(0),%st(2) // dv*d_zistepv |
// dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
// dv*d_zistepv | s/z
fmuls C(d_zistepu) // du*d_zistepu |
// dv*d_tdivzstepv + du*d_tdivzstepu |
// dv*d_zistepv | s/z
fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
// du*d_zistepu | dv*d_zistepv | s/z
fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
// du*d_tdivzstepu; stays in %st(1) at end
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
// du*d_zistepu; stays in %st(0) at end
// 1/z | fp_64k | t/z | s/z
fld %st(0) // FIXME: get rid of stall on FMUL?
fmuls fp_64kx64k
fxch %st(1)
//
// calculate and clamp s & t
//
fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
fxch %st(1)
fistpl izi // 0.32 fixed-point 1/z
movl izi,%ebp
//
// set pz to point to the first z-buffer pixel in the span
//
rorl $16,%ebp // put upper 16 bits in low word
movl sspan_t_v(%ebx),%eax
movl %ebp,izi
movl sspan_t_u(%ebx),%ebp
imull C(d_zrowbytes)
shll $1,%ebp // a word per pixel
addl C(d_pzbuffer),%eax
addl %ebp,%eax
movl %eax,pz
//
// point %edi to the first pixel in the span
//
movl C(d_viewbuffer),%ebp
movl sspan_t_v(%ebx),%eax
pushl %ebx // preserve spans pointer
movl C(tadjust),%edx
movl C(sadjust),%esi
movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
addl %ebp,%edi
movl sspan_t_u(%ebx),%ebp
addl %ebp,%edi // pdest = &pdestspan[scans->u];
//
// now start the FDIV for the end of the span
//
cmpl $8,%ecx
ja LSetupNotLast1
decl %ecx
jz LCleanup1 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fildl spancountminus1
flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
// _d_tdivzstepu*scm1
fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
// _d_tdivzstepu*scm1
faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
faddp %st(0),%st(3)
flds fp_64k
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight1
LCleanup1:
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
jmp LFDIVInFlight1
.align 4
LSetupNotLast1:
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fadds zi8stepu
fxch %st(2)
fadds sdivz8stepu
fxch %st(2)
flds tdivz8stepu
faddp %st(0),%st(2)
flds fp_64k
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight1:
addl s,%esi
addl t,%edx
movl C(bbextents),%ebx
movl C(bbextentt),%ebp
cmpl %ebx,%esi
ja LClampHighOrLow0
LClampReentry0:
movl %esi,s
movl pbase,%ebx
shll $16,%esi
cmpl %ebp,%edx
movl %esi,sfracf
ja LClampHighOrLow1
LClampReentry1:
movl %edx,t
movl s,%esi // sfrac = scans->sfrac;
shll $16,%edx
movl t,%eax // tfrac = scans->tfrac;
sarl $16,%esi
movl %edx,tfracf
//
// calculate the texture starting address
//
sarl $16,%eax
addl %ebx,%esi
imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
addl %eax,%esi // psource = pbase + (sfrac >> 16) +
// ((tfrac >> 16) * cachewidth);
//
// determine whether last span or not
//
cmpl $8,%ecx
jna LLastSegment
//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:
//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//
// pick up after the FDIV that was left in flight previously
fld %st(0) // duplicate it
fmul %st(4),%st(0) // s = s/z * z
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movl snext,%eax
movl tnext,%edx
subl $8,%ecx // count off this segments' pixels
movl C(sadjust),%ebp
pushl %ecx // remember count of remaining pixels
movl C(tadjust),%ecx
addl %eax,%ebp
addl %edx,%ecx
movl C(bbextents),%eax
movl C(bbextentt),%edx
cmpl $2048,%ebp
jl LClampLow2
cmpl %eax,%ebp
ja LClampHigh2
LClampReentry2:
cmpl $2048,%ecx
jl LClampLow3
cmpl %edx,%ecx
ja LClampHigh3
LClampReentry3:
movl %ebp,snext
movl %ecx,tnext
subl s,%ebp
subl t,%ecx
//
// set up advancetable
//
movl %ecx,%eax
movl %ebp,%edx
sarl $19,%edx // sstep >>= 16;
movl C(cachewidth),%ebx
sarl $19,%eax // tstep >>= 16;
jz LIsZero
imull %ebx,%eax // (tstep >> 16) * cachewidth;
LIsZero:
addl %edx,%eax // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%edx
movl %eax,advancetable+4 // advance base in t
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $13,%ebp // left-justify sstep fractional part
movl %ebp,sstep
movl sfracf,%ebx
shll $13,%ecx // left-justify tstep fractional part
movl %eax,advancetable // advance extra in t
movl %ecx,tstep
movl pz,%ecx
movl izi,%ebp
cmpw (%ecx),%bp
jl Lp1
movb (%esi),%al // get first source texel
cmpb $(TRANSPARENT_COLOR),%al
jz Lp1
movw %bp,(%ecx)
movb %al,(%edi) // store first dest pixel
Lp1:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx // advance tfrac fractional part by tstep frac
sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
addl sstep,%ebx // advance sfrac fractional part by sstep frac
adcl advancetable+4(,%eax,4),%esi // point to next source texel
cmpw 2(%ecx),%bp
jl Lp2
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp2
movw %bp,2(%ecx)
movb %al,1(%edi)
Lp2:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
cmpw 4(%ecx),%bp
jl Lp3
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp3
movw %bp,4(%ecx)
movb %al,2(%edi)
Lp3:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
cmpw 6(%ecx),%bp
jl Lp4
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp4
movw %bp,6(%ecx)
movb %al,3(%edi)
Lp4:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
cmpw 8(%ecx),%bp
jl Lp5
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp5
movw %bp,8(%ecx)
movb %al,4(%edi)
Lp5:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
//
// start FDIV for end of next segment in flight, so it can overlap
//
popl %eax
cmpl $8,%eax // more than one segment after this?
ja LSetupNotLast2 // yes
decl %eax
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
movl %eax,spancountminus1
fildl spancountminus1
flds C(d_zistepu) // _d_zistepu | spancountminus1
fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
fxch %st(1) // scm1 | _d_tdivzstepu*scm1
fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
flds fp_64k // 64k | _d_sdivzstepu*scm1
fxch %st(1) // _d_sdivzstepu*scm1 | 64k
faddp %st(0),%st(4) // 64k
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight2
.align 4
LSetupNotLast2:
fadds zi8stepu
fxch %st(2)
fadds sdivz8stepu
fxch %st(2)
flds tdivz8stepu
faddp %st(0),%st(2)
flds fp_64k
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight2:
pushl %eax
cmpw 10(%ecx),%bp
jl Lp6
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp6
movw %bp,10(%ecx)
movb %al,5(%edi)
Lp6:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
cmpw 12(%ecx),%bp
jl Lp7
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp7
movw %bp,12(%ecx)
movb %al,6(%edi)
Lp7:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
cmpw 14(%ecx),%bp
jl Lp8
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp8
movw %bp,14(%ecx)
movb %al,7(%edi)
Lp8:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl $8,%edi
addl $16,%ecx
movl %edx,tfracf
movl snext,%edx
movl %ebx,sfracf
movl tnext,%ebx
movl %edx,s
movl %ebx,t
movl %ecx,pz
movl %ebp,izi
popl %ecx // retrieve count
//
// determine whether last span or not
//
cmpl $8,%ecx // are there multiple segments remaining?
ja LNotLastSegment // yes
//
// last segment of scan
//
LLastSegment:
//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
testl %ecx,%ecx
jz LNoSteps // just draw the last pixel and we're done
// pick up after the FDIV that was left in flight previously
fld %st(0) // duplicate it
fmul %st(4),%st(0) // s = s/z * z
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movl C(tadjust),%ebx
movl C(sadjust),%eax
addl snext,%eax
addl tnext,%ebx
movl C(bbextents),%ebp
movl C(bbextentt),%edx
cmpl $2048,%eax
jl LClampLow4
cmpl %ebp,%eax
ja LClampHigh4
LClampReentry4:
movl %eax,snext
cmpl $2048,%ebx
jl LClampLow5
cmpl %edx,%ebx
ja LClampHigh5
LClampReentry5:
cmpl $1,%ecx // don't bother
je LOnlyOneStep // if two pixels in segment, there's only one step,
// of the segment length
subl s,%eax
subl t,%ebx
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
addl %ebx,%ebx // reciprocal yields 16.48
imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
movl %edx,%ebp
movl %ebx,%eax
imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
movl spr8entryvec_table(,%ecx,4),%ebx
movl %edx,%eax
pushl %ebx // entry point into code for RET later
movl %ebp,%ecx
sarl $16,%ecx // sstep >>= 16;
movl C(cachewidth),%ebx
sarl $16,%edx // tstep >>= 16;
jz LIsZeroLast
imull %ebx,%edx // (tstep >> 16) * cachewidth;
LIsZeroLast:
addl %ecx,%edx // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%ecx
movl %edx,advancetable+4 // advance base in t
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $16,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
shll $16,%eax // left-justify tstep fractional part
movl %edx,advancetable // advance extra in t
movl %eax,tstep
movl %ebp,sstep
movl %ecx,%edx
movl pz,%ecx
movl izi,%ebp
ret // jump to the number-of-pixels handler
//----------------------------------------
LNoSteps:
movl pz,%ecx
subl $7,%edi // adjust for hardwired offset
subl $14,%ecx
jmp LEndSpan
LOnlyOneStep:
subl s,%eax
subl t,%ebx
movl %eax,%ebp
movl %ebx,%edx
jmp LSetEntryvec
//----------------------------------------
.globl Spr8Entry2_8
Spr8Entry2_8:
subl $6,%edi // adjust for hardwired offsets
subl $12,%ecx
movb (%esi),%al
jmp LLEntry2_8
//----------------------------------------
.globl Spr8Entry3_8
Spr8Entry3_8:
subl $5,%edi // adjust for hardwired offsets
subl $10,%ecx
jmp LLEntry3_8
//----------------------------------------
.globl Spr8Entry4_8
Spr8Entry4_8:
subl $4,%edi // adjust for hardwired offsets
subl $8,%ecx
jmp LLEntry4_8
//----------------------------------------
.globl Spr8Entry5_8
Spr8Entry5_8:
subl $3,%edi // adjust for hardwired offsets
subl $6,%ecx
jmp LLEntry5_8
//----------------------------------------
.globl Spr8Entry6_8
Spr8Entry6_8:
subl $2,%edi // adjust for hardwired offsets
subl $4,%ecx
jmp LLEntry6_8
//----------------------------------------
.globl Spr8Entry7_8
Spr8Entry7_8:
decl %edi // adjust for hardwired offsets
subl $2,%ecx
jmp LLEntry7_8
//----------------------------------------
.globl Spr8Entry8_8
Spr8Entry8_8:
cmpw (%ecx),%bp
jl Lp9
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp9
movw %bp,(%ecx)
movb %al,(%edi)
Lp9:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry7_8:
cmpw 2(%ecx),%bp
jl Lp10
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp10
movw %bp,2(%ecx)
movb %al,1(%edi)
Lp10:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry6_8:
cmpw 4(%ecx),%bp
jl Lp11
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp11
movw %bp,4(%ecx)
movb %al,2(%edi)
Lp11:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry5_8:
cmpw 6(%ecx),%bp
jl Lp12
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp12
movw %bp,6(%ecx)
movb %al,3(%edi)
Lp12:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry4_8:
cmpw 8(%ecx),%bp
jl Lp13
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp13
movw %bp,8(%ecx)
movb %al,4(%edi)
Lp13:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry3_8:
cmpw 10(%ecx),%bp
jl Lp14
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp14
movw %bp,10(%ecx)
movb %al,5(%edi)
Lp14:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LLEntry2_8:
cmpw 12(%ecx),%bp
jl Lp15
movb (%esi),%al
cmpb $(TRANSPARENT_COLOR),%al
jz Lp15
movw %bp,12(%ecx)
movb %al,6(%edi)
Lp15:
addl izistep,%ebp
adcl $0,%ebp
addl tstep,%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
LEndSpan:
cmpw 14(%ecx),%bp
jl Lp16
movb (%esi),%al // load first texel in segment
cmpb $(TRANSPARENT_COLOR),%al
jz Lp16
movw %bp,14(%ecx)
movb %al,7(%edi)
Lp16:
//
// clear s/z, t/z, 1/z from FP stack
//
fstp %st(0)
fstp %st(0)
fstp %st(0)
popl %ebx // restore spans pointer
LNextSpan:
addl $(sspan_t_size),%ebx // point to next span
movl sspan_t_count(%ebx),%ecx
cmpl $0,%ecx // any more spans?
jg LSpanLoop // yes
jz LNextSpan // yes, but this one's empty
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
#endif // id386

213
source/d_varsa.S Normal file
View file

@ -0,0 +1,213 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// d_varsa.s
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
.data
//-------------------------------------------------------
// global refresh variables
//-------------------------------------------------------
// FIXME: put all refresh variables into one contiguous block. Make into one
// big structure, like cl or sv?
.align 4
.globl C(d_sdivzstepu)
.globl C(d_tdivzstepu)
.globl C(d_zistepu)
.globl C(d_sdivzstepv)
.globl C(d_tdivzstepv)
.globl C(d_zistepv)
.globl C(d_sdivzorigin)
.globl C(d_tdivzorigin)
.globl C(d_ziorigin)
C(d_sdivzstepu): .single 0
C(d_tdivzstepu): .single 0
C(d_zistepu): .single 0
C(d_sdivzstepv): .single 0
C(d_tdivzstepv): .single 0
C(d_zistepv): .single 0
C(d_sdivzorigin): .single 0
C(d_tdivzorigin): .single 0
C(d_ziorigin): .single 0
.globl C(sadjust)
.globl C(tadjust)
.globl C(bbextents)
.globl C(bbextentt)
C(sadjust): .long 0
C(tadjust): .long 0
C(bbextents): .long 0
C(bbextentt): .long 0
.globl C(cacheblock)
.globl C(d_viewbuffer)
.globl C(cachewidth)
.globl C(d_pzbuffer)
.globl C(d_zrowbytes)
.globl C(d_zwidth)
C(cacheblock): .long 0
C(cachewidth): .long 0
C(d_viewbuffer): .long 0
C(d_pzbuffer): .long 0
C(d_zrowbytes): .long 0
C(d_zwidth): .long 0
//-------------------------------------------------------
// ASM-only variables
//-------------------------------------------------------
.globl izi
izi: .long 0
.globl pbase, s, t, sfracf, tfracf, snext, tnext
.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
s: .long 0
t: .long 0
snext: .long 0
tnext: .long 0
sfracf: .long 0
tfracf: .long 0
pbase: .long 0
zi8stepu: .long 0
sdivz8stepu: .long 0
tdivz8stepu: .long 0
zi16stepu: .long 0
sdivz16stepu: .long 0
tdivz16stepu: .long 0
spancountminus1: .long 0
pz: .long 0
.globl izistep
izistep: .long 0
//-------------------------------------------------------
// local variables for d_draw16.s
//-------------------------------------------------------
.globl reciprocal_table_16, entryvec_table_16
// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
// 1/14, and 1/15 in 0.32 form
reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
.long 0x19999999, 0x15555555, 0x12492492
.long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
.long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
#ifndef NeXT
.extern Entry2_16
.extern Entry3_16
.extern Entry4_16
.extern Entry5_16
.extern Entry6_16
.extern Entry7_16
.extern Entry8_16
.extern Entry9_16
.extern Entry10_16
.extern Entry11_16
.extern Entry12_16
.extern Entry13_16
.extern Entry14_16
.extern Entry15_16
.extern Entry16_16
#endif
entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16
.long Entry5_16, Entry6_16, Entry7_16, Entry8_16
.long Entry9_16, Entry10_16, Entry11_16, Entry12_16
.long Entry13_16, Entry14_16, Entry15_16, Entry16_16
//-------------------------------------------------------
// local variables for d_parta.s
//-------------------------------------------------------
.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
DP_Count: .long 0
DP_u: .long 0
DP_v: .long 0
DP_32768: .single 32768.0
DP_Color: .long 0
DP_Pix: .long 0
#ifndef NeXT
.extern DP_1x1
.extern DP_2x2
.extern DP_3x3
.extern DP_4x4
#endif
DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4
//
// advancetable is 8 bytes, but points to the middle of that range so negative
// offsets will work
//
.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
advancetable: .long 0, 0
sstep: .long 0
tstep: .long 0
pspantemp: .long 0
counttemp: .long 0
jumptemp: .long 0
// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
.globl reciprocal_table, entryvec_table
reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
.long 0x19999999, 0x15555555, 0x12492492
#ifndef NeXT
.extern Entry2_8
.extern Entry3_8
.extern Entry4_8
.extern Entry5_8
.extern Entry6_8
.extern Entry7_8
.extern Entry8_8
#endif
entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8
.long Entry5_8, Entry6_8, Entry7_8, Entry8_8
#ifndef NeXT
.extern Spr8Entry2_8
.extern Spr8Entry3_8
.extern Spr8Entry4_8
.extern Spr8Entry5_8
.extern Spr8Entry6_8
.extern Spr8Entry7_8
.extern Spr8Entry8_8
#endif
.globl spr8entryvec_table
spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
.long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
#endif // id386

View file

@ -1,3 +1,22 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// math.s
// x86 assembly-language math routines.
@ -6,7 +25,7 @@
#include "quakeasm.h"
#if id386
#if id386
.data
@ -16,6 +35,73 @@ Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
.text
// TODO: rounding needed?
// stack parameter offset
#define val 4
.globl C(Invert24To16)
C(Invert24To16):
movl val(%esp),%ecx
movl $0x100,%edx // 0x10000000000 as dividend
cmpl %edx,%ecx
jle LOutOfRange
subl %eax,%eax
divl %ecx
ret
LOutOfRange:
movl $0xFFFFFFFF,%eax
ret
#define in 4
#define out 8
.align 2
.globl C(TransformVector)
C(TransformVector):
movl in(%esp),%eax
movl out(%esp),%edx
flds (%eax) // in[0]
fmuls C(vright) // in[0]*vright[0]
flds (%eax) // in[0] | in[0]*vright[0]
fmuls C(vup) // in[0]*vup[0] | in[0]*vright[0]
flds (%eax) // in[0] | in[0]*vup[0] | in[0]*vright[0]
fmuls C(vpn) // in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
flds 4(%eax) // in[1] | ...
fmuls C(vright)+4 // in[1]*vright[1] | ...
flds 4(%eax) // in[1] | in[1]*vright[1] | ...
fmuls C(vup)+4 // in[1]*vup[1] | in[1]*vright[1] | ...
flds 4(%eax) // in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
fmuls C(vpn)+4 // in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
fxch %st(2) // in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
faddp %st(0),%st(5) // in[1]*vup[1] | in[1]*vpn[1] | ...
faddp %st(0),%st(3) // in[1]*vpn[1] | ...
faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
flds 8(%eax) // in[2] | ...
fmuls C(vright)+8 // in[2]*vright[2] | ...
flds 8(%eax) // in[2] | in[2]*vright[2] | ...
fmuls C(vup)+8 // in[2]*vup[2] | in[2]*vright[2] | ...
flds 8(%eax) // in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
fmuls C(vpn)+8 // in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
fxch %st(2) // in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
faddp %st(0),%st(5) // in[2]*vup[2] | in[2]*vpn[2] | ...
faddp %st(0),%st(3) // in[2]*vpn[2] | ...
faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
fstps 8(%edx) // out[2]
fstps 4(%edx) // out[1]
fstps (%edx) // out[0]
ret
#define EMINS 4+4
#define EMAXS 4+8
@ -35,7 +121,7 @@ C(BoxOnPlaneSide):
jge Lerror
flds pl_normal(%edx) // p->normal[0]
fld %st(0) // p->normal[0] | p->normal[0]
jmp *Ljmptab(,%eax,4)
jmp Ljmptab(,%eax,4)
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];

216
source/r_aclipa.S Normal file
View file

@ -0,0 +1,216 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// r_aliasa.s
// x86 assembly-language Alias model transform and project code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
.data
Ltemp0: .long 0
Ltemp1: .long 0
.text
#define pfv0 8+4
#define pfv1 8+8
#define out 8+12
.globl C(R_Alias_clip_bottom)
C(R_Alias_clip_bottom):
pushl %esi
pushl %edi
movl pfv0(%esp),%esi
movl pfv1(%esp),%edi
movl C(r_refdef)+rd_aliasvrectbottom,%eax
LDoForwardOrBackward:
movl fv_v+4(%esi),%edx
movl fv_v+4(%edi),%ecx
cmpl %ecx,%edx
jl LDoForward
movl fv_v+4(%esi),%ecx
movl fv_v+4(%edi),%edx
movl pfv0(%esp),%edi
movl pfv1(%esp),%esi
LDoForward:
subl %edx,%ecx
subl %edx,%eax
movl %ecx,Ltemp1
movl %eax,Ltemp0
fildl Ltemp1
fildl Ltemp0
movl out(%esp),%edx
movl $2,%eax
fdivp %st(0),%st(1) // scale
LDo3Forward:
fildl fv_v+0(%esi) // fv0v0 | scale
fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale
fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale
fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
// scale
fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
// scale
fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
// fv1v2 | scale
fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
// fv1v2 | scale
fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
// fv1v0-fv0v0 | fv1v2 | scale
fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
// fv1v0-fv0v0 | fv1v2 | scale
fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
// fv1v0-fv0v0 | fv1v2-fv0v2 | scale
fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
// fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
// (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
addl $12,%edi
fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
// (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
addl $12,%esi
addl $12,%edx
fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
// (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
// fv0v2
fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
// (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
// fv0v2
faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale |
// fv0v0+(fv1v0-fv0v0)*scale |
// (fv1v2-fv0v2)*scale | fv0v2
faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
// fv0v0+(fv1v0-fv0v0)*scale |
// (fv1v2-fv0v2)*scale | fv0v2
fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
// (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale |
// fv0v2+(fv1v2-fv0v2)*scale |
// fv0v1+(fv1v1-fv0v1)*scale
fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale |
// fv0v2+(fv1v2-fv0v2)*scale |
// fv0v1+(fv1v1-fv0v1)*scale
fadds float_point5
fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale |
// fv0v2+(fv1v2-fv0v2)*scale |
// fv0v0+(fv1v0-fv0v0)*scale
fadds float_point5
fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale |
// fv0v1+(fv1v1-fv0v1)*scale |
// fv0v0+(fv1v0-fv0v0)*scale
fadds float_point5
fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale |
// fv0v1+(fv1v1-fv0v1)*scale |
// fv0v2+(fv1v2-fv0v2)*scale
fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale |
// fv0v2+(fv1v2-fv0v2)*scale
fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
// fv0v2+(fv1v2-fv0v2)*scale | scale
fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale
fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale
fistpl fv_v+8-12(%edx) // scale
decl %eax
jnz LDo3Forward
fstp %st(0)
popl %edi
popl %esi
ret
.globl C(R_Alias_clip_top)
C(R_Alias_clip_top):
pushl %esi
pushl %edi
movl pfv0(%esp),%esi
movl pfv1(%esp),%edi
movl C(r_refdef)+rd_aliasvrect+4,%eax
jmp LDoForwardOrBackward
.globl C(R_Alias_clip_right)
C(R_Alias_clip_right):
pushl %esi
pushl %edi
movl pfv0(%esp),%esi
movl pfv1(%esp),%edi
movl C(r_refdef)+rd_aliasvrectright,%eax
LRightLeftEntry:
movl fv_v+4(%esi),%edx
movl fv_v+4(%edi),%ecx
cmpl %ecx,%edx
movl fv_v+0(%esi),%edx
movl fv_v+0(%edi),%ecx
jl LDoForward2
movl fv_v+0(%esi),%ecx
movl fv_v+0(%edi),%edx
movl pfv0(%esp),%edi
movl pfv1(%esp),%esi
LDoForward2:
jmp LDoForward
.globl C(R_Alias_clip_left)
C(R_Alias_clip_left):
pushl %esi
pushl %edi
movl pfv0(%esp),%esi
movl pfv1(%esp),%edi
movl C(r_refdef)+rd_aliasvrect+0,%eax
jmp LRightLeftEntry
#endif // id386

237
source/r_aliasa.S Normal file
View file

@ -0,0 +1,237 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// r_aliasa.s
// x86 assembly-language Alias model transform and project code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
.data
Lfloat_1: .single 1.0
Ltemp: .long 0
Lcoords: .long 0, 0, 0
.text
#define fv 12+4
#define pstverts 12+8
.globl C(R_AliasTransformAndProjectFinalVerts)
C(R_AliasTransformAndProjectFinalVerts):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
// int i, temp;
// float lightcos, *plightnormal, zi;
// trivertx_t *pverts;
// pverts = r_apverts;
movl C(r_apverts),%esi
// for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
// {
movl pstverts(%esp),%ebp
movl fv(%esp),%edi
movl C(r_anumverts),%ecx
subl %edx,%edx
Lloop:
// // transform and project
// zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
// aliastransform[2][3]);
movb (%esi),%dl
movb %dl,Lcoords
fildl Lcoords // v[0]
movb 1(%esi),%dl
movb %dl,Lcoords+4
fildl Lcoords+4 // v[1] | v[0]
movb 2(%esi),%dl
movb %dl,Lcoords+8
fildl Lcoords+8 // v[2] | v[1] | v[0]
fld %st(2) // v[0] | v[2] | v[1] | v[0]
fmuls C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
fld %st(2) // v[1] | accum | v[2] | v[1] | v[0]
fmuls C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
fxch %st(1) // accum | accum2 | v[2] | v[1] | v[0]
fadds C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
fld %st(2) // v[2] | accum | accum2 | v[2] | v[1] | v[0]
fmuls C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
// v[0]
fxch %st(1) // accum | accum3 | accum2 | v[2] | v[1] | v[0]
faddp %st(0),%st(2) // accum3 | accum | v[2] | v[1] | v[0]
movb tv_lightnormalindex(%esi),%dl
movl stv_s(%ebp),%eax
movl %eax,fv_v+8(%edi)
faddp %st(0),%st(1) // z | v[2] | v[1] | v[0]
movl stv_t(%ebp),%eax
movl %eax,fv_v+12(%edi)
// // lighting
// plightnormal = r_avertexnormals[pverts->lightnormalindex];
fdivrs Lfloat_1 // zi | v[2] | v[1] | v[0]
// fv->v[2] = pstverts->s;
// fv->v[3] = pstverts->t;
// fv->flags = pstverts->onseam;
movl stv_onseam(%ebp),%eax
movl %eax,fv_flags(%edi)
movl fv_size(%edi),%eax
movl stv_size(%ebp),%eax
movl 4(%esi),%eax
leal (%edx,%edx,2),%eax // index*3
fxch %st(3) // v[0] | v[2] | v[1] | zi
// lightcos = DotProduct (plightnormal, r_plightvec);
flds C(r_avertexnormals)(,%eax,4)
fmuls C(r_plightvec)
flds C(r_avertexnormals)+4(,%eax,4)
fmuls C(r_plightvec)+4
flds C(r_avertexnormals)+8(,%eax,4)
fmuls C(r_plightvec)+8
fxch %st(1)
faddp %st(0),%st(2)
fld %st(2) // v[0] | laccum | laccum2 | v[0] | v[2] |
// v[1] | zi
fmuls C(aliastransform)+0 // xaccum | laccum | laccum2 | v[0] | v[2] |
// v[1] | zi
fxch %st(2) // laccum2 | laccum | xaccum | v[0] | v[2] |
// v[1] | zi
faddp %st(0),%st(1) // laccum | xaccum | v[0] | v[2] | v[1] | zi
// temp = r_ambientlight;
// if (lightcos < 0)
// {
fsts Ltemp
movl C(r_ambientlight),%eax
movb Ltemp+3,%dl
testb $0x80,%dl
jz Lsavelight // no need to clamp if only ambient lit, because
// r_ambientlight is preclamped
// temp += (int)(r_shadelight * lightcos);
fmuls C(r_shadelight)
// FIXME: fast float->int conversion?
fistpl Ltemp
addl Ltemp,%eax
// // clamp; because we limited the minimum ambient and shading light, we
// // don't have to clamp low light, just bright
// if (temp < 0)
// temp = 0;
jns Lp1
subl %eax,%eax
// }
Lp1:
// fv->v[4] = temp;
//
// // x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
// // scaled up by 1/2**31, and the scaling cancels out for x and y in the
// // projection
// fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
// aliastransform[0][3]) * zi) + aliasxcenter;
// fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
// aliastransform[1][3]) * zi) + aliasycenter;
// fv->v[5] = zi;
fxch %st(1) // v[0] | xaccum | v[2] | v[1] | zi
fmuls C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
fxch %st(3) // v[1] | xaccum | v[2] | yaccum | zi
fld %st(0) // v[1] | v[1] | xaccum | v[2] | yaccum | zi
fmuls C(aliastransform)+4 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
fxch %st(1) // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
movl %eax,fv_v+16(%edi)
fmuls C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
// zi
fxch %st(2) // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
// zi
fadds C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
// zi
fxch %st(4) // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
// zi
fadds C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
// zi
fxch %st(3) // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
// zi
fld %st(0) // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
// xaccum | zi
fmuls C(aliastransform)+8 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
// xaccum | zi
fxch %st(1) // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
// xaccum | zi
fmuls C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
// yaccum | xaccum | zi
fxch %st(5) // xaccum | xaccum3 | xaccum2 | yaccum2 |
// yaccum | yaccum3 | zi
faddp %st(0),%st(2) // xaccum3 | xaccum | yaccum2 | yaccum |
// yaccum3 | zi
fxch %st(3) // yaccum | xaccum | yaccum2 | xaccum3 |
// yaccum3 | zi
faddp %st(0),%st(2) // xaccum | yaccum | xaccum3 | yaccum3 | zi
addl $(tv_size),%esi
faddp %st(0),%st(2) // yaccum | x | yaccum3 | zi
faddp %st(0),%st(2) // x | y | zi
addl $(stv_size),%ebp
fmul %st(2),%st(0) // x/z | y | zi
fxch %st(1) // y | x/z | zi
fmul %st(2),%st(0) // y/z | x/z | zi
fxch %st(1) // x/z | y/z | zi
fadds C(aliasxcenter) // u | y/z | zi
fxch %st(1) // y/z | u | zi
fadds C(aliasycenter) // v | u | zi
fxch %st(2) // zi | u | v
// FIXME: fast float->int conversion?
fistpl fv_v+20(%edi) // u | v
fistpl fv_v+0(%edi) // v
fistpl fv_v+4(%edi)
// }
addl $(fv_size),%edi
decl %ecx
jnz Lloop
popl %esi // restore register variables
popl %edi
popl %ebp // restore the caller's stack frame
ret
Lsavelight:
fstp %st(0)
jmp Lp1
#endif // id386

838
source/r_drawa.S Normal file
View file

@ -0,0 +1,838 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// r_drawa.s
// x86 assembly-language edge clipping and emission code
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
// !!! if these are changed, they must be changed in r_draw.c too !!!
#define FULLY_CLIPPED_CACHED 0x80000000
#define FRAMECOUNT_MASK 0x7FFFFFFF
.data
Ld0: .single 0.0
Ld1: .single 0.0
Lstack: .long 0
Lfp_near_clip: .single NEAR_CLIP
Lceilv0: .long 0
Lv: .long 0
Lu0: .long 0
Lv0: .long 0
Lzi0: .long 0
.text
//----------------------------------------------------------------------
// edge clipping code
//----------------------------------------------------------------------
#define pv0 4+12
#define pv1 8+12
#define clip 12+12
.align 4
.globl C(R_ClipEdge)
C(R_ClipEdge):
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl %esp,Lstack // for clearing the stack later
// float d0, d1, f;
// mvertex_t clipvert;
movl clip(%esp),%ebx
movl pv0(%esp),%esi
movl pv1(%esp),%edx
// if (clip)
// {
testl %ebx,%ebx
jz Lemit
// do
// {
Lcliploop:
// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
flds mv_position+0(%esi)
fmuls cp_normal+0(%ebx)
flds mv_position+4(%esi)
fmuls cp_normal+4(%ebx)
flds mv_position+8(%esi)
fmuls cp_normal+8(%ebx)
fxch %st(1)
faddp %st(0),%st(2) // d0mul2 | d0add0
flds mv_position+0(%edx)
fmuls cp_normal+0(%ebx)
flds mv_position+4(%edx)
fmuls cp_normal+4(%ebx)
flds mv_position+8(%edx)
fmuls cp_normal+8(%ebx)
fxch %st(1)
faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
faddp %st(0),%st(2) // dot0 | dot1
fsubs cp_dist(%ebx) // d0 | dot1
fxch %st(1) // dot1 | d0
fsubs cp_dist(%ebx) // d1 | d0
fxch %st(1)
fstps Ld0
fstps Ld1
// if (d0 >= 0)
// {
movl Ld0,%eax
movl Ld1,%ecx
orl %eax,%ecx
js Lp2
// both points are unclipped
Lcontinue:
//
// R_ClipEdge (&clipvert, pv1, clip->next);
// return;
// }
// } while ((clip = clip->next) != NULL);
movl cp_next(%ebx),%ebx
testl %ebx,%ebx
jnz Lcliploop
// }
//// add the edge
// R_EmitEdge (pv0, pv1);
Lemit:
//
// set integer rounding to ceil mode, set to single precision
//
// FIXME: do away with by manually extracting integers from floats?
// FIXME: set less often
fldcw ceil_cw
// edge_t *edge, *pcheck;
// int u_check;
// float u, u_step;
// vec3_t local, transformed;
// float *world;
// int v, v2, ceilv0;
// float scale, lzi0, u0, v0;
// int side;
// if (r_lastvertvalid)
// {
cmpl $0,C(r_lastvertvalid)
jz LCalcFirst
// u0 = r_u1;
// v0 = r_v1;
// lzi0 = r_lzi1;
// ceilv0 = r_ceilv1;
movl C(r_lzi1),%eax
movl C(r_u1),%ecx
movl %eax,Lzi0
movl %ecx,Lu0
movl C(r_v1),%ecx
movl C(r_ceilv1),%eax
movl %ecx,Lv0
movl %eax,Lceilv0
jmp LCalcSecond
// }
LCalcFirst:
// else
// {
// world = &pv0->position[0];
call LTransformAndProject // v0 | lzi0 | u0
fsts Lv0
fxch %st(2) // u0 | lzi0 | v0
fstps Lu0 // lzi0 | v0
fstps Lzi0 // v0
// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
fistpl Lceilv0
// }
LCalcSecond:
// world = &pv1->position[0];
movl %edx,%esi
call LTransformAndProject // v1 | lzi1 | u1
flds Lu0 // u0 | v1 | lzi1 | u1
fxch %st(3) // u1 | v1 | lzi1 | u0
flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
fistl C(r_ceilv1)
fldcw single_cw // put back normal floating-point state
fsts C(r_v1)
fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
// if (r_lzi1 > lzi0)
// lzi0 = r_lzi1;
fcom %st(1)
fnstsw %ax
testb $1,%ah
jz LP0
fstp %st(0)
fld %st(0)
LP0:
fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
fxch %st(1)
fsts C(r_u1)
fxch %st(1)
// if (lzi0 > r_nearzi) // for mipmap finding
// r_nearzi = lzi0;
fcoms C(r_nearzi)
fnstsw %ax
testb $0x45,%ah
jnz LP1
fsts C(r_nearzi)
LP1:
// // for right edges, all we want is the effect on 1/z
// if (r_nearzionly)
// return;
movl C(r_nearzionly),%eax
testl %eax,%eax
jz LP2
LPop5AndDone:
movl C(cacheoffset),%eax
movl C(r_framecount),%edx
cmpl $0x7FFFFFFF,%eax
jz LDoPop
andl $(FRAMECOUNT_MASK),%edx
orl $(FULLY_CLIPPED_CACHED),%edx
movl %edx,C(cacheoffset)
LDoPop:
fstp %st(0) // u1 | v0 | v1 | u0
fstp %st(0) // v0 | v1 | u0
fstp %st(0) // v1 | u0
fstp %st(0) // u0
fstp %st(0)
jmp Ldone
LP2:
// // create the edge
// if (ceilv0 == r_ceilv1)
// return; // horizontal edge
movl Lceilv0,%ebx
movl C(edge_p),%edi
movl C(r_ceilv1),%ecx
movl %edi,%edx
movl C(r_pedge),%esi
addl $(et_size),%edx
cmpl %ecx,%ebx
jz LPop5AndDone
movl C(r_pedge),%eax
movl %eax,et_owner(%edi)
// side = ceilv0 > r_ceilv1;
//
// edge->nearzi = lzi0;
fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
// if (side == 1)
// {
jc LSide0
LSide1:
// // leading edge (go from p2 to p1)
// u_step = ((u0 - r_u1) / (v0 - r_v1));
fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
fdivrp %st(0),%st(2) // v1 | ustep
// r_emitted = 1;
movl $1,C(r_emitted)
// edge = edge_p++;
movl %edx,C(edge_p)
// pretouch next edge
movl (%edx),%eax
// v2 = ceilv0 - 1;
// v = r_ceilv1;
movl %ecx,%eax
leal -1(%ebx),%ecx
movl %eax,%ebx
// edge->surfs[0] = 0;
// edge->surfs[1] = surface_p - surfaces;
movl C(surface_p),%eax
movl C(surfaces),%esi
subl %edx,%edx
subl %esi,%eax
shrl $(SURF_T_SHIFT),%eax
movl %edx,et_surfs(%edi)
movl %eax,et_surfs+2(%edi)
subl %esi,%esi
// u = r_u1 + ((float)v - r_v1) * u_step;
movl %ebx,Lv
fildl Lv // v | v1 | ustep
fsubp %st(0),%st(1) // v-v1 | ustep
fmul %st(1),%st(0) // (v-v1)*ustep | ustep
fadds C(r_u1) // u | ustep
jmp LSideDone
// }
LSide0:
// else
// {
// // trailing edge (go from p1 to p2)
// u_step = ((r_u1 - u0) / (r_v1 - v0));
fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
fxch %st(2) // v1 | v0 | u1-u0 | u0
fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
fdivrp %st(0),%st(2) // v0 | ustep | u0
// r_emitted = 1;
movl $1,C(r_emitted)
// edge = edge_p++;
movl %edx,C(edge_p)
// pretouch next edge
movl (%edx),%eax
// v = ceilv0;
// v2 = r_ceilv1 - 1;
decl %ecx
// edge->surfs[0] = surface_p - surfaces;
// edge->surfs[1] = 0;
movl C(surface_p),%eax
movl C(surfaces),%esi
subl %edx,%edx
subl %esi,%eax
shrl $(SURF_T_SHIFT),%eax
movl %edx,et_surfs+2(%edi)
movl %eax,et_surfs(%edi)
movl $1,%esi
// u = u0 + ((float)v - v0) * u_step;
movl %ebx,Lv
fildl Lv // v | v0 | ustep | u0
fsubp %st(0),%st(1) // v-v0 | ustep | u0
fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
faddp %st(0),%st(2) // ustep | u
fxch %st(1) // u | ustep
// }
LSideDone:
// edge->u_step = u_step*0x100000;
// edge->u = u*0x100000 + 0xFFFFF;
fmuls fp_1m // u*0x100000 | ustep
fxch %st(1) // ustep | u*0x100000
fmuls fp_1m // ustep*0x100000 | u*0x100000
fxch %st(1) // u*0x100000 | ustep*0x100000
fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
fistpl et_u(%edi)
// // we need to do this to avoid stepping off the edges if a very nearly
// // horizontal edge is less than epsilon above a scan, and numeric error
// // causes it to incorrectly extend to the scan, and the extension of the
// // line goes off the edge of the screen
// // FIXME: is this actually needed?
// if (edge->u < r_refdef.vrect_x_adj_shift20)
// edge->u = r_refdef.vrect_x_adj_shift20;
// if (edge->u > r_refdef.vrectright_adj_shift20)
// edge->u = r_refdef.vrectright_adj_shift20;
movl et_u(%edi),%eax
movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
cmpl %edx,%eax
jl LP4
movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
cmpl %edx,%eax
jng LP5
LP4:
movl %edx,et_u(%edi)
movl %edx,%eax
LP5:
// // sort the edge in normally
// u_check = edge->u;
//
// if (edge->surfs[0])
// u_check++; // sort trailers after leaders
addl %esi,%eax
// if (!newedges[v] || newedges[v]->u >= u_check)
// {
movl C(newedges)(,%ebx,4),%esi
testl %esi,%esi
jz LDoFirst
cmpl %eax,et_u(%esi)
jl LNotFirst
LDoFirst:
// edge->next = newedges[v];
// newedges[v] = edge;
movl %esi,et_next(%edi)
movl %edi,C(newedges)(,%ebx,4)
jmp LSetRemove
// }
LNotFirst:
// else
// {
// pcheck = newedges[v];
//
// while (pcheck->next && pcheck->next->u < u_check)
// pcheck = pcheck->next;
LFindInsertLoop:
movl %esi,%edx
movl et_next(%esi),%esi
testl %esi,%esi
jz LInsertFound
cmpl %eax,et_u(%esi)
jl LFindInsertLoop
LInsertFound:
// edge->next = pcheck->next;
// pcheck->next = edge;
movl %esi,et_next(%edi)
movl %edi,et_next(%edx)
// }
LSetRemove:
// edge->nextremove = removeedges[v2];
// removeedges[v2] = edge;
movl C(removeedges)(,%ecx,4),%eax
movl %edi,C(removeedges)(,%ecx,4)
movl %eax,et_nextremove(%edi)
Ldone:
movl Lstack,%esp // clear temporary variables from stack
popl %ebx // restore register variables
popl %edi
popl %esi
ret
// at least one point is clipped
Lp2:
testl %eax,%eax
jns Lp1
// else
// {
// // point 0 is clipped
// if (d1 < 0)
// {
movl Ld1,%eax
testl %eax,%eax
jns Lp3
// // both points are clipped
// // we do cache fully clipped edges
// if (!leftclipped)
movl C(r_leftclipped),%eax
movl C(r_pedge),%ecx
testl %eax,%eax
jnz Ldone
// r_pedge->framecount = r_framecount;
movl C(r_framecount),%eax
andl $(FRAMECOUNT_MASK),%eax
orl $(FULLY_CLIPPED_CACHED),%eax
movl %eax,C(cacheoffset)
// return;
jmp Ldone
// }
Lp1:
// // point 0 is unclipped
// if (d1 >= 0)
// {
// // both points are unclipped
// continue;
// // only point 1 is clipped
// f = d0 / (d0 - d1);
flds Ld0
flds Ld1
fsubr %st(1),%st(0)
// // we don't cache partially clipped edges
movl $0x7FFFFFFF,C(cacheoffset)
fdivrp %st(0),%st(1)
subl $(mv_size),%esp // allocate space for clipvert
// clipvert.position[0] = pv0->position[0] +
// f * (pv1->position[0] - pv0->position[0]);
// clipvert.position[1] = pv0->position[1] +
// f * (pv1->position[1] - pv0->position[1]);
// clipvert.position[2] = pv0->position[2] +
// f * (pv1->position[2] - pv0->position[2]);
flds mv_position+8(%edx)
fsubs mv_position+8(%esi)
flds mv_position+4(%edx)
fsubs mv_position+4(%esi)
flds mv_position+0(%edx)
fsubs mv_position+0(%esi) // 0 | 1 | 2
// replace pv1 with the clip point
movl %esp,%edx
movl cp_leftedge(%ebx),%eax
testb %al,%al
fmul %st(3),%st(0)
fxch %st(1) // 1 | 0 | 2
fmul %st(3),%st(0)
fxch %st(2) // 2 | 0 | 1
fmulp %st(0),%st(3) // 0 | 1 | 2
fadds mv_position+0(%esi)
fxch %st(1) // 1 | 0 | 2
fadds mv_position+4(%esi)
fxch %st(2) // 2 | 0 | 1
fadds mv_position+8(%esi)
fxch %st(1) // 0 | 2 | 1
fstps mv_position+0(%esp) // 2 | 1
fstps mv_position+8(%esp) // 1
fstps mv_position+4(%esp)
// if (clip->leftedge)
// {
jz Ltestright
// r_leftclipped = true;
// r_leftexit = clipvert;
movl $1,C(r_leftclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+8
jmp Lcontinue
// }
Ltestright:
// else if (clip->rightedge)
// {
testb %ah,%ah
jz Lcontinue
// r_rightclipped = true;
// r_rightexit = clipvert;
movl $1,C(r_rightclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+8
// }
//
// R_ClipEdge (pv0, &clipvert, clip->next);
// return;
// }
jmp Lcontinue
// }
Lp3:
// // only point 0 is clipped
// r_lastvertvalid = false;
movl $0,C(r_lastvertvalid)
// f = d0 / (d0 - d1);
flds Ld0
flds Ld1
fsubr %st(1),%st(0)
// // we don't cache partially clipped edges
movl $0x7FFFFFFF,C(cacheoffset)
fdivrp %st(0),%st(1)
subl $(mv_size),%esp // allocate space for clipvert
// clipvert.position[0] = pv0->position[0] +
// f * (pv1->position[0] - pv0->position[0]);
// clipvert.position[1] = pv0->position[1] +
// f * (pv1->position[1] - pv0->position[1]);
// clipvert.position[2] = pv0->position[2] +
// f * (pv1->position[2] - pv0->position[2]);
flds mv_position+8(%edx)
fsubs mv_position+8(%esi)
flds mv_position+4(%edx)
fsubs mv_position+4(%esi)
flds mv_position+0(%edx)
fsubs mv_position+0(%esi) // 0 | 1 | 2
movl cp_leftedge(%ebx),%eax
testb %al,%al
fmul %st(3),%st(0)
fxch %st(1) // 1 | 0 | 2
fmul %st(3),%st(0)
fxch %st(2) // 2 | 0 | 1
fmulp %st(0),%st(3) // 0 | 1 | 2
fadds mv_position+0(%esi)
fxch %st(1) // 1 | 0 | 2
fadds mv_position+4(%esi)
fxch %st(2) // 2 | 0 | 1
fadds mv_position+8(%esi)
fxch %st(1) // 0 | 2 | 1
fstps mv_position+0(%esp) // 2 | 1
fstps mv_position+8(%esp) // 1
fstps mv_position+4(%esp)
// replace pv0 with the clip point
movl %esp,%esi
// if (clip->leftedge)
// {
jz Ltestright2
// r_leftclipped = true;
// r_leftenter = clipvert;
movl $1,C(r_leftclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+8
jmp Lcontinue
// }
Ltestright2:
// else if (clip->rightedge)
// {
testb %ah,%ah
jz Lcontinue
// r_rightclipped = true;
// r_rightenter = clipvert;
movl $1,C(r_rightclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+8
// }
jmp Lcontinue
// %esi = vec3_t point to transform and project
// %edx preserved
LTransformAndProject:
// // transform and project
// VectorSubtract (world, modelorg, local);
flds mv_position+0(%esi)
fsubs C(modelorg)+0
flds mv_position+4(%esi)
fsubs C(modelorg)+4
flds mv_position+8(%esi)
fsubs C(modelorg)+8
fxch %st(2) // local[0] | local[1] | local[2]
// TransformVector (local, transformed);
//
// if (transformed[2] < NEAR_CLIP)
// transformed[2] = NEAR_CLIP;
//
// lzi0 = 1.0 / transformed[2];
fld %st(0) // local[0] | local[0] | local[1] | local[2]
fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2]
fld %st(1) // local[0] | zm0 | local[0] | local[1] |
// local[2]
fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
// local[2]
fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
// local[2]
fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
// local[1] | local[2]
fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
// local[1] | local[2]
fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
fxch %st(1) // ym2 | zm4 | xm4 | ym3
faddp %st(0),%st(3) // zm4 | xm4 | ym4
fcoms Lfp_near_clip
fnstsw %ax
testb $1,%ah
jz LNoClip
fstp %st(0)
flds Lfp_near_clip
LNoClip:
fdivrs float_1 // lzi0 | x | y
fxch %st(1) // x | lzi0 | y
// // FIXME: build x/yscale into transform?
// scale = xscale * lzi0;
// u0 = (xcenter + scale*transformed[0]);
flds C(xscale) // xscale | x | lzi0 | y
fmul %st(2),%st(0) // scale | x | lzi0 | y
fmulp %st(0),%st(1) // scale*x | lzi0 | y
fadds C(xcenter) // u0 | lzi0 | y
// if (u0 < r_refdef.fvrectx_adj)
// u0 = r_refdef.fvrectx_adj;
// if (u0 > r_refdef.fvrectright_adj)
// u0 = r_refdef.fvrectright_adj;
// FIXME: use integer compares of floats?
fcoms C(r_refdef)+rd_fvrectx_adj
fnstsw %ax
testb $1,%ah
jz LClampP0
fstp %st(0)
flds C(r_refdef)+rd_fvrectx_adj
LClampP0:
fcoms C(r_refdef)+rd_fvrectright_adj
fnstsw %ax
testb $0x45,%ah
jnz LClampP1
fstp %st(0)
flds C(r_refdef)+rd_fvrectright_adj
LClampP1:
fld %st(1) // lzi0 | u0 | lzi0 | y
// scale = yscale * lzi0;
// v0 = (ycenter - scale*transformed[1]);
fmuls C(yscale) // scale | u0 | lzi0 | y
fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
fxch %st(2) // scale*y | lzi0 | u0
fsubrs C(ycenter) // v0 | lzi0 | u0
// if (v0 < r_refdef.fvrecty_adj)
// v0 = r_refdef.fvrecty_adj;
// if (v0 > r_refdef.fvrectbottom_adj)
// v0 = r_refdef.fvrectbottom_adj;
// FIXME: use integer compares of floats?
fcoms C(r_refdef)+rd_fvrecty_adj
fnstsw %ax
testb $1,%ah
jz LClampP2
fstp %st(0)
flds C(r_refdef)+rd_fvrecty_adj
LClampP2:
fcoms C(r_refdef)+rd_fvrectbottom_adj
fnstsw %ax
testb $0x45,%ah
jnz LClampP3
fstp %st(0)
flds C(r_refdef)+rd_fvrectbottom_adj
LClampP3:
ret
#endif // id386

750
source/r_edgea.S Normal file
View file

@ -0,0 +1,750 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// r_edgea.s
// x86 assembly-language edge-processing code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#if id386
.data
Ltemp: .long 0
float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000
float_point_999: .single 0.999
float_1_point_001: .single 1.001
.text
//--------------------------------------------------------------------
#define edgestoadd 4+8 // note odd stack offsets because of interleaving
#define edgelist 8+12 // with pushes
.globl C(R_EdgeCodeStart)
C(R_EdgeCodeStart):
.globl C(R_InsertNewEdges)
C(R_InsertNewEdges):
pushl %edi
pushl %esi // preserve register variables
movl edgestoadd(%esp),%edx
pushl %ebx
movl edgelist(%esp),%ecx
LDoNextEdge:
movl et_u(%edx),%eax
movl %edx,%edi
LContinueSearch:
movl et_u(%ecx),%ebx
movl et_next(%ecx),%esi
cmpl %ebx,%eax
jle LAddedge
movl et_u(%esi),%ebx
movl et_next(%esi),%ecx
cmpl %ebx,%eax
jle LAddedge2
movl et_u(%ecx),%ebx
movl et_next(%ecx),%esi
cmpl %ebx,%eax
jle LAddedge
movl et_u(%esi),%ebx
movl et_next(%esi),%ecx
cmpl %ebx,%eax
jg LContinueSearch
LAddedge2:
movl et_next(%edx),%edx
movl et_prev(%esi),%ebx
movl %esi,et_next(%edi)
movl %ebx,et_prev(%edi)
movl %edi,et_next(%ebx)
movl %edi,et_prev(%esi)
movl %esi,%ecx
cmpl $0,%edx
jnz LDoNextEdge
jmp LDone
.align 4
LAddedge:
movl et_next(%edx),%edx
movl et_prev(%ecx),%ebx
movl %ecx,et_next(%edi)
movl %ebx,et_prev(%edi)
movl %edi,et_next(%ebx)
movl %edi,et_prev(%ecx)
cmpl $0,%edx
jnz LDoNextEdge
LDone:
popl %ebx // restore register variables
popl %esi
popl %edi
ret
//--------------------------------------------------------------------
#define predge 4+4
.globl C(R_RemoveEdges)
C(R_RemoveEdges):
pushl %ebx
movl predge(%esp),%eax
Lre_loop:
movl et_next(%eax),%ecx
movl et_nextremove(%eax),%ebx
movl et_prev(%eax),%edx
testl %ebx,%ebx
movl %edx,et_prev(%ecx)
jz Lre_done
movl %ecx,et_next(%edx)
movl et_next(%ebx),%ecx
movl et_prev(%ebx),%edx
movl et_nextremove(%ebx),%eax
movl %edx,et_prev(%ecx)
testl %eax,%eax
movl %ecx,et_next(%edx)
jnz Lre_loop
popl %ebx
ret
Lre_done:
movl %ecx,et_next(%edx)
popl %ebx
ret
//--------------------------------------------------------------------
#define pedgelist 4+4 // note odd stack offset because of interleaving
// with pushes
.globl C(R_StepActiveU)
C(R_StepActiveU):
pushl %edi
movl pedgelist(%esp),%edx
pushl %esi // preserve register variables
pushl %ebx
movl et_prev(%edx),%esi
LNewEdge:
movl et_u(%esi),%edi
LNextEdge:
movl et_u(%edx),%eax
movl et_u_step(%edx),%ebx
addl %ebx,%eax
movl et_next(%edx),%esi
movl %eax,et_u(%edx)
cmpl %edi,%eax
jl LPushBack
movl et_u(%esi),%edi
movl et_u_step(%esi),%ebx
addl %ebx,%edi
movl et_next(%esi),%edx
movl %edi,et_u(%esi)
cmpl %eax,%edi
jl LPushBack2
movl et_u(%edx),%eax
movl et_u_step(%edx),%ebx
addl %ebx,%eax
movl et_next(%edx),%esi
movl %eax,et_u(%edx)
cmpl %edi,%eax
jl LPushBack
movl et_u(%esi),%edi
movl et_u_step(%esi),%ebx
addl %ebx,%edi
movl et_next(%esi),%edx
movl %edi,et_u(%esi)
cmpl %eax,%edi
jnl LNextEdge
LPushBack2:
movl %edx,%ebx
movl %edi,%eax
movl %esi,%edx
movl %ebx,%esi
LPushBack:
// push it back to keep it sorted
movl et_prev(%edx),%ecx
movl et_next(%edx),%ebx
// done if the -1 in edge_aftertail triggered this
cmpl $(C(edge_aftertail)),%edx
jz LUDone
// pull the edge out of the edge list
movl et_prev(%ecx),%edi
movl %ecx,et_prev(%esi)
movl %ebx,et_next(%ecx)
// find out where the edge goes in the edge list
LPushBackLoop:
movl et_prev(%edi),%ecx
movl et_u(%edi),%ebx
cmpl %ebx,%eax
jnl LPushBackFound
movl et_prev(%ecx),%edi
movl et_u(%ecx),%ebx
cmpl %ebx,%eax
jl LPushBackLoop
movl %ecx,%edi
// put the edge back into the edge list
LPushBackFound:
movl et_next(%edi),%ebx
movl %edi,et_prev(%edx)
movl %ebx,et_next(%edx)
movl %edx,et_next(%edi)
movl %edx,et_prev(%ebx)
movl %esi,%edx
movl et_prev(%esi),%esi
cmpl $(C(edge_tail)),%edx
jnz LNewEdge
LUDone:
popl %ebx // restore register variables
popl %esi
popl %edi
ret
//--------------------------------------------------------------------
#define surf 4 // note this is loaded before any pushes
.align 4
TrailingEdge:
movl st_spanstate(%esi),%eax // check for edge inversion
decl %eax
jnz LInverted
movl %eax,st_spanstate(%esi)
movl st_insubmodel(%esi),%ecx
movl 0x12345678,%edx // surfaces[1].st_next
LPatch0:
movl C(r_bmodelactive),%eax
subl %ecx,%eax
cmpl %esi,%edx
movl %eax,C(r_bmodelactive)
jnz LNoEmit // surface isn't on top, just remove
// emit a span (current top going away)
movl et_u(%ebx),%eax
shrl $20,%eax // iu = integral pixel u
movl st_last_u(%esi),%edx
movl st_next(%esi),%ecx
cmpl %edx,%eax
jle LNoEmit2 // iu <= surf->last_u, so nothing to emit
movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
subl %edx,%eax
movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
movl C(current_iv),%eax
movl %eax,espan_t_v(%ebp) // span->v = current_iv;
movl st_spans(%esi),%eax
movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
movl %ebp,st_spans(%esi) // surf->spans = span;
addl $(espan_t_size),%ebp
movl st_next(%esi),%edx // remove the surface from the surface
movl st_prev(%esi),%esi // stack
movl %edx,st_next(%esi)
movl %esi,st_prev(%edx)
ret
LNoEmit2:
movl %eax,st_last_u(%ecx) // surf->next->last_u = iu;
movl st_next(%esi),%edx // remove the surface from the surface
movl st_prev(%esi),%esi // stack
movl %edx,st_next(%esi)
movl %esi,st_prev(%edx)
ret
LNoEmit:
movl st_next(%esi),%edx // remove the surface from the surface
movl st_prev(%esi),%esi // stack
movl %edx,st_next(%esi)
movl %esi,st_prev(%edx)
ret
LInverted:
movl %eax,st_spanstate(%esi)
ret
//--------------------------------------------------------------------
// trailing edge only
Lgs_trailing:
pushl $Lgs_nextedge
jmp TrailingEdge
.globl C(R_GenerateSpans)
C(R_GenerateSpans):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
// clear active surfaces to just the background surface
movl C(surfaces),%eax
movl C(edge_head_u_shift20),%edx
addl $(st_size),%eax
// %ebp = span_p throughout
movl C(span_p),%ebp
movl $0,C(r_bmodelactive)
movl %eax,st_next(%eax)
movl %eax,st_prev(%eax)
movl %edx,st_last_u(%eax)
movl C(edge_head)+et_next,%ebx // edge=edge_head.next
// generate spans
cmpl $(C(edge_tail)),%ebx // done if empty list
jz Lgs_lastspan
Lgs_edgeloop:
movl et_surfs(%ebx),%edi
movl C(surfaces),%eax
movl %edi,%esi
andl $0xFFFF0000,%edi
andl $0xFFFF,%esi
jz Lgs_leading // not a trailing edge
// it has a left surface, so a surface is going away for this span
shll $(SURF_T_SHIFT),%esi
addl %eax,%esi
testl %edi,%edi
jz Lgs_trailing
// both leading and trailing
call TrailingEdge
movl C(surfaces),%eax
// ---------------------------------------------------------------
// handle a leading edge
// ---------------------------------------------------------------
Lgs_leading:
shrl $16-SURF_T_SHIFT,%edi
movl C(surfaces),%eax
addl %eax,%edi
movl 0x12345678,%esi // surf2 = surfaces[1].next;
LPatch2:
movl st_spanstate(%edi),%edx
movl st_insubmodel(%edi),%eax
testl %eax,%eax
jnz Lbmodel_leading
// handle a leading non-bmodel edge
// don't start a span if this is an inverted span, with the end edge preceding
// the start edge (that is, we've already seen the end edge)
testl %edx,%edx
jnz Lxl_done
// if (surf->key < surf2->key)
// goto newtop;
incl %edx
movl st_key(%edi),%eax
movl %edx,st_spanstate(%edi)
movl st_key(%esi),%ecx
cmpl %ecx,%eax
jl Lnewtop
// main sorting loop to search through surface stack until insertion point
// found. Always terminates because background surface is sentinel
// do
// {
// surf2 = surf2->next;
// } while (surf->key >= surf2->key);
Lsortloopnb:
movl st_next(%esi),%esi
movl st_key(%esi),%ecx
cmpl %ecx,%eax
jge Lsortloopnb
jmp LInsertAndExit
// handle a leading bmodel edge
.align 4
Lbmodel_leading:
// don't start a span if this is an inverted span, with the end edge preceding
// the start edge (that is, we've already seen the end edge)
testl %edx,%edx
jnz Lxl_done
movl C(r_bmodelactive),%ecx
incl %edx
incl %ecx
movl %edx,st_spanstate(%edi)
movl %ecx,C(r_bmodelactive)
// if (surf->key < surf2->key)
// goto newtop;
movl st_key(%edi),%eax
movl st_key(%esi),%ecx
cmpl %ecx,%eax
jl Lnewtop
// if ((surf->key == surf2->key) && surf->insubmodel)
// {
jz Lzcheck_for_newtop
// main sorting loop to search through surface stack until insertion point
// found. Always terminates because background surface is sentinel
// do
// {
// surf2 = surf2->next;
// } while (surf->key > surf2->key);
Lsortloop:
movl st_next(%esi),%esi
movl st_key(%esi),%ecx
cmpl %ecx,%eax
jg Lsortloop
jne LInsertAndExit
// Do 1/z sorting to see if we've arrived in the right position
movl et_u(%ebx),%eax
subl $0xFFFFF,%eax
movl %eax,Ltemp
fildl Ltemp
fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
// (1.0 / 0x100000);
fld %st(0) // fu | fu
fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
flds C(fv) // fv | fu*surf->d_zistepu | fu
fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
flds st_d_zistepu(%esi) // surf2->d_zistepu |
// fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
fmul %st(3),%st(0) // fu*surf2->d_zistepu |
// fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
// fu*surf2->d_zistepu |
// fv*surf->d_zistepv | fu
faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fld %st(2) // newzi | fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
// newzibottom | newzi | fu
fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
// fv*surf2->d_zistepv | newzibottom | newzi |
// fu
faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
fxch %st(1) // newzibottom | testzi | newzi | fu
// if (newzibottom >= testzi)
// goto Lgotposition;
fcomp %st(1) // testzi | newzi | fu
fxch %st(1) // newzi | testzi | fu
fmuls float_1_point_001 // newzitop | testzi | fu
fxch %st(1) // testzi | newzitop | fu
fnstsw %ax
testb $0x01,%ah
jz Lgotposition_fpop3
// if (newzitop >= testzi)
// {
fcomp %st(1) // newzitop | fu
fnstsw %ax
testb $0x45,%ah
jz Lsortloop_fpop2
// if (surf->d_zistepu >= surf2->d_zistepu)
// goto newtop;
flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop| fu
fcomps st_d_zistepu(%esi) // newzitop | fu
fnstsw %ax
testb $0x01,%ah
jz Lgotposition_fpop2
fstp %st(0) // clear the FPstack
fstp %st(0)
movl st_key(%edi),%eax
jmp Lsortloop
Lgotposition_fpop3:
fstp %st(0)
Lgotposition_fpop2:
fstp %st(0)
fstp %st(0)
jmp LInsertAndExit
// emit a span (obscures current top)
Lnewtop_fpop3:
fstp %st(0)
Lnewtop_fpop2:
fstp %st(0)
fstp %st(0)
movl st_key(%edi),%eax // reload the sorting key
Lnewtop:
movl et_u(%ebx),%eax
movl st_last_u(%esi),%edx
shrl $20,%eax // iu = integral pixel u
movl %eax,st_last_u(%edi) // surf->last_u = iu;
cmpl %edx,%eax
jle LInsertAndExit // iu <= surf->last_u, so nothing to emit
subl %edx,%eax
movl %edx,espan_t_u(%ebp) // span->u = surf->last_u;
movl %eax,espan_t_count(%ebp) // span->count = iu - span->u;
movl C(current_iv),%eax
movl %eax,espan_t_v(%ebp) // span->v = current_iv;
movl st_spans(%esi),%eax
movl %eax,espan_t_pnext(%ebp) // span->pnext = surf->spans;
movl %ebp,st_spans(%esi) // surf->spans = span;
addl $(espan_t_size),%ebp
LInsertAndExit:
// insert before surf2
movl %esi,st_next(%edi) // surf->next = surf2;
movl st_prev(%esi),%eax
movl %eax,st_prev(%edi) // surf->prev = surf2->prev;
movl %edi,st_prev(%esi) // surf2->prev = surf;
movl %edi,st_next(%eax) // surf2->prev->next = surf;
// ---------------------------------------------------------------
// leading edge done
// ---------------------------------------------------------------
// ---------------------------------------------------------------
// see if there are any more edges
// ---------------------------------------------------------------
Lgs_nextedge:
movl et_next(%ebx),%ebx
cmpl $(C(edge_tail)),%ebx
jnz Lgs_edgeloop
// clean up at the right edge
Lgs_lastspan:
// now that we've reached the right edge of the screen, we're done with any
// unfinished surfaces, so emit a span for whatever's on top
movl 0x12345678,%esi // surfaces[1].st_next
LPatch3:
movl C(edge_tail_u_shift20),%eax
xorl %ecx,%ecx
movl st_last_u(%esi),%edx
subl %edx,%eax
jle Lgs_resetspanstate
movl %edx,espan_t_u(%ebp)
movl %eax,espan_t_count(%ebp)
movl C(current_iv),%eax
movl %eax,espan_t_v(%ebp)
movl st_spans(%esi),%eax
movl %eax,espan_t_pnext(%ebp)
movl %ebp,st_spans(%esi)
addl $(espan_t_size),%ebp
// reset spanstate for all surfaces in the surface stack
Lgs_resetspanstate:
movl %ecx,st_spanstate(%esi)
movl st_next(%esi),%esi
cmpl $0x12345678,%esi // &surfaces[1]
LPatch4:
jnz Lgs_resetspanstate
// store the final span_p
movl %ebp,C(span_p)
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
// ---------------------------------------------------------------
// 1/z sorting for bmodels in the same leaf
// ---------------------------------------------------------------
.align 4
Lxl_done:
incl %edx
movl %edx,st_spanstate(%edi)
jmp Lgs_nextedge
.align 4
Lzcheck_for_newtop:
movl et_u(%ebx),%eax
subl $0xFFFFF,%eax
movl %eax,Ltemp
fildl Ltemp
fmuls float_1_div_0100000h // fu = (float)(edge->u - 0xFFFFF) *
// (1.0 / 0x100000);
fld %st(0) // fu | fu
fmuls st_d_zistepu(%edi) // fu*surf->d_zistepu | fu
flds C(fv) // fv | fu*surf->d_zistepu | fu
fmuls st_d_zistepv(%edi) // fv*surf->d_zistepv | fu*surf->d_zistepu | fu
fxch %st(1) // fu*surf->d_zistepu | fv*surf->d_zistepv | fu
fadds st_d_ziorigin(%edi) // fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
flds st_d_zistepu(%esi) // surf2->d_zistepu |
// fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
fmul %st(3),%st(0) // fu*surf2->d_zistepu |
// fu*surf->d_zistepu + surf->d_ziorigin |
// fv*surf->d_zistepv | fu
fxch %st(1) // fu*surf->d_zistepu + surf->d_ziorigin |
// fu*surf2->d_zistepu |
// fv*surf->d_zistepv | fu
faddp %st(0),%st(2) // fu*surf2->d_zistepu | newzi | fu
flds C(fv) // fv | fu*surf2->d_zistepu | newzi | fu
fmuls st_d_zistepv(%esi) // fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fld %st(2) // newzi | fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fmuls float_point_999 // newzibottom | fv*surf2->d_zistepv |
// fu*surf2->d_zistepu | newzi | fu
fxch %st(2) // fu*surf2->d_zistepu | fv*surf2->d_zistepv |
// newzibottom | newzi | fu
fadds st_d_ziorigin(%esi) // fu*surf2->d_zistepu + surf2->d_ziorigin |
// fv*surf2->d_zistepv | newzibottom | newzi |
// fu
faddp %st(0),%st(1) // testzi | newzibottom | newzi | fu
fxch %st(1) // newzibottom | testzi | newzi | fu
// if (newzibottom >= testzi)
// goto newtop;
fcomp %st(1) // testzi | newzi | fu
fxch %st(1) // newzi | testzi | fu
fmuls float_1_point_001 // newzitop | testzi | fu
fxch %st(1) // testzi | newzitop | fu
fnstsw %ax
testb $0x01,%ah
jz Lnewtop_fpop3
// if (newzitop >= testzi)
// {
fcomp %st(1) // newzitop | fu
fnstsw %ax
testb $0x45,%ah
jz Lsortloop_fpop2
// if (surf->d_zistepu >= surf2->d_zistepu)
// goto newtop;
flds st_d_zistepu(%edi) // surf->d_zistepu | newzitop | fu
fcomps st_d_zistepu(%esi) // newzitop | fu
fnstsw %ax
testb $0x01,%ah
jz Lnewtop_fpop2
Lsortloop_fpop2:
fstp %st(0) // clear the FP stack
fstp %st(0)
movl st_key(%edi),%eax
jmp Lsortloop
.globl C(R_EdgeCodeEnd)
C(R_EdgeCodeEnd):
//----------------------------------------------------------------------
// Surface array address code patching routine
//----------------------------------------------------------------------
.align 4
.globl C(R_SurfacePatch)
C(R_SurfacePatch):
movl C(surfaces),%eax
addl $(st_size),%eax
movl %eax,LPatch4-4
addl $(st_next),%eax
movl %eax,LPatch0-4
movl %eax,LPatch2-4
movl %eax,LPatch3-4
ret
#endif // id386

64
source/r_varsa.S Normal file
View file

@ -0,0 +1,64 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// r_varsa.s
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#if id386
.data
//-------------------------------------------------------
// ASM-only variables
//-------------------------------------------------------
.globl float_1, float_particle_z_clip, float_point5
.globl float_minus_1, float_0
float_0: .single 0.0
float_1: .single 1.0
float_minus_1: .single -1.0
float_particle_z_clip: .single PARTICLE_Z_CLIP
float_point5: .single 0.5
.globl fp_16, fp_64k, fp_1m, fp_64kx64k
.globl fp_1m_minus_1
.globl fp_8
fp_1m: .single 1048576.0
fp_1m_minus_1: .single 1048575.0
fp_64k: .single 65536.0
fp_8: .single 8.0
fp_16: .single 16.0
fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000
.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
FloatZero: .long 0
Float2ToThe31nd: .long 0x4f000000
FloatMinus2ToThe31nd: .long 0xcf000000
.globl C(r_bmodelactive)
C(r_bmodelactive): .long 0
#endif // id386

218
source/snd_mixa.S Normal file
View file

@ -0,0 +1,218 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// snd_mixa.s
// x86 assembly-language sound code
//
#include "asm_i386.h"
#include "quakeasm.h"
#if id386
.text
//----------------------------------------------------------------------
// 8-bit sound-mixing code
//----------------------------------------------------------------------
#define ch 4+16
#define sc 8+16
#define count 12+16
.globl C(SND_PaintChannelFrom8)
C(SND_PaintChannelFrom8):
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
pushl %ebp
// int data;
// short *lscale, *rscale;
// unsigned char *sfx;
// int i;
movl ch(%esp),%ebx
movl sc(%esp),%esi
// if (ch->leftvol > 255)
// ch->leftvol = 255;
// if (ch->rightvol > 255)
// ch->rightvol = 255;
movl ch_leftvol(%ebx),%eax
movl ch_rightvol(%ebx),%edx
cmpl $255,%eax
jna LLeftSet
movl $255,%eax
LLeftSet:
cmpl $255,%edx
jna LRightSet
movl $255,%edx
LRightSet:
// lscale = snd_scaletable[ch->leftvol >> 3];
// rscale = snd_scaletable[ch->rightvol >> 3];
// sfx = (signed char *)sc->data + ch->pos;
// ch->pos += count;
andl $0xF8,%eax
addl $(sfxc_data),%esi
andl $0xF8,%edx
movl ch_pos(%ebx),%edi
movl count(%esp),%ecx
addl %edi,%esi
shll $7,%eax
addl %ecx,%edi
shll $7,%edx
movl %edi,ch_pos(%ebx)
addl $(C(snd_scaletable)),%eax
addl $(C(snd_scaletable)),%edx
subl %ebx,%ebx
movb -1(%esi,%ecx,1),%bl
testl $1,%ecx
jz LMix8Loop
movl (%eax,%ebx,4),%edi
movl (%edx,%ebx,4),%ebp
addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
movb -2(%esi,%ecx,1),%bl
decl %ecx
jz LDone
// for (i=0 ; i<count ; i++)
// {
LMix8Loop:
// data = sfx[i];
// paintbuffer[i].left += lscale[data];
// paintbuffer[i].right += rscale[data];
movl (%eax,%ebx,4),%edi
movl (%edx,%ebx,4),%ebp
addl C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
addl C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
movb -2(%esi,%ecx,1),%bl
movl %edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
movl %ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
movl (%eax,%ebx,4),%edi
movl (%edx,%ebx,4),%ebp
movb -3(%esi,%ecx,1),%bl
addl C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
addl C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
movl %edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
movl %ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)
// }
subl $2,%ecx
jnz LMix8Loop
LDone:
popl %ebp
popl %ebx
popl %edi
popl %esi
ret
//----------------------------------------------------------------------
// Transfer of stereo buffer to 16-bit DMA buffer code
//----------------------------------------------------------------------
.globl C(Snd_WriteLinearBlastStereo16)
C(Snd_WriteLinearBlastStereo16):
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
// int i;
// int val;
movl C(snd_linear_count),%ecx
movl C(snd_p),%ebx
movl C(snd_vol),%esi
movl C(snd_out),%edi
// for (i=0 ; i<snd_linear_count ; i+=2)
// {
LWLBLoopTop:
// val = (snd_p[i]*snd_vol)>>8;
// if (val > 0x7fff)
// snd_out[i] = 0x7fff;
// else if (val < (short)0x8000)
// snd_out[i] = (short)0x8000;
// else
// snd_out[i] = val;
movl -8(%ebx,%ecx,4),%eax
imull %esi,%eax
sarl $8,%eax
cmpl $0x7FFF,%eax
jg LClampHigh
cmpl $0xFFFF8000,%eax
jnl LClampDone
movl $0xFFFF8000,%eax
jmp LClampDone
LClampHigh:
movl $0x7FFF,%eax
LClampDone:
// val = (snd_p[i+1]*snd_vol)>>8;
// if (val > 0x7fff)
// snd_out[i+1] = 0x7fff;
// else if (val < (short)0x8000)
// snd_out[i+1] = (short)0x8000;
// else
// snd_out[i+1] = val;
movl -4(%ebx,%ecx,4),%edx
imull %esi,%edx
sarl $8,%edx
cmpl $0x7FFF,%edx
jg LClampHigh2
cmpl $0xFFFF8000,%edx
jnl LClampDone2
movl $0xFFFF8000,%edx
jmp LClampDone2
LClampHigh2:
movl $0x7FFF,%edx
LClampDone2:
shll $16,%edx
andl $0xFFFF,%eax
orl %eax,%edx
movl %edx,-4(%edi,%ecx,2)
// }
subl $2,%ecx
jnz LWLBLoopTop
// snd_p += snd_linear_count;
popl %ebx
popl %edi
popl %esi
ret
#endif // id386

172
source/surf16.S Normal file
View file

@ -0,0 +1,172 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// surf16.s
// x86 assembly-language 16 bpp surface block drawing code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#if id386
//----------------------------------------------------------------------
// Surface block drawer
//----------------------------------------------------------------------
.data
k: .long 0
loopentry: .long 0
.align 4
blockjumptable16:
.long LEnter2_16
.long LEnter4_16
.long 0, LEnter8_16
.long 0, 0, 0, LEnter16_16
.text
.align 4
.globl C(R_Surf16Start)
C(R_Surf16Start):
.align 4
.globl C(R_DrawSurfaceBlock16)
C(R_DrawSurfaceBlock16):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
movl C(blocksize),%eax
movl C(prowdestbase),%edi
movl C(pbasesource),%esi
movl C(sourcesstep),%ebx
movl blockjumptable16-4(,%eax,2),%ecx
movl %eax,k
movl %ecx,loopentry
movl C(lightleft),%edx
movl C(lightright),%ebp
Lblockloop16:
subl %edx,%ebp
movb C(blockdivshift),%cl
sarl %cl,%ebp
jns Lp1_16
testl C(blockdivmask),%ebp
jz Lp1_16
incl %ebp
Lp1_16:
subl %eax,%eax
subl %ecx,%ecx // high words must be 0 in loop for addressing
jmp *loopentry
.align 4
#include "block16.h"
movl C(pbasesource),%esi
movl C(lightleft),%edx
movl C(lightright),%ebp
movl C(sourcetstep),%eax
movl C(lightrightstep),%ecx
movl C(prowdestbase),%edi
addl %eax,%esi
addl %ecx,%ebp
movl C(lightleftstep),%eax
movl C(surfrowbytes),%ecx
addl %eax,%edx
addl %ecx,%edi
movl %esi,C(pbasesource)
movl %ebp,C(lightright)
movl k,%eax
movl %edx,C(lightleft)
decl %eax
movl %edi,C(prowdestbase)
movl %eax,k
jnz Lblockloop16
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
.globl C(R_Surf16End)
C(R_Surf16End):
//----------------------------------------------------------------------
// Code patching routines
//----------------------------------------------------------------------
.data
.align 4
LPatchTable16:
.long LBPatch0-4
.long LBPatch1-4
.long LBPatch2-4
.long LBPatch3-4
.long LBPatch4-4
.long LBPatch5-4
.long LBPatch6-4
.long LBPatch7-4
.long LBPatch8-4
.long LBPatch9-4
.long LBPatch10-4
.long LBPatch11-4
.long LBPatch12-4
.long LBPatch13-4
.long LBPatch14-4
.long LBPatch15-4
.text
.align 4
.globl C(R_Surf16Patch)
C(R_Surf16Patch):
pushl %ebx
movl C(colormap),%eax
movl $LPatchTable16,%ebx
movl $16,%ecx
LPatchLoop16:
movl (%ebx),%edx
addl $4,%ebx
movl %eax,(%edx)
decl %ecx
jnz LPatchLoop16
popl %ebx
ret
#endif // id386

783
source/surf8.S Normal file
View file

@ -0,0 +1,783 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// surf8.s
// x86 assembly-language 8 bpp surface block drawing code.
//
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#if id386
.data
sb_v: .long 0
.text
.align 4
.globl C(R_Surf8Start)
C(R_Surf8Start):
//----------------------------------------------------------------------
// Surface block drawer for mip level 0
//----------------------------------------------------------------------
.align 4
.globl C(R_DrawSurfaceBlock8_mip0)
C(R_DrawSurfaceBlock8_mip0):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
// for (v=0 ; v<numvblocks ; v++)
// {
movl C(r_lightptr),%ebx
movl C(r_numvblocks),%eax
movl %eax,sb_v
movl C(prowdestbase),%edi
movl C(pbasesource),%esi
Lv_loop_mip0:
// lightleft = lightptr[0];
// lightright = lightptr[1];
// lightdelta = (lightleft - lightright) & 0xFFFFF;
movl (%ebx),%eax // lightleft
movl 4(%ebx),%edx // lightright
movl %eax,%ebp
movl C(r_lightwidth),%ecx
movl %edx,C(lightright)
subl %edx,%ebp
andl $0xFFFFF,%ebp
leal (%ebx,%ecx,4),%ebx
// lightptr += lightwidth;
movl %ebx,C(r_lightptr)
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
// 0xF0000000;
movl 4(%ebx),%ecx // lightptr[1]
movl (%ebx),%ebx // lightptr[0]
subl %eax,%ebx
subl %edx,%ecx
sarl $4,%ecx
orl $0xF0000000,%ebp
sarl $4,%ebx
movl %ecx,C(lightrightstep)
subl %ecx,%ebx
andl $0xFFFFF,%ebx
orl $0xF0000000,%ebx
subl %ecx,%ecx // high word must be 0 in loop for addressing
movl %ebx,C(lightdeltastep)
subl %ebx,%ebx // high word must be 0 in loop for addressing
Lblockloop8_mip0:
movl %ebp,C(lightdelta)
movb 14(%esi),%cl
sarl $4,%ebp
movb %dh,%bh
movb 15(%esi),%bl
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch0:
movb 13(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch1:
movb 12(%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch2:
movb 11(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch3:
movb 10(%esi),%cl
movl %eax,12(%edi)
movb %dh,%bh
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch4:
movb 9(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch5:
movb 8(%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch6:
movb 7(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch7:
movb 6(%esi),%cl
movl %eax,8(%edi)
movb %dh,%bh
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch8:
movb 5(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch9:
movb 4(%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch10:
movb 3(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch11:
movb 2(%esi),%cl
movl %eax,4(%edi)
movb %dh,%bh
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch12:
movb 1(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch13:
movb (%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
movb 0x12345678(%ebx),%ah
LBPatch14:
movl C(lightright),%edx
movb 0x12345678(%ecx),%al
LBPatch15:
movl C(lightdelta),%ebp
movl %eax,(%edi)
addl C(sourcetstep),%esi
addl C(surfrowbytes),%edi
addl C(lightrightstep),%edx
addl C(lightdeltastep),%ebp
movl %edx,C(lightright)
jc Lblockloop8_mip0
// if (pbasesource >= r_sourcemax)
// pbasesource -= stepback;
cmpl C(r_sourcemax),%esi
jb LSkip_mip0
subl C(r_stepback),%esi
LSkip_mip0:
movl C(r_lightptr),%ebx
decl sb_v
jnz Lv_loop_mip0
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
//----------------------------------------------------------------------
// Surface block drawer for mip level 1
//----------------------------------------------------------------------
.align 4
.globl C(R_DrawSurfaceBlock8_mip1)
C(R_DrawSurfaceBlock8_mip1):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
// for (v=0 ; v<numvblocks ; v++)
// {
movl C(r_lightptr),%ebx
movl C(r_numvblocks),%eax
movl %eax,sb_v
movl C(prowdestbase),%edi
movl C(pbasesource),%esi
Lv_loop_mip1:
// lightleft = lightptr[0];
// lightright = lightptr[1];
// lightdelta = (lightleft - lightright) & 0xFFFFF;
movl (%ebx),%eax // lightleft
movl 4(%ebx),%edx // lightright
movl %eax,%ebp
movl C(r_lightwidth),%ecx
movl %edx,C(lightright)
subl %edx,%ebp
andl $0xFFFFF,%ebp
leal (%ebx,%ecx,4),%ebx
// lightptr += lightwidth;
movl %ebx,C(r_lightptr)
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
// 0xF0000000;
movl 4(%ebx),%ecx // lightptr[1]
movl (%ebx),%ebx // lightptr[0]
subl %eax,%ebx
subl %edx,%ecx
sarl $3,%ecx
orl $0x70000000,%ebp
sarl $3,%ebx
movl %ecx,C(lightrightstep)
subl %ecx,%ebx
andl $0xFFFFF,%ebx
orl $0xF0000000,%ebx
subl %ecx,%ecx // high word must be 0 in loop for addressing
movl %ebx,C(lightdeltastep)
subl %ebx,%ebx // high word must be 0 in loop for addressing
Lblockloop8_mip1:
movl %ebp,C(lightdelta)
movb 6(%esi),%cl
sarl $3,%ebp
movb %dh,%bh
movb 7(%esi),%bl
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch22:
movb 5(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch23:
movb 4(%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch24:
movb 3(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch25:
movb 2(%esi),%cl
movl %eax,4(%edi)
movb %dh,%bh
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch26:
movb 1(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch27:
movb (%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
movb 0x12345678(%ebx),%ah
LBPatch28:
movl C(lightright),%edx
movb 0x12345678(%ecx),%al
LBPatch29:
movl C(lightdelta),%ebp
movl %eax,(%edi)
movl C(sourcetstep),%eax
addl %eax,%esi
movl C(surfrowbytes),%eax
addl %eax,%edi
movl C(lightrightstep),%eax
addl %eax,%edx
movl C(lightdeltastep),%eax
addl %eax,%ebp
movl %edx,C(lightright)
jc Lblockloop8_mip1
// if (pbasesource >= r_sourcemax)
// pbasesource -= stepback;
cmpl C(r_sourcemax),%esi
jb LSkip_mip1
subl C(r_stepback),%esi
LSkip_mip1:
movl C(r_lightptr),%ebx
decl sb_v
jnz Lv_loop_mip1
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
//----------------------------------------------------------------------
// Surface block drawer for mip level 2
//----------------------------------------------------------------------
.align 4
.globl C(R_DrawSurfaceBlock8_mip2)
C(R_DrawSurfaceBlock8_mip2):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
// for (v=0 ; v<numvblocks ; v++)
// {
movl C(r_lightptr),%ebx
movl C(r_numvblocks),%eax
movl %eax,sb_v
movl C(prowdestbase),%edi
movl C(pbasesource),%esi
Lv_loop_mip2:
// lightleft = lightptr[0];
// lightright = lightptr[1];
// lightdelta = (lightleft - lightright) & 0xFFFFF;
movl (%ebx),%eax // lightleft
movl 4(%ebx),%edx // lightright
movl %eax,%ebp
movl C(r_lightwidth),%ecx
movl %edx,C(lightright)
subl %edx,%ebp
andl $0xFFFFF,%ebp
leal (%ebx,%ecx,4),%ebx
// lightptr += lightwidth;
movl %ebx,C(r_lightptr)
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
// 0xF0000000;
movl 4(%ebx),%ecx // lightptr[1]
movl (%ebx),%ebx // lightptr[0]
subl %eax,%ebx
subl %edx,%ecx
sarl $2,%ecx
orl $0x30000000,%ebp
sarl $2,%ebx
movl %ecx,C(lightrightstep)
subl %ecx,%ebx
andl $0xFFFFF,%ebx
orl $0xF0000000,%ebx
subl %ecx,%ecx // high word must be 0 in loop for addressing
movl %ebx,C(lightdeltastep)
subl %ebx,%ebx // high word must be 0 in loop for addressing
Lblockloop8_mip2:
movl %ebp,C(lightdelta)
movb 2(%esi),%cl
sarl $2,%ebp
movb %dh,%bh
movb 3(%esi),%bl
addl %ebp,%edx
movb %dh,%ch
addl %ebp,%edx
movb 0x12345678(%ebx),%ah
LBPatch18:
movb 1(%esi),%bl
movb 0x12345678(%ecx),%al
LBPatch19:
movb (%esi),%cl
movb %dh,%bh
addl %ebp,%edx
rorl $16,%eax
movb %dh,%ch
movb 0x12345678(%ebx),%ah
LBPatch20:
movl C(lightright),%edx
movb 0x12345678(%ecx),%al
LBPatch21:
movl C(lightdelta),%ebp
movl %eax,(%edi)
movl C(sourcetstep),%eax
addl %eax,%esi
movl C(surfrowbytes),%eax
addl %eax,%edi
movl C(lightrightstep),%eax
addl %eax,%edx
movl C(lightdeltastep),%eax
addl %eax,%ebp
movl %edx,C(lightright)
jc Lblockloop8_mip2
// if (pbasesource >= r_sourcemax)
// pbasesource -= stepback;
cmpl C(r_sourcemax),%esi
jb LSkip_mip2
subl C(r_stepback),%esi
LSkip_mip2:
movl C(r_lightptr),%ebx
decl sb_v
jnz Lv_loop_mip2
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
//----------------------------------------------------------------------
// Surface block drawer for mip level 3
//----------------------------------------------------------------------
.align 4
.globl C(R_DrawSurfaceBlock8_mip3)
C(R_DrawSurfaceBlock8_mip3):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
// for (v=0 ; v<numvblocks ; v++)
// {
movl C(r_lightptr),%ebx
movl C(r_numvblocks),%eax
movl %eax,sb_v
movl C(prowdestbase),%edi
movl C(pbasesource),%esi
Lv_loop_mip3:
// lightleft = lightptr[0];
// lightright = lightptr[1];
// lightdelta = (lightleft - lightright) & 0xFFFFF;
movl (%ebx),%eax // lightleft
movl 4(%ebx),%edx // lightright
movl %eax,%ebp
movl C(r_lightwidth),%ecx
movl %edx,C(lightright)
subl %edx,%ebp
andl $0xFFFFF,%ebp
leal (%ebx,%ecx,4),%ebx
movl %ebp,C(lightdelta)
// lightptr += lightwidth;
movl %ebx,C(r_lightptr)
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
// 0xF0000000;
movl 4(%ebx),%ecx // lightptr[1]
movl (%ebx),%ebx // lightptr[0]
subl %eax,%ebx
subl %edx,%ecx
sarl $1,%ecx
sarl $1,%ebx
movl %ecx,C(lightrightstep)
subl %ecx,%ebx
andl $0xFFFFF,%ebx
sarl $1,%ebp
orl $0xF0000000,%ebx
movl %ebx,C(lightdeltastep)
subl %ebx,%ebx // high word must be 0 in loop for addressing
movb 1(%esi),%bl
subl %ecx,%ecx // high word must be 0 in loop for addressing
movb %dh,%bh
movb (%esi),%cl
addl %ebp,%edx
movb %dh,%ch
movb 0x12345678(%ebx),%al
LBPatch16:
movl C(lightright),%edx
movb %al,1(%edi)
movb 0x12345678(%ecx),%al
LBPatch17:
movb %al,(%edi)
movl C(sourcetstep),%eax
addl %eax,%esi
movl C(surfrowbytes),%eax
addl %eax,%edi
movl C(lightdeltastep),%eax
movl C(lightdelta),%ebp
movb (%esi),%cl
addl %eax,%ebp
movl C(lightrightstep),%eax
sarl $1,%ebp
addl %eax,%edx
movb %dh,%bh
movb 1(%esi),%bl
addl %ebp,%edx
movb %dh,%ch
movb 0x12345678(%ebx),%al
LBPatch30:
movl C(sourcetstep),%edx
movb %al,1(%edi)
movb 0x12345678(%ecx),%al
LBPatch31:
movb %al,(%edi)
movl C(surfrowbytes),%ebp
addl %edx,%esi
addl %ebp,%edi
// if (pbasesource >= r_sourcemax)
// pbasesource -= stepback;
cmpl C(r_sourcemax),%esi
jb LSkip_mip3
subl C(r_stepback),%esi
LSkip_mip3:
movl C(r_lightptr),%ebx
decl sb_v
jnz Lv_loop_mip3
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
.globl C(R_Surf8End)
C(R_Surf8End):
//----------------------------------------------------------------------
// Code patching routines
//----------------------------------------------------------------------
.data
.align 4
LPatchTable8:
.long LBPatch0-4
.long LBPatch1-4
.long LBPatch2-4
.long LBPatch3-4
.long LBPatch4-4
.long LBPatch5-4
.long LBPatch6-4
.long LBPatch7-4
.long LBPatch8-4
.long LBPatch9-4
.long LBPatch10-4
.long LBPatch11-4
.long LBPatch12-4
.long LBPatch13-4
.long LBPatch14-4
.long LBPatch15-4
.long LBPatch16-4
.long LBPatch17-4
.long LBPatch18-4
.long LBPatch19-4
.long LBPatch20-4
.long LBPatch21-4
.long LBPatch22-4
.long LBPatch23-4
.long LBPatch24-4
.long LBPatch25-4
.long LBPatch26-4
.long LBPatch27-4
.long LBPatch28-4
.long LBPatch29-4
.long LBPatch30-4
.long LBPatch31-4
.text
.align 4
.globl C(R_Surf8Patch)
C(R_Surf8Patch):
pushl %ebx
movl C(colormap),%eax
movl $LPatchTable8,%ebx
movl $32,%ecx
LPatchLoop8:
movl (%ebx),%edx
addl $4,%ebx
movl %eax,(%edx)
decl %ecx
jnz LPatchLoop8
popl %ebx
ret
#endif // id386

View file

@ -7,3 +7,8 @@ void
Draw_EndDisc(void)
{
}
void
Cmd_ForwardToServer (void)
{
}

95
source/sys_dosa.S Normal file
View file

@ -0,0 +1,95 @@
//
// sys_dosa.s
// x86 assembly-language DOS-dependent routines.
#include "asm_i386.h"
#include "quakeasm.h"
.data
.align 4
fpenv:
.long 0, 0, 0, 0, 0, 0, 0, 0
.text
.globl C(MaskExceptions)
C(MaskExceptions):
fnstenv fpenv
orl $0x3F,fpenv
fldenv fpenv
ret
#if 0
.globl C(unmaskexceptions)
C(unmaskexceptions):
fnstenv fpenv
andl $0xFFFFFFE0,fpenv
fldenv fpenv
ret
#endif
.data
.align 4
.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
ceil_cw: .long 0
single_cw: .long 0
full_cw: .long 0
cw: .long 0
pushed_cw: .long 0
.text
.globl C(Sys_LowFPPrecision)
C(Sys_LowFPPrecision):
fldcw single_cw
ret
.globl C(Sys_HighFPPrecision)
C(Sys_HighFPPrecision):
fldcw full_cw
ret
.globl C(Sys_PushFPCW_SetHigh)
C(Sys_PushFPCW_SetHigh):
fnstcw pushed_cw
fldcw full_cw
ret
.globl C(Sys_PopFPCW)
C(Sys_PopFPCW):
fldcw pushed_cw
ret
.globl C(Sys_SetFPCW)
C(Sys_SetFPCW):
fnstcw cw
movl cw,%eax
#if id386
andb $0xF0,%ah
orb $0x03,%ah // round mode, 64-bit precision
#endif
movl %eax,full_cw
#if id386
andb $0xF0,%ah
orb $0x0C,%ah // chop mode, single precision
#endif
movl %eax,single_cw
#if id386
andb $0xF0,%ah
orb $0x08,%ah // ceil mode, single precision
#endif
movl %eax,ceil_cw
ret

115
source/sys_wina.S Normal file
View file

@ -0,0 +1,115 @@
/*
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// sys_wina.s
// x86 assembly-language Win-dependent routines.
#include "asm_i386.h"
#include "quakeasm.h"
//@@@ should be id386-dependent, and have an equivalent C path
.data
.align 4
fpenv:
.long 0, 0, 0, 0, 0, 0, 0, 0
.text
.globl C(MaskExceptions)
C(MaskExceptions):
fnstenv fpenv
orl $0x3F,fpenv
fldenv fpenv
ret
#if 0
.globl C(unmaskexceptions)
C(unmaskexceptions):
fnstenv fpenv
andl $0xFFFFFFE0,fpenv
fldenv fpenv
ret
#endif
.data
.align 4
.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
ceil_cw: .long 0
single_cw: .long 0
full_cw: .long 0
cw: .long 0
pushed_cw: .long 0
.text
.globl C(Sys_LowFPPrecision)
C(Sys_LowFPPrecision):
fldcw single_cw
ret
.globl C(Sys_HighFPPrecision)
C(Sys_HighFPPrecision):
fldcw full_cw
ret
.globl C(Sys_PushFPCW_SetHigh)
C(Sys_PushFPCW_SetHigh):
fnstcw pushed_cw
fldcw full_cw
ret
.globl C(Sys_PopFPCW)
C(Sys_PopFPCW):
fldcw pushed_cw
ret
.globl C(Sys_SetFPCW)
C(Sys_SetFPCW):
fnstcw cw
movl cw,%eax
#if id386
andb $0xF0,%ah
orb $0x03,%ah // round mode, 64-bit precision
#endif
movl %eax,full_cw
#if id386
andb $0xF0,%ah
orb $0x0C,%ah // chop mode, single precision
#endif
movl %eax,single_cw
#if id386
andb $0xF0,%ah
orb $0x08,%ah // ceil mode, single precision
#endif
movl %eax,ceil_cw
ret