mirror of
https://git.code.sf.net/p/quake/newtree
synced 2024-11-22 04:01:17 +00:00
client (svga) almost links now. just gotta fix up net_chan.c and model.c wrt SERVERONLY
This commit is contained in:
parent
ee16312bb8
commit
44deb188ec
23 changed files with 9188 additions and 10 deletions
|
@ -4,10 +4,10 @@ SV_name=qw-server
|
|||
SV_libs=
|
||||
|
||||
CL_SVGA_name=qw-client-svga
|
||||
SV_libs=
|
||||
CL_SVGA_libs=-lvga
|
||||
|
||||
CL_X11_name=qw-client-x11
|
||||
SV_libs=
|
||||
CL_X11_libs=
|
||||
|
||||
DIRECTORIES=
|
||||
vpath %.a $(patsubst @%,%,$(DIRECTORIES)) /usr/lib
|
||||
|
@ -65,6 +65,8 @@ SV_dependencies = $(patsubst %,%.d,$(basename $(SV_sources)))
|
|||
SV_objects = $(patsubst %.d,%.o,$(SV_dependencies))
|
||||
|
||||
CL_sources=\
|
||||
cl_cmd.c \
|
||||
cl_cvar.c \
|
||||
cl_demo.c \
|
||||
cl_ents.c \
|
||||
cl_input.c \
|
||||
|
@ -127,7 +129,25 @@ CL_sources=\
|
|||
zone.c \
|
||||
cd_linux.c \
|
||||
sys_linux.c \
|
||||
snd_linux.c
|
||||
snd_linux.c \
|
||||
d_copy.S \
|
||||
d_draw.S \
|
||||
d_draw16.S \
|
||||
d_parta.S \
|
||||
d_polysa.S \
|
||||
d_scana.S \
|
||||
d_spr8.S \
|
||||
d_varsa.S \
|
||||
math.S \
|
||||
r_aclipa.S \
|
||||
r_aliasa.S \
|
||||
r_drawa.S \
|
||||
r_edgea.S \
|
||||
r_varsa.S \
|
||||
snd_mixa.S \
|
||||
surf16.S \
|
||||
surf8.S \
|
||||
sys_dosa.S
|
||||
|
||||
CL_dependencies = $(patsubst %,%.d,$(basename $(CL_sources)))
|
||||
CL_objects = $(patsubst %.d,%.o,$(CL_dependencies))
|
||||
|
|
|
@ -29,7 +29,7 @@ Cvar_Info(cvar_t *var)
|
|||
if (cls.state >= ca_connected)
|
||||
{
|
||||
MSG_WriteByte (&cls.netchan.message, clc_stringcmd);
|
||||
SZ_Print (&cls.netchan.message, va("setinfo \"%s\" \"%s\"\n", var->name, string));
|
||||
SZ_Print (&cls.netchan.message, va("setinfo \"%s\" \"%s\"\n", var->name, var->string));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -595,10 +595,6 @@ char *Cmd_CompleteCommand (char *partial)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void Cmd_ForwardToServer (void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
============
|
||||
Cmd_ExecuteString
|
||||
|
|
149
source/d_copy.S
Normal file
149
source/d_copy.S
Normal file
|
@ -0,0 +1,149 @@
|
|||
//
|
||||
// d_copy.s
|
||||
// x86 assembly-language screen copying code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
.data
|
||||
|
||||
// Scratch variables for VGA_UpdatePlanarScreen; all are filled in by that
// routine before its copy loops start.

// VGA_width / 16: iterations of the column loop per row
LCopyWidth: .long 0
|
||||
// 2 * VGA_bufferrowbytes: source advance per block of rows
LBlockSrcStep: .long 0
|
||||
// 2 * VGA_rowbytes: destination advance per block of rows
LBlockDestStep: .long 0
|
||||
// VGA_bufferrowbytes - VGA_width: added to the source pointer after each row
LSrcDelta: .long 0
|
||||
// 4 * VGA_rowbytes - VGA_width: added to the destination pointer after each row
LDestDelta: .long 0
|
||||
|
||||
// %esp-relative offset of VGA_UpdatePlanarScreen's argument once its four
// registers (16 bytes) have been pushed
#define bufptr 4+16
|
||||
|
||||
// copies 2 rows per plane at a pop (the row counter loaded in LPlaneLoop is 2,
|
||||
// although this note originally said 16 rows, 16*512 = 8k); since no Mode X mode
|
||||
// is wider than 360, all the data should fit in the cache for the passes for the next 3 planes
|
||||
|
||||
.text
|
||||
|
||||
.globl C(VGA_UpdatePlanarScreen)
|
||||
//
// VGA_UpdatePlanarScreen
// Copies a byte-per-pixel source buffer to VGA_pagebase one plane at a time.
// For each block of two rows it makes four passes, one per Map Mask value
// (1, 2, 4, 8); each pass gathers every fourth source byte, starting one byte
// later than the previous pass, and stores them as packed dwords.
// In:  one stack argument (bufptr) = source buffer; also reads
//      VGA_bufferrowbytes, VGA_rowbytes, VGA_width, VGA_height, VGA_pagebase.
// Saves and restores %ebp, %edi, %esi, %ebx; %eax, %ecx, %edx are scratch.
// NOTE(review): the column loop consumes 16 source bytes per iteration and the
// counters are decremented before being tested, so this appears to assume
// VGA_width >= 16 (and a multiple of 16) and VGA_height >= 2 -- confirm
// against the callers.
//
C(VGA_UpdatePlanarScreen):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
movl C(VGA_bufferrowbytes),%eax
|
||||
shll $1,%eax
|
||||
movl %eax,LBlockSrcStep // source step per block = 2 source rows
|
||||
movl C(VGA_rowbytes),%eax
|
||||
shll $1,%eax
|
||||
movl %eax,LBlockDestStep // dest step per block = 2 dest rows
|
||||
|
||||
movl $0x3C4,%edx
|
||||
movb $2,%al
|
||||
outb %al,%dx // point the SC to the Map Mask
|
||||
incl %edx // %dx = 0x3C5; the mask values are written here below
|
||||
|
||||
movl bufptr(%esp),%esi // %esi = source buffer argument
|
||||
movl C(VGA_pagebase),%edi // %edi = destination
|
||||
movl C(VGA_height),%ebp
|
||||
shrl $1,%ebp // %ebp = number of two-row blocks
|
||||
|
||||
movl C(VGA_width),%ecx
|
||||
movl C(VGA_bufferrowbytes),%eax
|
||||
subl %ecx,%eax
|
||||
movl %eax,LSrcDelta // VGA_bufferrowbytes - VGA_width
|
||||
movl C(VGA_rowbytes),%eax
|
||||
shll $2,%eax
|
||||
subl %ecx,%eax
|
||||
movl %eax,LDestDelta // 4*VGA_rowbytes - VGA_width
|
||||
shrl $4,%ecx
|
||||
movl %ecx,LCopyWidth // column-loop iterations per row
|
||||
|
||||
// once per two-row block
LRowLoop:
|
||||
movb $1,%al // first mask value
|
||||
|
||||
// once per mask value (1, 2, 4, 8)
LPlaneLoop:
|
||||
outb %al,%dx // write the current mask value
|
||||
movb $2,%ah // rows to copy in this pass
|
||||
|
||||
// remember the block start so the next pass covers the same rows
pushl %esi
|
||||
pushl %edi
|
||||
LRowSetLoop:
|
||||
movl LCopyWidth,%ecx
|
||||
// pack source bytes 0, 4, 8 and 12 into one dword (byte 0 lowest) and store it
LColumnLoop:
|
||||
movb 12(%esi),%bh
|
||||
movb 8(%esi),%bl
|
||||
shll $16,%ebx // move bytes 8 and 12 into the upper half
|
||||
movb 4(%esi),%bh
|
||||
movb (%esi),%bl
|
||||
movl %ebx,(%edi)
|
||||
addl $16,%esi
|
||||
addl $4,%edi
|
||||
decl %ecx
|
||||
jnz LColumnLoop
|
||||
|
||||
addl LDestDelta,%edi
|
||||
addl LSrcDelta,%esi
|
||||
decb %ah
|
||||
jnz LRowSetLoop
|
||||
|
||||
popl %edi
|
||||
popl %esi
|
||||
incl %esi // next pass starts one source byte later
|
||||
|
||||
shlb $1,%al // next mask value
|
||||
cmpb $16,%al // done after mask 8
|
||||
jnz LPlaneLoop
|
||||
|
||||
subl $4,%esi // undo the four incl %esi above
|
||||
addl LBlockSrcStep,%esi
|
||||
addl LBlockDestStep,%edi
|
||||
decl %ebp
|
||||
jnz LRowLoop
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
|
||||
ret
|
||||
|
||||
|
||||
// %esp-relative offsets of VGA_UpdateLinearScreen's stack arguments, valid
// after its prologue: the +16 accounts for the four registers it pushes
// (%ebp, %edi, %esi, %ebx); the first term is the argument's offset at entry
// (presumably just past the return address -- confirm the calling convention).
#define srcptr 4+16
|
||||
#define destptr 8+16
|
||||
#define width 12+16
|
||||
#define height 16+16
|
||||
#define srcrowbytes 20+16
|
||||
#define destrowbytes 24+16
|
||||
|
||||
.globl C(VGA_UpdateLinearScreen)
|
||||
//
// VGA_UpdateLinearScreen
// Copies a rectangle of bytes row by row from a source buffer to a
// destination buffer whose rows may have different pitches.
//
// Stack arguments, in order (offsets come from the srcptr..destrowbytes
// defines above): srcptr, destptr, width, height, srcrowbytes, destrowbytes.
// width and the two pitches are in bytes; height is in rows.
//
// Saves and restores %ebp, %edi, %esi, %ebx; %eax, %ecx, %edx and the flags
// are scratch. Clears the direction flag and leaves it clear.
//
// Each row is copied as width/4 dwords followed by the remaining width%4
// bytes, so the pointers have advanced by exactly width before the per-row
// skips are added. (Copying only whole dwords left the tail bytes uncopied
// and made both pointers drift by width%4 bytes per row.) A height of zero
// or less copies nothing instead of wrapping the row counter.
//
C(VGA_UpdateLinearScreen):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

	cld							// copy upwards
	movl	srcptr(%esp),%esi
	movl	destptr(%esp),%edi
	movl	width(%esp),%ebx
	movl	srcrowbytes(%esp),%eax
	subl	%ebx,%eax			// %eax = source bytes to skip after each row
	movl	destrowbytes(%esp),%edx
	subl	%ebx,%edx			// %edx = dest bytes to skip after each row
	shrl	$2,%ebx				// %ebx = whole dwords per row
	movl	height(%esp),%ebp
	testl	%ebp,%ebp
	jle		LLDone				// no rows to copy

LLRowLoop:
	movl	%ebx,%ecx
	rep/movsl	(%esi),(%edi)	// bulk of the row, one dword at a time
	movl	width(%esp),%ecx	// no pushes since the prologue, so the
	andl	$3,%ecx				//  argument offset is still valid
	rep/movsb	(%esi),(%edi)	// trailing width%4 bytes (none if %ecx is 0)
	addl	%eax,%esi
	addl	%edx,%edi
	decl	%ebp
	jnz		LLRowLoop

LLDone:
	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame

	ret
|
||||
|
1037
source/d_draw.S
Normal file
1037
source/d_draw.S
Normal file
File diff suppressed because it is too large
Load diff
974
source/d_draw16.S
Normal file
974
source/d_draw16.S
Normal file
|
@ -0,0 +1,974 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// d_draw16.s
|
||||
// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
|
||||
// subdivision.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// 8-bpp horizontal span drawing code for polygons, with no transparency and
|
||||
// 16-pixel subdivision.
|
||||
//
|
||||
// Assumes there is at least one span in pspans, and that every span
|
||||
// contains at least one pixel
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
.data
|
||||
|
||||
.text
|
||||
|
||||
// out-of-line, rarely-needed clamping code
|
||||
|
||||
// Out-of-line clamp stubs for D_DrawSpans16. Each stub is branched to from a
// range check in the span code, loads the clamped value into the register
// being checked, and jumps back to the matching LClampReentryN label.

// stubs 0/1: span-start s (%esi) and t (%edx). The span code branches to
// LClampHighOrLowN with "ja" after comparing against bbextents/bbextentt; the
// "jg" here re-tests the same flags as a signed compare, so a value that is
// above only in the unsigned sense falls through and is set to 0.
LClampHigh0:
|
||||
movl C(bbextents),%esi
|
||||
jmp LClampReentry0
|
||||
LClampHighOrLow0:
|
||||
jg LClampHigh0
|
||||
xorl %esi,%esi
|
||||
jmp LClampReentry0
|
||||
|
||||
LClampHigh1:
|
||||
movl C(bbextentt),%edx
|
||||
jmp LClampReentry1
|
||||
LClampHighOrLow1:
|
||||
jg LClampHigh1
|
||||
xorl %edx,%edx
|
||||
jmp LClampReentry1
|
||||
|
||||
// stubs 2/3: segment-end s (%ebp) and t (%ecx) in the full 16-pixel segment
// path; values below 4096 are raised to 4096, values above the extent are
// set to bbextents/bbextentt.
LClampLow2:
|
||||
movl $4096,%ebp
|
||||
jmp LClampReentry2
|
||||
LClampHigh2:
|
||||
movl C(bbextents),%ebp
|
||||
jmp LClampReentry2
|
||||
|
||||
LClampLow3:
|
||||
movl $4096,%ecx
|
||||
jmp LClampReentry3
|
||||
LClampHigh3:
|
||||
movl C(bbextentt),%ecx
|
||||
jmp LClampReentry3
|
||||
|
||||
// stubs 4/5: the same limits for the last-segment path, where the end-of-span
// s is in %eax and t is in %ebx.
LClampLow4:
|
||||
movl $4096,%eax
|
||||
jmp LClampReentry4
|
||||
LClampHigh4:
|
||||
movl C(bbextents),%eax
|
||||
jmp LClampReentry4
|
||||
|
||||
LClampLow5:
|
||||
movl $4096,%ebx
|
||||
jmp LClampReentry5
|
||||
LClampHigh5:
|
||||
movl C(bbextentt),%ebx
|
||||
jmp LClampReentry5
|
||||
|
||||
|
||||
#define pspans 4+16
|
||||
|
||||
.align 4
|
||||
.globl C(D_DrawSpans16)
|
||||
C(D_DrawSpans16):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
//
|
||||
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
|
||||
// and span list pointers
|
||||
//
|
||||
// TODO: any overlap from rearranging?
|
||||
flds C(d_sdivzstepu)
|
||||
fmuls fp_16
|
||||
movl C(cacheblock),%edx
|
||||
flds C(d_tdivzstepu)
|
||||
fmuls fp_16
|
||||
movl pspans(%esp),%ebx // point to the first span descriptor
|
||||
flds C(d_zistepu)
|
||||
fmuls fp_16
|
||||
movl %edx,pbase // pbase = cacheblock
|
||||
fstps zi16stepu
|
||||
fstps tdivz16stepu
|
||||
fstps sdivz16stepu
|
||||
|
||||
LSpanLoop:
|
||||
//
|
||||
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
|
||||
// initial s and t values
|
||||
//
|
||||
// FIXME: pipeline FILD?
|
||||
fildl espan_t_v(%ebx)
|
||||
fildl espan_t_u(%ebx)
|
||||
|
||||
fld %st(1) // dv | du | dv
|
||||
fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
|
||||
fld %st(1) // du | dv*d_sdivzstepv | du | dv
|
||||
fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
||||
fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
||||
fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
|
||||
// dv*d_sdivzstepv | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
|
||||
// dv*d_sdivzstepv | du | dv
|
||||
faddp %st(0),%st(2) // du*d_tdivzstepu |
|
||||
// du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
|
||||
// du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
|
||||
fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
|
||||
// du*d_sdivzstepu; stays in %st(2) at end
|
||||
fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
|
||||
// s/z
|
||||
fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
|
||||
// du*d_tdivzstepu | du | s/z
|
||||
fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
|
||||
// du*d_tdivzstepu | du | s/z
|
||||
faddp %st(0),%st(2) // dv*d_zistepv |
|
||||
// dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
|
||||
fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// dv*d_zistepv | s/z
|
||||
fmuls C(d_zistepu) // du*d_zistepu |
|
||||
// dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// dv*d_zistepv | s/z
|
||||
fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// du*d_zistepu | dv*d_zistepv | s/z
|
||||
fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
|
||||
// du*d_tdivzstepu; stays in %st(1) at end
|
||||
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
|
||||
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
|
||||
|
||||
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
|
||||
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
|
||||
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
|
||||
// du*d_zistepu; stays in %st(0) at end
|
||||
// 1/z | fp_64k | t/z | s/z
|
||||
//
|
||||
// calculate and clamp s & t
|
||||
//
|
||||
fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
|
||||
|
||||
//
|
||||
// point %edi to the first pixel in the span
|
||||
//
|
||||
movl C(d_viewbuffer),%ecx
|
||||
movl espan_t_v(%ebx),%eax
|
||||
movl %ebx,pspantemp // preserve spans pointer
|
||||
|
||||
movl C(tadjust),%edx
|
||||
movl C(sadjust),%esi
|
||||
movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
|
||||
addl %ecx,%edi
|
||||
movl espan_t_u(%ebx),%ecx
|
||||
addl %ecx,%edi // pdest = &pdestspan[scans->u];
|
||||
movl espan_t_count(%ebx),%ecx
|
||||
|
||||
//
|
||||
// now start the FDIV for the end of the span
|
||||
//
|
||||
cmpl $16,%ecx
|
||||
ja LSetupNotLast1
|
||||
|
||||
decl %ecx
|
||||
jz LCleanup1 // if only one pixel, no need to start an FDIV
|
||||
movl %ecx,spancountminus1
|
||||
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
|
||||
fildl spancountminus1
|
||||
|
||||
flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
|
||||
flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
|
||||
fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
|
||||
fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
|
||||
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
|
||||
fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
|
||||
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
|
||||
// C(d_tdivzstepu)*scm1
|
||||
fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
|
||||
// C(d_tdivzstepu)*scm1
|
||||
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
|
||||
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
|
||||
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
|
||||
faddp %st(0),%st(3)
|
||||
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
jmp LFDIVInFlight1
|
||||
|
||||
LCleanup1:
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
jmp LFDIVInFlight1
|
||||
|
||||
.align 4
|
||||
LSetupNotLast1:
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
|
||||
fadds zi16stepu
|
||||
fxch %st(2)
|
||||
fadds sdivz16stepu
|
||||
fxch %st(2)
|
||||
flds tdivz16stepu
|
||||
faddp %st(0),%st(2)
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // z = 1/1/z
|
||||
// this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
LFDIVInFlight1:
|
||||
|
||||
addl s,%esi
|
||||
addl t,%edx
|
||||
movl C(bbextents),%ebx
|
||||
movl C(bbextentt),%ebp
|
||||
cmpl %ebx,%esi
|
||||
ja LClampHighOrLow0
|
||||
LClampReentry0:
|
||||
movl %esi,s
|
||||
movl pbase,%ebx
|
||||
shll $16,%esi
|
||||
cmpl %ebp,%edx
|
||||
movl %esi,sfracf
|
||||
ja LClampHighOrLow1
|
||||
LClampReentry1:
|
||||
movl %edx,t
|
||||
movl s,%esi // sfrac = scans->sfrac;
|
||||
shll $16,%edx
|
||||
movl t,%eax // tfrac = scans->tfrac;
|
||||
sarl $16,%esi
|
||||
movl %edx,tfracf
|
||||
|
||||
//
|
||||
// calculate the texture starting address
|
||||
//
|
||||
sarl $16,%eax
|
||||
movl C(cachewidth),%edx
|
||||
imull %edx,%eax // (tfrac >> 16) * cachewidth
|
||||
addl %ebx,%esi
|
||||
addl %eax,%esi // psource = pbase + (sfrac >> 16) +
|
||||
// ((tfrac >> 16) * cachewidth);
|
||||
//
|
||||
// determine whether last span or not
|
||||
//
|
||||
cmpl $16,%ecx
|
||||
jna LLastSegment
|
||||
|
||||
//
|
||||
// not the last segment; do full 16-wide segment
|
||||
//
|
||||
LNotLastSegment:
|
||||
|
||||
//
|
||||
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
||||
// get there
|
||||
//
|
||||
|
||||
// pick up after the FDIV that was left in flight previously
|
||||
|
||||
fld %st(0) // duplicate it
|
||||
fmul %st(4),%st(0) // s = s/z * z
|
||||
fxch %st(1)
|
||||
fmul %st(3),%st(0) // t = t/z * z
|
||||
fxch %st(1)
|
||||
fistpl snext
|
||||
fistpl tnext
|
||||
movl snext,%eax
|
||||
movl tnext,%edx
|
||||
|
||||
movb (%esi),%bl // get first source texel
|
||||
subl $16,%ecx // count off this segments' pixels
|
||||
movl C(sadjust),%ebp
|
||||
movl %ecx,counttemp // remember count of remaining pixels
|
||||
|
||||
movl C(tadjust),%ecx
|
||||
movb %bl,(%edi) // store first dest pixel
|
||||
|
||||
addl %eax,%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
movl C(bbextents),%eax
|
||||
movl C(bbextentt),%edx
|
||||
|
||||
cmpl $4096,%ebp
|
||||
jl LClampLow2
|
||||
cmpl %eax,%ebp
|
||||
ja LClampHigh2
|
||||
LClampReentry2:
|
||||
|
||||
cmpl $4096,%ecx
|
||||
jl LClampLow3
|
||||
cmpl %edx,%ecx
|
||||
ja LClampHigh3
|
||||
LClampReentry3:
|
||||
|
||||
movl %ebp,snext
|
||||
movl %ecx,tnext
|
||||
|
||||
subl s,%ebp
|
||||
subl t,%ecx
|
||||
|
||||
//
|
||||
// set up advancetable
|
||||
//
|
||||
movl %ecx,%eax
|
||||
movl %ebp,%edx
|
||||
sarl $20,%eax // tstep >>= 16;
|
||||
jz LZero
|
||||
sarl $20,%edx // sstep >>= 16;
|
||||
movl C(cachewidth),%ebx
|
||||
imull %ebx,%eax
|
||||
jmp LSetUp1
|
||||
|
||||
LZero:
|
||||
sarl $20,%edx // sstep >>= 16;
|
||||
movl C(cachewidth),%ebx
|
||||
|
||||
LSetUp1:
|
||||
|
||||
addl %edx,%eax // add in sstep
|
||||
// (tstep >> 16) * cachewidth + (sstep >> 16);
|
||||
movl tfracf,%edx
|
||||
movl %eax,advancetable+4 // advance base in t
|
||||
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
|
||||
// (sstep >> 16);
|
||||
shll $12,%ebp // left-justify sstep fractional part
|
||||
movl sfracf,%ebx
|
||||
shll $12,%ecx // left-justify tstep fractional part
|
||||
movl %eax,advancetable // advance extra in t
|
||||
|
||||
movl %ecx,tstep
|
||||
addl %ecx,%edx // advance tfrac fractional part by tstep frac
|
||||
|
||||
sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
|
||||
addl %ebp,%ebx // advance sfrac fractional part by sstep frac
|
||||
adcl advancetable+4(,%ecx,4),%esi // point to next source texel
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb (%esi),%al
|
||||
addl %ebp,%ebx
|
||||
movb %al,1(%edi)
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,2(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,3(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,4(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,5(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,6(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,7(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
|
||||
//
|
||||
// start FDIV for end of next segment in flight, so it can overlap
|
||||
//
|
||||
movl counttemp,%ecx
|
||||
cmpl $16,%ecx // more than one segment after this?
|
||||
ja LSetupNotLast2 // yes
|
||||
|
||||
decl %ecx
|
||||
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
|
||||
movl %ecx,spancountminus1
|
||||
fildl spancountminus1
|
||||
|
||||
flds C(d_zistepu) // C(d_zistepu) | spancountminus1
|
||||
fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
|
||||
flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
|
||||
fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
|
||||
fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
|
||||
faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
|
||||
fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
|
||||
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
|
||||
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
|
||||
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
|
||||
flds fp_64k // 64k | C(d_sdivzstepu)*scm1
|
||||
fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
|
||||
faddp %st(0),%st(4) // 64k
|
||||
|
||||
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
jmp LFDIVInFlight2
|
||||
|
||||
.align 4
|
||||
LSetupNotLast2:
|
||||
fadds zi16stepu
|
||||
fxch %st(2)
|
||||
fadds sdivz16stepu
|
||||
fxch %st(2)
|
||||
flds tdivz16stepu
|
||||
faddp %st(0),%st(2)
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // z = 1/1/z
|
||||
// this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
LFDIVInFlight2:
|
||||
movl %ecx,counttemp
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,8(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,9(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,10(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,11(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,12(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,13(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,14(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl $16,%edi
|
||||
movl %edx,tfracf
|
||||
movl snext,%edx
|
||||
movl %ebx,sfracf
|
||||
movl tnext,%ebx
|
||||
movl %edx,s
|
||||
movl %ebx,t
|
||||
|
||||
movl counttemp,%ecx // retrieve count
|
||||
|
||||
//
|
||||
// determine whether last span or not
|
||||
//
|
||||
cmpl $16,%ecx // are there multiple segments remaining?
|
||||
movb %al,-1(%edi)
|
||||
ja LNotLastSegment // yes
|
||||
|
||||
//
|
||||
// last segment of scan
|
||||
//
|
||||
LLastSegment:
|
||||
|
||||
//
|
||||
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
||||
// get there. The number of pixels left is variable, and we want to land on the
|
||||
// last pixel, not step one past it, so we can't run into arithmetic problems
|
||||
//
|
||||
testl %ecx,%ecx
|
||||
jz LNoSteps // just draw the last pixel and we're done
|
||||
|
||||
// pick up after the FDIV that was left in flight previously
|
||||
|
||||
|
||||
fld %st(0) // duplicate it
|
||||
fmul %st(4),%st(0) // s = s/z * z
|
||||
fxch %st(1)
|
||||
fmul %st(3),%st(0) // t = t/z * z
|
||||
fxch %st(1)
|
||||
fistpl snext
|
||||
fistpl tnext
|
||||
|
||||
movb (%esi),%al // load first texel in segment
|
||||
movl C(tadjust),%ebx
|
||||
movb %al,(%edi) // store first pixel in segment
|
||||
movl C(sadjust),%eax
|
||||
|
||||
addl snext,%eax
|
||||
addl tnext,%ebx
|
||||
|
||||
movl C(bbextents),%ebp
|
||||
movl C(bbextentt),%edx
|
||||
|
||||
cmpl $4096,%eax
|
||||
jl LClampLow4
|
||||
cmpl %ebp,%eax
|
||||
ja LClampHigh4
|
||||
LClampReentry4:
|
||||
movl %eax,snext
|
||||
|
||||
cmpl $4096,%ebx
|
||||
jl LClampLow5
|
||||
cmpl %edx,%ebx
|
||||
ja LClampHigh5
|
||||
LClampReentry5:
|
||||
|
||||
cmpl $1,%ecx // don't bother
|
||||
je LOnlyOneStep // if two pixels in segment, there's only one step,
|
||||
// of the segment length
|
||||
subl s,%eax
|
||||
subl t,%ebx
|
||||
|
||||
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
|
||||
addl %ebx,%ebx // reciprocal yields 16.48
|
||||
|
||||
imull reciprocal_table_16-8(,%ecx,4) // sstep = (snext - s) /
|
||||
// (spancount-1)
|
||||
movl %edx,%ebp
|
||||
|
||||
movl %ebx,%eax
|
||||
imull reciprocal_table_16-8(,%ecx,4) // tstep = (tnext - t) /
|
||||
// (spancount-1)
|
||||
LSetEntryvec:
|
||||
//
|
||||
// set up advancetable
|
||||
//
|
||||
movl entryvec_table_16(,%ecx,4),%ebx
|
||||
movl %edx,%eax
|
||||
movl %ebx,jumptemp // entry point into code for RET later
|
||||
movl %ebp,%ecx
|
||||
sarl $16,%edx // tstep >>= 16;
|
||||
movl C(cachewidth),%ebx
|
||||
sarl $16,%ecx // sstep >>= 16;
|
||||
imull %ebx,%edx
|
||||
|
||||
addl %ecx,%edx // add in sstep
|
||||
// (tstep >> 16) * cachewidth + (sstep >> 16);
|
||||
movl tfracf,%ecx
|
||||
movl %edx,advancetable+4 // advance base in t
|
||||
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
|
||||
// (sstep >> 16);
|
||||
shll $16,%ebp // left-justify sstep fractional part
|
||||
movl sfracf,%ebx
|
||||
shll $16,%eax // left-justify tstep fractional part
|
||||
movl %edx,advancetable // advance extra in t
|
||||
|
||||
movl %eax,tstep
|
||||
movl %ecx,%edx
|
||||
addl %eax,%edx
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
jmp *jumptemp // jump to the number-of-pixels handler
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
LNoSteps:
|
||||
movb (%esi),%al // load first texel in segment
|
||||
subl $15,%edi // adjust for hardwired offset
|
||||
jmp LEndSpan
|
||||
|
||||
|
||||
LOnlyOneStep:
|
||||
subl s,%eax
|
||||
subl t,%ebx
|
||||
movl %eax,%ebp
|
||||
movl %ebx,%edx
|
||||
jmp LSetEntryvec
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Entry2_16, Entry3_16, Entry4_16, Entry5_16
|
||||
.globl Entry6_16, Entry7_16, Entry8_16, Entry9_16
|
||||
.globl Entry10_16, Entry11_16, Entry12_16, Entry13_16
|
||||
.globl Entry14_16, Entry15_16, Entry16_16
|
||||
|
||||
Entry2_16:
|
||||
subl $14,%edi // adjust for hardwired offsets
|
||||
movb (%esi),%al
|
||||
jmp LEntry2_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry3_16:
|
||||
subl $13,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
jmp LEntry3_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry4_16:
|
||||
subl $12,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry4_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry5_16:
|
||||
subl $11,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry5_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry6_16:
|
||||
subl $10,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry6_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry7_16:
|
||||
subl $9,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry7_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry8_16:
|
||||
subl $8,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry8_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry9_16:
|
||||
subl $7,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry9_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry10_16:
|
||||
subl $6,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry10_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry11_16:
|
||||
subl $5,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry11_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry12_16:
|
||||
subl $4,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry12_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry13_16:
|
||||
subl $3,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry13_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry14_16:
|
||||
subl $2,%edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry14_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry15_16:
|
||||
decl %edi // adjust for hardwired offsets
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
jmp LEntry15_16
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
Entry16_16:
|
||||
addl %eax,%edx
|
||||
movb (%esi),%al
|
||||
sbbl %ecx,%ecx
|
||||
addl %ebp,%ebx
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
|
||||
addl tstep,%edx
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,1(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry15_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,2(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry14_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,3(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry13_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,4(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry12_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,5(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry11_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,6(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry10_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,7(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry9_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,8(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry8_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,9(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry7_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,10(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry6_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,11(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry5_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,12(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
addl tstep,%edx
|
||||
LEntry4_16:
|
||||
sbbl %ecx,%ecx
|
||||
movb %al,13(%edi)
|
||||
addl %ebp,%ebx
|
||||
movb (%esi),%al
|
||||
adcl advancetable+4(,%ecx,4),%esi
|
||||
LEntry3_16:
|
||||
movb %al,14(%edi)
|
||||
movb (%esi),%al
|
||||
LEntry2_16:
|
||||
|
||||
LEndSpan:
|
||||
|
||||
//
|
||||
// clear s/z, t/z, 1/z from FP stack
|
||||
//
|
||||
fstp %st(0)
|
||||
fstp %st(0)
|
||||
fstp %st(0)
|
||||
|
||||
movl pspantemp,%ebx // restore spans pointer
|
||||
movl espan_t_pnext(%ebx),%ebx // point to next span
|
||||
testl %ebx,%ebx // any more spans?
|
||||
movb %al,15(%edi)
|
||||
jnz LSpanLoop // more spans
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
#endif // id386
|
477
source/d_parta.S
Normal file
477
source/d_parta.S
Normal file
|
@ -0,0 +1,477 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// d_parta.s
|
||||
// x86 assembly-language 8-bpp particle-drawing code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "d_ifacea.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
#if id386
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// 8-bpp particle drawing code.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//FIXME: comments, full optimization
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// 8-bpp particle queueing code.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
.text
|
||||
|
||||
//----------------------------------------------------------------------
// D_DrawParticle
//
// C-callable. Takes one argument on the stack: a pointer to a particle
// record, read through the pt_org (three floats) and pt_color (one
// float) offsets.
//
// Steps:
//   1. local = p->org - r_origin
//   2. z = local . r_ppn; bail out if z < float_particle_z_clip
//   3. y = local . r_pup, x = local . r_pright
//   4. u = xcenter + x/z + 0.5, v = ycenter - y/z + 0.5, stored as
//      integers; bail out if (u,v) is outside the particle view rect
//   5. izi = (1/z) * DP_32768, stored as an integer; its low 16 bits are
//      the depth value compared against / written to the z-buffer
//   6. pix = clamp(izi >> d_pix_shift, d_pix_min, d_pix_max)
//   7. draw a pix-wide block, z-tested per pixel, either through one of
//      the unrolled 1x1..4x4 routines or through the generic loop
//
// Registers: %ebp, %edi and %ebx are saved on entry and restored at
// LDone; %esi is saved and restored inside the 2x2/3x3/4x4 routines,
// the only places that use it. %eax, %ecx and %edx are overwritten.
// Every exit path pops whatever it pushed on the x87 stack.
//
// DP_u, DP_v, DP_Color, DP_Pix, DP_32768, DP_EntryTable, izi, float_1,
// float_point5 and float_particle_z_clip are defined outside this
// function.
//----------------------------------------------------------------------

// offset of the first argument from %esp after the three pushes below:
// 12 bytes of saved registers + 4 bytes of return address
#define P	12+4

	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4		// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8		// r_ppn[2] | dot1 | dot0 | local[0] |
							//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)			// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1)	// dot0 + dot1 | dot2 | local[0] | local[1] |
							//  local[2]
	faddp	%st(0),%st(1)	// z | local[0] | local[1] | local[2]
	fld		%st(0)			// z | z | local[0] | local[1] |
							//  local[2]
	fdivrs	float_1			// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)			// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
// The compare is issued here, but its result is only read by the fnstsw
// below, after two more loads have been started; that is why the
// early-out path has six values to pop.
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

	flds	C(r_pup)		// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4		// r_pup[1] | dot0 | local[2] | local[0] |
							//  local[1] | 1/z

	fnstsw	%ax				// %ah bit 0 = C0, set when z < clip value
	testb	$1,%ah
	jnz		LPop6AndDone

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8		// r_pup[2] | dot1 | dot0 | local[2] |
							//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
							//  local[1] | 1/z
	fxch	%st(2)			// dot0 | dot1 | dot2 | local[2] | local[0] |
							//  local[1] | 1/z
	faddp	%st(0),%st(1)	// dot0 + dot1 | dot2 | local[2] | local[0] |
							//  local[1] | 1/z
	faddp	%st(0),%st(1)	// y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)			// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)			// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)		// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)			// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)			// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1)	// dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1)	// x | y | 1/z
	fxch	%st(1)			// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)			// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)			// y/z | x/z | 1/z
	fsubrs	C(ycenter)		// v | x/z | 1/z
	fxch	%st(1)			// x/z | v | 1/z
	fadds	C(xcenter)		// u | v | 1/z
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)			// v | u | 1/z
	fadds	float_point5	// v | u | 1/z
	fxch	%st(1)			// u | v | 1/z
	fadds	float_point5	// u | v | 1/z
	fxch	%st(2)			// 1/z | v | u
	fmuls	DP_32768		// 1/z * 0x8000 | v | u
	fxch	%st(2)			// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u			// v | 1/z * 0x8000
	fistpl	DP_v			// 1/z * 0x8000

	movl	DP_u,%eax		// %eax = u
	movl	DP_v,%edx		// %edx = v

// if ((v > d_vrectbottom_particle) ||
//	(u > d_vrectright_particle) ||
//	(v < d_vrecty) ||
//	(u < d_vrectx))
// {
//	continue;
// }

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

	flds	pt_color(%edi)	// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color		// 1/z * 0x8000

	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx							// d_viewbuffer + u
	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel

	imull	C(d_zrowbytes),%edx		// point to the z pixel

	leal	(%edx,%eax,2),%edx		// v*d_zrowbytes + u*2 (16-bit z entries)
	movl	C(d_pzbuffer),%eax

	fistpl	izi						// x87 stack is empty from here on

	addl	%ebx,%edi				// %edi = destination pixel address
	addl	%eax,%edx				// %edx = destination z-buffer address

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp				// %bp = depth value used for the z tests

// if (pix < d_pix_min)
//		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

	movb	DP_Color,%ch			// %ch = colour byte written to the screen

// The unrolled routines are used only when d_y_aspect_shift is zero and
// pix <= 4 (unsigned compare, so a negative pix also goes to LDefault).
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// Indirect jump through entry (pix-1) of DP_EntryTable. The '*' marks
// the operand as an indirect target, as AT&T syntax requires; without
// it GAS assembles the same instruction but warns about it.
// NOTE(review): pix == 0 would read the dword just before the table, and
// would also never terminate in LDefault; presumably d_pix_min >= 1
// always holds -- confirm against the code that sets it.
	jmp		*DP_EntryTable-4(,%eax,4)

// 1x1
.globl	DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone			// stored z is nearer: leave pixel alone
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
.globl	DP_2x2
DP_2x2:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = byte step to next screen row
	movl	C(d_zrowbytes),%esi		// %esi = byte step to next z-buffer row

	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
.globl	DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = byte step to next screen row
	movl	C(d_zrowbytes),%esi		// %esi = byte step to next z-buffer row

	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
.globl	DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = byte step to next screen row
	movl	C(d_zrowbytes),%esi		// %esi = byte step to next z-buffer row

	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// step both pointers down two rows, then repeat the two-row pattern
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix				// saved so each row can reload its width
	movb	C(d_y_aspect_shift),%cl	// only %cl is replaced; colour stays in %ch
	shll	%cl,%ebx				// %ebx = row count

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
//	for (i=0 ; i<pix ; i++)
//	{
//		if (pz[i] <= izi)
//		{
//			pz[i] = izi;
//			pdest[i] = color;
//		}
//	}
// }

LGenRowLoop:
	movl	DP_Pix,%eax

// columns are walked right to left: %eax counts pix..1, hence the -2/-1
// displacements
LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax			// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx			// --count
	jnz		LGenRowLoop

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// early-out paths: discard what is still on the x87 stack, then share
// the normal epilogue
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone
|
||||
|
||||
#endif // id386
|
1744
source/d_polysa.S
Normal file
1744
source/d_polysa.S
Normal file
File diff suppressed because it is too large
Load diff
89
source/d_scana.S
Normal file
89
source/d_scana.S
Normal file
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// d_scana.s
|
||||
// x86 assembly-language turbulent texture mapping code
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
.text
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// turbulent texture mapping code
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------
// D_DrawTurbulent8Span
//
// C-callable, no stack arguments. Writes r_turb_spancount consecutive
// 8-bit pixels starting at r_turb_pdest. For each pixel:
//
//   sturb = ((s + r_turb_turb[(t >> 16) & (CYCLE-1)]) >> 16)
//                                              & (TURB_TEX_SIZE-1)
//   tturb = ((t + r_turb_turb[(s >> 16) & (CYCLE-1)]) >> 16)
//                                              & (TURB_TEX_SIZE-1)
//   *pdest++ = r_turb_pbase[(tturb << 6) + sturb]
//   s += r_turb_sstep;  t += r_turb_tstep
//
// where s and t start as r_turb_s and r_turb_t, and r_turb_turb points
// to a table of 32-bit entries.
//
// The loop body always runs at least once and stops as soon as the
// remaining count is no longer positive, so a zero or negative
// r_turb_spancount writes a single pixel instead of wrapping the
// counter around.
//
// On return r_turb_pdest has been advanced past the last pixel written.
// No other global is stored: r_turb_s, r_turb_t and r_turb_spancount
// keep their entry values.
//
// Registers: %ebp, %esi, %edi and %ebx are saved and restored; %eax,
// %ecx and %edx are overwritten.
//----------------------------------------------------------------------

	.align 4
.globl C(D_DrawTurbulent8Span)
C(D_DrawTurbulent8Span):
	pushl	%ebp				// preserve caller's stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

	movl	C(r_turb_s),%esi			// %esi = s
	movl	C(r_turb_t),%ecx			// %ecx = t
	movl	C(r_turb_pdest),%edi		// %edi = destination pointer
	movl	C(r_turb_spancount),%ebx	// %ebx = pixels left

Llp:
	movl	%ecx,%eax
	movl	%esi,%edx
	sarl	$16,%eax					// t >> 16
	movl	C(r_turb_turb),%ebp			// %ebp = offset table base
	sarl	$16,%edx					// s >> 16
	andl	$(CYCLE-1),%eax				// table index taken from t
	andl	$(CYCLE-1),%edx				// table index taken from s
	movl	(%ebp,%eax,4),%eax			// offset for s comes from t's index
	movl	(%ebp,%edx,4),%edx			// offset for t comes from s's index
	addl	%esi,%eax
	sarl	$16,%eax
	addl	%ecx,%edx
	sarl	$16,%edx
	andl	$(TURB_TEX_SIZE-1),%eax		// %eax = sturb
	andl	$(TURB_TEX_SIZE-1),%edx		// %edx = tturb
	shll	$6,%edx						// tturb * 64 -- assumes 64-byte
										//  texture rows, i.e.
										//  TURB_TEX_SIZE == 64; TODO confirm
	movl	C(r_turb_pbase),%ebp		// %ebp = source texture base
	addl	%eax,%edx					// %edx = texel offset
	incl	%edi						// advance dest now; store uses -1
	addl	C(r_turb_sstep),%esi
	addl	C(r_turb_tstep),%ecx
	movb	(%ebp,%edx,1),%dl			// fetch texel
	decl	%ebx						// sets the flags tested by jg below
	movb	%dl,-1(%edi)				// mov leaves those flags untouched
	jg		Llp							// loop while count > 0 (signed)

	movl	%edi,C(r_turb_pdest)

	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller's stack frame pointer
	ret
|
||||
|
||||
#endif // id386
|
||||
|
900
source/d_spr8.S
Normal file
900
source/d_spr8.S
Normal file
|
@ -0,0 +1,900 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// d_spr8.s
|
||||
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
#if id386
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// 8-bpp horizontal span drawing code for polygons, with transparency.
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
.text
|
||||
|
||||
// out-of-line, rarely-needed clamping code
|
||||
|
||||
LClampHigh0:
|
||||
movl C(bbextents),%esi
|
||||
jmp LClampReentry0
|
||||
LClampHighOrLow0:
|
||||
jg LClampHigh0
|
||||
xorl %esi,%esi
|
||||
jmp LClampReentry0
|
||||
|
||||
LClampHigh1:
|
||||
movl C(bbextentt),%edx
|
||||
jmp LClampReentry1
|
||||
LClampHighOrLow1:
|
||||
jg LClampHigh1
|
||||
xorl %edx,%edx
|
||||
jmp LClampReentry1
|
||||
|
||||
LClampLow2:
|
||||
movl $2048,%ebp
|
||||
jmp LClampReentry2
|
||||
LClampHigh2:
|
||||
movl C(bbextents),%ebp
|
||||
jmp LClampReentry2
|
||||
|
||||
LClampLow3:
|
||||
movl $2048,%ecx
|
||||
jmp LClampReentry3
|
||||
LClampHigh3:
|
||||
movl C(bbextentt),%ecx
|
||||
jmp LClampReentry3
|
||||
|
||||
LClampLow4:
|
||||
movl $2048,%eax
|
||||
jmp LClampReentry4
|
||||
LClampHigh4:
|
||||
movl C(bbextents),%eax
|
||||
jmp LClampReentry4
|
||||
|
||||
LClampLow5:
|
||||
movl $2048,%ebx
|
||||
jmp LClampReentry5
|
||||
LClampHigh5:
|
||||
movl C(bbextentt),%ebx
|
||||
jmp LClampReentry5
|
||||
|
||||
|
||||
#define pspans 4+16
|
||||
|
||||
.align 4
|
||||
.globl C(D_SpriteDrawSpans)
|
||||
C(D_SpriteDrawSpans):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
//
|
||||
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
|
||||
// and span list pointers, and 1/z step in 0.32 fixed-point
|
||||
//
|
||||
// FIXME: any overlap from rearranging?
|
||||
flds C(d_sdivzstepu)
|
||||
fmuls fp_8
|
||||
movl C(cacheblock),%edx
|
||||
flds C(d_tdivzstepu)
|
||||
fmuls fp_8
|
||||
movl pspans(%esp),%ebx // point to the first span descriptor
|
||||
flds C(d_zistepu)
|
||||
fmuls fp_8
|
||||
movl %edx,pbase // pbase = cacheblock
|
||||
flds C(d_zistepu)
|
||||
fmuls fp_64kx64k
|
||||
fxch %st(3)
|
||||
fstps sdivz8stepu
|
||||
fstps zi8stepu
|
||||
fstps tdivz8stepu
|
||||
fistpl izistep
|
||||
movl izistep,%eax
|
||||
rorl $16,%eax // put upper 16 bits in low word
|
||||
movl sspan_t_count(%ebx),%ecx
|
||||
movl %eax,izistep
|
||||
|
||||
cmpl $0,%ecx
|
||||
jle LNextSpan
|
||||
|
||||
LSpanLoop:
|
||||
|
||||
//
|
||||
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
|
||||
// initial s and t values
|
||||
//
|
||||
// FIXME: pipeline FILD?
|
||||
fildl sspan_t_v(%ebx)
|
||||
fildl sspan_t_u(%ebx)
|
||||
|
||||
fld %st(1) // dv | du | dv
|
||||
fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
|
||||
fld %st(1) // du | dv*d_sdivzstepv | du | dv
|
||||
fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
||||
fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
||||
fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
|
||||
// dv*d_sdivzstepv | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
|
||||
// dv*d_sdivzstepv | du | dv
|
||||
faddp %st(0),%st(2) // du*d_tdivzstepu |
|
||||
// du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
|
||||
// du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// du*d_tdivzstepu | du | dv
|
||||
fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
|
||||
// dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
|
||||
fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
|
||||
// du*d_sdivzstepu; stays in %st(2) at end
|
||||
fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
|
||||
// s/z
|
||||
fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
|
||||
// du*d_tdivzstepu | du | s/z
|
||||
fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
|
||||
// du*d_tdivzstepu | du | s/z
|
||||
faddp %st(0),%st(2) // dv*d_zistepv |
|
||||
// dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
|
||||
fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// dv*d_zistepv | s/z
|
||||
fmuls C(d_zistepu) // du*d_zistepu |
|
||||
// dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// dv*d_zistepv | s/z
|
||||
fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
|
||||
// du*d_zistepu | dv*d_zistepv | s/z
|
||||
fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
|
||||
// du*d_tdivzstepu; stays in %st(1) at end
|
||||
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
|
||||
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
|
||||
|
||||
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
|
||||
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
|
||||
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
|
||||
// du*d_zistepu; stays in %st(0) at end
|
||||
// 1/z | fp_64k | t/z | s/z
|
||||
|
||||
fld %st(0) // FIXME: get rid of stall on FMUL?
|
||||
fmuls fp_64kx64k
|
||||
fxch %st(1)
|
||||
|
||||
//
|
||||
// calculate and clamp s & t
|
||||
//
|
||||
fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
|
||||
fxch %st(1)
|
||||
|
||||
fistpl izi // 0.32 fixed-point 1/z
|
||||
movl izi,%ebp
|
||||
|
||||
//
|
||||
// set pz to point to the first z-buffer pixel in the span
|
||||
//
|
||||
rorl $16,%ebp // put upper 16 bits in low word
|
||||
movl sspan_t_v(%ebx),%eax
|
||||
movl %ebp,izi
|
||||
movl sspan_t_u(%ebx),%ebp
|
||||
imull C(d_zrowbytes)
|
||||
shll $1,%ebp // a word per pixel
|
||||
addl C(d_pzbuffer),%eax
|
||||
addl %ebp,%eax
|
||||
movl %eax,pz
|
||||
|
||||
//
|
||||
// point %edi to the first pixel in the span
|
||||
//
|
||||
movl C(d_viewbuffer),%ebp
|
||||
movl sspan_t_v(%ebx),%eax
|
||||
pushl %ebx // preserve spans pointer
|
||||
movl C(tadjust),%edx
|
||||
movl C(sadjust),%esi
|
||||
movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
|
||||
addl %ebp,%edi
|
||||
movl sspan_t_u(%ebx),%ebp
|
||||
addl %ebp,%edi // pdest = &pdestspan[scans->u];
|
||||
|
||||
//
|
||||
// now start the FDIV for the end of the span
|
||||
//
|
||||
cmpl $8,%ecx
|
||||
ja LSetupNotLast1
|
||||
|
||||
decl %ecx
|
||||
jz LCleanup1 // if only one pixel, no need to start an FDIV
|
||||
movl %ecx,spancountminus1
|
||||
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
|
||||
fildl spancountminus1
|
||||
|
||||
flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
|
||||
flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
|
||||
fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
|
||||
fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
|
||||
fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
|
||||
fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
|
||||
fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
|
||||
// _d_tdivzstepu*scm1
|
||||
fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
|
||||
// _d_tdivzstepu*scm1
|
||||
faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
|
||||
fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
|
||||
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
|
||||
faddp %st(0),%st(3)
|
||||
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
jmp LFDIVInFlight1
|
||||
|
||||
LCleanup1:
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
jmp LFDIVInFlight1
|
||||
|
||||
.align 4
|
||||
LSetupNotLast1:
|
||||
// finish up the s and t calcs
|
||||
fxch %st(1) // z*64k | 1/z | t/z | s/z
|
||||
|
||||
fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
|
||||
fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
|
||||
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
|
||||
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
|
||||
fxch %st(1) // s | t | 1/z | t/z | s/z
|
||||
fistpl s // 1/z | t | t/z | s/z
|
||||
fistpl t // 1/z | t/z | s/z
|
||||
|
||||
fadds zi8stepu
|
||||
fxch %st(2)
|
||||
fadds sdivz8stepu
|
||||
fxch %st(2)
|
||||
flds tdivz8stepu
|
||||
faddp %st(0),%st(2)
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // z = 1/1/z
|
||||
// this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
LFDIVInFlight1:
|
||||
|
||||
addl s,%esi
|
||||
addl t,%edx
|
||||
movl C(bbextents),%ebx
|
||||
movl C(bbextentt),%ebp
|
||||
cmpl %ebx,%esi
|
||||
ja LClampHighOrLow0
|
||||
LClampReentry0:
|
||||
movl %esi,s
|
||||
movl pbase,%ebx
|
||||
shll $16,%esi
|
||||
cmpl %ebp,%edx
|
||||
movl %esi,sfracf
|
||||
ja LClampHighOrLow1
|
||||
LClampReentry1:
|
||||
movl %edx,t
|
||||
movl s,%esi // sfrac = scans->sfrac;
|
||||
shll $16,%edx
|
||||
movl t,%eax // tfrac = scans->tfrac;
|
||||
sarl $16,%esi
|
||||
movl %edx,tfracf
|
||||
|
||||
//
|
||||
// calculate the texture starting address
|
||||
//
|
||||
sarl $16,%eax
|
||||
addl %ebx,%esi
|
||||
imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
|
||||
addl %eax,%esi // psource = pbase + (sfrac >> 16) +
|
||||
// ((tfrac >> 16) * cachewidth);
|
||||
|
||||
//
|
||||
// determine whether last span or not
|
||||
//
|
||||
cmpl $8,%ecx
|
||||
jna LLastSegment
|
||||
|
||||
//
|
||||
// not the last segment; do full 8-wide segment
|
||||
//
|
||||
LNotLastSegment:
|
||||
|
||||
//
|
||||
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
||||
// get there
|
||||
//
|
||||
|
||||
// pick up after the FDIV that was left in flight previously
|
||||
|
||||
fld %st(0) // duplicate it
|
||||
fmul %st(4),%st(0) // s = s/z * z
|
||||
fxch %st(1)
|
||||
fmul %st(3),%st(0) // t = t/z * z
|
||||
fxch %st(1)
|
||||
fistpl snext
|
||||
fistpl tnext
|
||||
movl snext,%eax
|
||||
movl tnext,%edx
|
||||
|
||||
subl $8,%ecx // count off this segments' pixels
|
||||
movl C(sadjust),%ebp
|
||||
pushl %ecx // remember count of remaining pixels
|
||||
movl C(tadjust),%ecx
|
||||
|
||||
addl %eax,%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
movl C(bbextents),%eax
|
||||
movl C(bbextentt),%edx
|
||||
|
||||
cmpl $2048,%ebp
|
||||
jl LClampLow2
|
||||
cmpl %eax,%ebp
|
||||
ja LClampHigh2
|
||||
LClampReentry2:
|
||||
|
||||
cmpl $2048,%ecx
|
||||
jl LClampLow3
|
||||
cmpl %edx,%ecx
|
||||
ja LClampHigh3
|
||||
LClampReentry3:
|
||||
|
||||
movl %ebp,snext
|
||||
movl %ecx,tnext
|
||||
|
||||
subl s,%ebp
|
||||
subl t,%ecx
|
||||
|
||||
//
|
||||
// set up advancetable
|
||||
//
|
||||
movl %ecx,%eax
|
||||
movl %ebp,%edx
|
||||
sarl $19,%edx // sstep >>= 16;
|
||||
movl C(cachewidth),%ebx
|
||||
sarl $19,%eax // tstep >>= 16;
|
||||
jz LIsZero
|
||||
imull %ebx,%eax // (tstep >> 16) * cachewidth;
|
||||
LIsZero:
|
||||
addl %edx,%eax // add in sstep
|
||||
// (tstep >> 16) * cachewidth + (sstep >> 16);
|
||||
movl tfracf,%edx
|
||||
movl %eax,advancetable+4 // advance base in t
|
||||
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
|
||||
// (sstep >> 16);
|
||||
shll $13,%ebp // left-justify sstep fractional part
|
||||
movl %ebp,sstep
|
||||
movl sfracf,%ebx
|
||||
shll $13,%ecx // left-justify tstep fractional part
|
||||
movl %eax,advancetable // advance extra in t
|
||||
movl %ecx,tstep
|
||||
|
||||
movl pz,%ecx
|
||||
movl izi,%ebp
|
||||
|
||||
cmpw (%ecx),%bp
|
||||
jl Lp1
|
||||
movb (%esi),%al // get first source texel
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp1
|
||||
movw %bp,(%ecx)
|
||||
movb %al,(%edi) // store first dest pixel
|
||||
Lp1:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx // advance tfrac fractional part by tstep frac
|
||||
|
||||
sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
|
||||
addl sstep,%ebx // advance sfrac fractional part by sstep frac
|
||||
adcl advancetable+4(,%eax,4),%esi // point to next source texel
|
||||
|
||||
cmpw 2(%ecx),%bp
|
||||
jl Lp2
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp2
|
||||
movw %bp,2(%ecx)
|
||||
movb %al,1(%edi)
|
||||
Lp2:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
cmpw 4(%ecx),%bp
|
||||
jl Lp3
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp3
|
||||
movw %bp,4(%ecx)
|
||||
movb %al,2(%edi)
|
||||
Lp3:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
cmpw 6(%ecx),%bp
|
||||
jl Lp4
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp4
|
||||
movw %bp,6(%ecx)
|
||||
movb %al,3(%edi)
|
||||
Lp4:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
cmpw 8(%ecx),%bp
|
||||
jl Lp5
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp5
|
||||
movw %bp,8(%ecx)
|
||||
movb %al,4(%edi)
|
||||
Lp5:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
//
|
||||
// start FDIV for end of next segment in flight, so it can overlap
|
||||
//
|
||||
popl %eax
|
||||
cmpl $8,%eax // more than one segment after this?
|
||||
ja LSetupNotLast2 // yes
|
||||
|
||||
decl %eax
|
||||
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
|
||||
movl %eax,spancountminus1
|
||||
fildl spancountminus1
|
||||
|
||||
flds C(d_zistepu) // _d_zistepu | spancountminus1
|
||||
fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
|
||||
flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
|
||||
fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
|
||||
fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
|
||||
faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
|
||||
fxch %st(1) // scm1 | _d_tdivzstepu*scm1
|
||||
fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
|
||||
fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
|
||||
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
|
||||
flds fp_64k // 64k | _d_sdivzstepu*scm1
|
||||
fxch %st(1) // _d_sdivzstepu*scm1 | 64k
|
||||
faddp %st(0),%st(4) // 64k
|
||||
|
||||
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
jmp LFDIVInFlight2
|
||||
|
||||
.align 4
|
||||
LSetupNotLast2:
|
||||
fadds zi8stepu
|
||||
fxch %st(2)
|
||||
fadds sdivz8stepu
|
||||
fxch %st(2)
|
||||
flds tdivz8stepu
|
||||
faddp %st(0),%st(2)
|
||||
flds fp_64k
|
||||
fdiv %st(1),%st(0) // z = 1/1/z
|
||||
// this is what we've gone to all this trouble to
|
||||
// overlap
|
||||
LFDIVInFlight2:
|
||||
pushl %eax
|
||||
|
||||
cmpw 10(%ecx),%bp
|
||||
jl Lp6
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp6
|
||||
movw %bp,10(%ecx)
|
||||
movb %al,5(%edi)
|
||||
Lp6:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
cmpw 12(%ecx),%bp
|
||||
jl Lp7
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp7
|
||||
movw %bp,12(%ecx)
|
||||
movb %al,6(%edi)
|
||||
Lp7:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
cmpw 14(%ecx),%bp
|
||||
jl Lp8
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp8
|
||||
movw %bp,14(%ecx)
|
||||
movb %al,7(%edi)
|
||||
Lp8:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
addl $8,%edi
|
||||
addl $16,%ecx
|
||||
movl %edx,tfracf
|
||||
movl snext,%edx
|
||||
movl %ebx,sfracf
|
||||
movl tnext,%ebx
|
||||
movl %edx,s
|
||||
movl %ebx,t
|
||||
|
||||
movl %ecx,pz
|
||||
movl %ebp,izi
|
||||
|
||||
popl %ecx // retrieve count
|
||||
|
||||
//
|
||||
// determine whether last span or not
|
||||
//
|
||||
cmpl $8,%ecx // are there multiple segments remaining?
|
||||
ja LNotLastSegment // yes
|
||||
|
||||
//
|
||||
// last segment of scan
|
||||
//
|
||||
LLastSegment:
|
||||
|
||||
//
|
||||
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
||||
// get there. The number of pixels left is variable, and we want to land on the
|
||||
// last pixel, not step one past it, so we can't run into arithmetic problems
|
||||
//
|
||||
testl %ecx,%ecx
|
||||
jz LNoSteps // just draw the last pixel and we're done
|
||||
|
||||
// pick up after the FDIV that was left in flight previously
|
||||
|
||||
|
||||
fld %st(0) // duplicate it
|
||||
fmul %st(4),%st(0) // s = s/z * z
|
||||
fxch %st(1)
|
||||
fmul %st(3),%st(0) // t = t/z * z
|
||||
fxch %st(1)
|
||||
fistpl snext
|
||||
fistpl tnext
|
||||
|
||||
movl C(tadjust),%ebx
|
||||
movl C(sadjust),%eax
|
||||
|
||||
addl snext,%eax
|
||||
addl tnext,%ebx
|
||||
|
||||
movl C(bbextents),%ebp
|
||||
movl C(bbextentt),%edx
|
||||
|
||||
cmpl $2048,%eax
|
||||
jl LClampLow4
|
||||
cmpl %ebp,%eax
|
||||
ja LClampHigh4
|
||||
LClampReentry4:
|
||||
movl %eax,snext
|
||||
|
||||
cmpl $2048,%ebx
|
||||
jl LClampLow5
|
||||
cmpl %edx,%ebx
|
||||
ja LClampHigh5
|
||||
LClampReentry5:
|
||||
|
||||
cmpl $1,%ecx // don't bother
|
||||
je LOnlyOneStep // if two pixels in segment, there's only one step,
|
||||
// of the segment length
|
||||
subl s,%eax
|
||||
subl t,%ebx
|
||||
|
||||
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
|
||||
addl %ebx,%ebx // reciprocal yields 16.48
|
||||
imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
|
||||
movl %edx,%ebp
|
||||
|
||||
movl %ebx,%eax
|
||||
imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
|
||||
|
||||
LSetEntryvec:
|
||||
//
|
||||
// set up advancetable
|
||||
//
|
||||
movl spr8entryvec_table(,%ecx,4),%ebx
|
||||
movl %edx,%eax
|
||||
pushl %ebx // entry point into code for RET later
|
||||
movl %ebp,%ecx
|
||||
sarl $16,%ecx // sstep >>= 16;
|
||||
movl C(cachewidth),%ebx
|
||||
sarl $16,%edx // tstep >>= 16;
|
||||
jz LIsZeroLast
|
||||
imull %ebx,%edx // (tstep >> 16) * cachewidth;
|
||||
LIsZeroLast:
|
||||
addl %ecx,%edx // add in sstep
|
||||
// (tstep >> 16) * cachewidth + (sstep >> 16);
|
||||
movl tfracf,%ecx
|
||||
movl %edx,advancetable+4 // advance base in t
|
||||
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
|
||||
// (sstep >> 16);
|
||||
shll $16,%ebp // left-justify sstep fractional part
|
||||
movl sfracf,%ebx
|
||||
shll $16,%eax // left-justify tstep fractional part
|
||||
movl %edx,advancetable // advance extra in t
|
||||
|
||||
movl %eax,tstep
|
||||
movl %ebp,sstep
|
||||
movl %ecx,%edx
|
||||
|
||||
movl pz,%ecx
|
||||
movl izi,%ebp
|
||||
|
||||
ret // jump to the number-of-pixels handler
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
LNoSteps:
|
||||
movl pz,%ecx
|
||||
subl $7,%edi // adjust for hardwired offset
|
||||
subl $14,%ecx
|
||||
jmp LEndSpan
|
||||
|
||||
|
||||
LOnlyOneStep:
|
||||
subl s,%eax
|
||||
subl t,%ebx
|
||||
movl %eax,%ebp
|
||||
movl %ebx,%edx
|
||||
jmp LSetEntryvec
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry2_8
|
||||
Spr8Entry2_8:
|
||||
subl $6,%edi // adjust for hardwired offsets
|
||||
subl $12,%ecx
|
||||
movb (%esi),%al
|
||||
jmp LLEntry2_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry3_8
|
||||
Spr8Entry3_8:
|
||||
subl $5,%edi // adjust for hardwired offsets
|
||||
subl $10,%ecx
|
||||
jmp LLEntry3_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry4_8
|
||||
Spr8Entry4_8:
|
||||
subl $4,%edi // adjust for hardwired offsets
|
||||
subl $8,%ecx
|
||||
jmp LLEntry4_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry5_8
|
||||
Spr8Entry5_8:
|
||||
subl $3,%edi // adjust for hardwired offsets
|
||||
subl $6,%ecx
|
||||
jmp LLEntry5_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry6_8
|
||||
Spr8Entry6_8:
|
||||
subl $2,%edi // adjust for hardwired offsets
|
||||
subl $4,%ecx
|
||||
jmp LLEntry6_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry7_8
|
||||
Spr8Entry7_8:
|
||||
decl %edi // adjust for hardwired offsets
|
||||
subl $2,%ecx
|
||||
jmp LLEntry7_8
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
.globl Spr8Entry8_8
|
||||
Spr8Entry8_8:
|
||||
cmpw (%ecx),%bp
|
||||
jl Lp9
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp9
|
||||
movw %bp,(%ecx)
|
||||
movb %al,(%edi)
|
||||
Lp9:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry7_8:
|
||||
cmpw 2(%ecx),%bp
|
||||
jl Lp10
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp10
|
||||
movw %bp,2(%ecx)
|
||||
movb %al,1(%edi)
|
||||
Lp10:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry6_8:
|
||||
cmpw 4(%ecx),%bp
|
||||
jl Lp11
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp11
|
||||
movw %bp,4(%ecx)
|
||||
movb %al,2(%edi)
|
||||
Lp11:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry5_8:
|
||||
cmpw 6(%ecx),%bp
|
||||
jl Lp12
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp12
|
||||
movw %bp,6(%ecx)
|
||||
movb %al,3(%edi)
|
||||
Lp12:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry4_8:
|
||||
cmpw 8(%ecx),%bp
|
||||
jl Lp13
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp13
|
||||
movw %bp,8(%ecx)
|
||||
movb %al,4(%edi)
|
||||
Lp13:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry3_8:
|
||||
cmpw 10(%ecx),%bp
|
||||
jl Lp14
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp14
|
||||
movw %bp,10(%ecx)
|
||||
movb %al,5(%edi)
|
||||
Lp14:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
LLEntry2_8:
|
||||
cmpw 12(%ecx),%bp
|
||||
jl Lp15
|
||||
movb (%esi),%al
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp15
|
||||
movw %bp,12(%ecx)
|
||||
movb %al,6(%edi)
|
||||
Lp15:
|
||||
addl izistep,%ebp
|
||||
adcl $0,%ebp
|
||||
addl tstep,%edx
|
||||
sbbl %eax,%eax
|
||||
addl sstep,%ebx
|
||||
adcl advancetable+4(,%eax,4),%esi
|
||||
|
||||
LEndSpan:
|
||||
cmpw 14(%ecx),%bp
|
||||
jl Lp16
|
||||
movb (%esi),%al // load first texel in segment
|
||||
cmpb $(TRANSPARENT_COLOR),%al
|
||||
jz Lp16
|
||||
movw %bp,14(%ecx)
|
||||
movb %al,7(%edi)
|
||||
Lp16:
|
||||
|
||||
//
|
||||
// clear s/z, t/z, 1/z from FP stack
|
||||
//
|
||||
fstp %st(0)
|
||||
fstp %st(0)
|
||||
fstp %st(0)
|
||||
|
||||
popl %ebx // restore spans pointer
|
||||
LNextSpan:
|
||||
addl $(sspan_t_size),%ebx // point to next span
|
||||
movl sspan_t_count(%ebx),%ecx
|
||||
cmpl $0,%ecx // any more spans?
|
||||
jg LSpanLoop // yes
|
||||
jz LNextSpan // yes, but this one's empty
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
#endif // id386
|
213
source/d_varsa.S
Normal file
213
source/d_varsa.S
Normal file
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// d_varsa.s
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
//-------------------------------------------------------
|
||||
// global refresh variables
|
||||
//-------------------------------------------------------
|
||||
|
||||
// FIXME: put all refresh variables into one contiguous block. Make into one
|
||||
// big structure, like cl or sv?
|
||||
|
||||
.align 4
|
||||
.globl C(d_sdivzstepu)
|
||||
.globl C(d_tdivzstepu)
|
||||
.globl C(d_zistepu)
|
||||
.globl C(d_sdivzstepv)
|
||||
.globl C(d_tdivzstepv)
|
||||
.globl C(d_zistepv)
|
||||
.globl C(d_sdivzorigin)
|
||||
.globl C(d_tdivzorigin)
|
||||
.globl C(d_ziorigin)
|
||||
C(d_sdivzstepu): .single 0
|
||||
C(d_tdivzstepu): .single 0
|
||||
C(d_zistepu): .single 0
|
||||
C(d_sdivzstepv): .single 0
|
||||
C(d_tdivzstepv): .single 0
|
||||
C(d_zistepv): .single 0
|
||||
C(d_sdivzorigin): .single 0
|
||||
C(d_tdivzorigin): .single 0
|
||||
C(d_ziorigin): .single 0
|
||||
|
||||
.globl C(sadjust)
|
||||
.globl C(tadjust)
|
||||
.globl C(bbextents)
|
||||
.globl C(bbextentt)
|
||||
C(sadjust): .long 0
|
||||
C(tadjust): .long 0
|
||||
C(bbextents): .long 0
|
||||
C(bbextentt): .long 0
|
||||
|
||||
.globl C(cacheblock)
|
||||
.globl C(d_viewbuffer)
|
||||
.globl C(cachewidth)
|
||||
.globl C(d_pzbuffer)
|
||||
.globl C(d_zrowbytes)
|
||||
.globl C(d_zwidth)
|
||||
C(cacheblock): .long 0
|
||||
C(cachewidth): .long 0
|
||||
C(d_viewbuffer): .long 0
|
||||
C(d_pzbuffer): .long 0
|
||||
C(d_zrowbytes): .long 0
|
||||
C(d_zwidth): .long 0
|
||||
|
||||
|
||||
//-------------------------------------------------------
|
||||
// ASM-only variables
|
||||
//-------------------------------------------------------
|
||||
.globl izi
|
||||
izi: .long 0
|
||||
|
||||
.globl pbase, s, t, sfracf, tfracf, snext, tnext
|
||||
.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
|
||||
.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
|
||||
s: .long 0
|
||||
t: .long 0
|
||||
snext: .long 0
|
||||
tnext: .long 0
|
||||
sfracf: .long 0
|
||||
tfracf: .long 0
|
||||
pbase: .long 0
|
||||
zi8stepu: .long 0
|
||||
sdivz8stepu: .long 0
|
||||
tdivz8stepu: .long 0
|
||||
zi16stepu: .long 0
|
||||
sdivz16stepu: .long 0
|
||||
tdivz16stepu: .long 0
|
||||
spancountminus1: .long 0
|
||||
pz: .long 0
|
||||
|
||||
.globl izistep
|
||||
izistep: .long 0
|
||||
|
||||
//-------------------------------------------------------
|
||||
// local variables for d_draw16.s
|
||||
//-------------------------------------------------------
|
||||
|
||||
.globl reciprocal_table_16, entryvec_table_16
|
||||
// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
|
||||
// 1/14, and 1/15 in 1.31 fixed-point form (0x40000000 == 1/2)
|
||||
reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
|
||||
.long 0x19999999, 0x15555555, 0x12492492
|
||||
.long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
|
||||
.long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
|
||||
|
||||
#ifndef NeXT
|
||||
.extern Entry2_16
|
||||
.extern Entry3_16
|
||||
.extern Entry4_16
|
||||
.extern Entry5_16
|
||||
.extern Entry6_16
|
||||
.extern Entry7_16
|
||||
.extern Entry8_16
|
||||
.extern Entry9_16
|
||||
.extern Entry10_16
|
||||
.extern Entry11_16
|
||||
.extern Entry12_16
|
||||
.extern Entry13_16
|
||||
.extern Entry14_16
|
||||
.extern Entry15_16
|
||||
.extern Entry16_16
|
||||
#endif
|
||||
|
||||
entryvec_table_16: .long 0, Entry2_16, Entry3_16, Entry4_16
|
||||
.long Entry5_16, Entry6_16, Entry7_16, Entry8_16
|
||||
.long Entry9_16, Entry10_16, Entry11_16, Entry12_16
|
||||
.long Entry13_16, Entry14_16, Entry15_16, Entry16_16
|
||||
|
||||
//-------------------------------------------------------
|
||||
// local variables for d_parta.s
|
||||
//-------------------------------------------------------
|
||||
.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
|
||||
DP_Count: .long 0
|
||||
DP_u: .long 0
|
||||
DP_v: .long 0
|
||||
DP_32768: .single 32768.0
|
||||
DP_Color: .long 0
|
||||
DP_Pix: .long 0
|
||||
|
||||
|
||||
#ifndef NeXT
|
||||
.extern DP_1x1
|
||||
.extern DP_2x2
|
||||
.extern DP_3x3
|
||||
.extern DP_4x4
|
||||
#endif
|
||||
|
||||
DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4
|
||||
|
||||
//
|
||||
// advancetable is 8 bytes (two longs); the drawing code addresses it as
|
||||
// advancetable+4(,index,4), i.e. from the middle, so an index of -1 or 0 works
|
||||
//
|
||||
.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
|
||||
advancetable: .long 0, 0
|
||||
sstep: .long 0
|
||||
tstep: .long 0
|
||||
|
||||
pspantemp: .long 0
|
||||
counttemp: .long 0
|
||||
jumptemp: .long 0
|
||||
|
||||
// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 1.31 fixed-point form (0x40000000 == 1/2)
|
||||
.globl reciprocal_table, entryvec_table
|
||||
reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
|
||||
.long 0x19999999, 0x15555555, 0x12492492
|
||||
|
||||
#ifndef NeXT
|
||||
.extern Entry2_8
|
||||
.extern Entry3_8
|
||||
.extern Entry4_8
|
||||
.extern Entry5_8
|
||||
.extern Entry6_8
|
||||
.extern Entry7_8
|
||||
.extern Entry8_8
|
||||
#endif
|
||||
|
||||
entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8
|
||||
.long Entry5_8, Entry6_8, Entry7_8, Entry8_8
|
||||
|
||||
#ifndef NeXT
|
||||
.extern Spr8Entry2_8
|
||||
.extern Spr8Entry3_8
|
||||
.extern Spr8Entry4_8
|
||||
.extern Spr8Entry5_8
|
||||
.extern Spr8Entry6_8
|
||||
.extern Spr8Entry7_8
|
||||
.extern Spr8Entry8_8
|
||||
#endif
|
||||
|
||||
.globl spr8entryvec_table
|
||||
spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
|
||||
.long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
|
||||
|
||||
#endif // id386
|
||||
|
|
@ -1,3 +1,22 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// math.s
|
||||
// x86 assembly-language math routines.
|
||||
|
@ -6,7 +25,7 @@
|
|||
#include "quakeasm.h"
|
||||
|
||||
|
||||
#if id386
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
|
@ -16,6 +35,73 @@ Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
|
|||
|
||||
.text
|
||||
|
||||
// TODO: rounding needed?
|
||||
// stack parameter offset
|
||||
#define val 4
|
||||
|
||||
//
// Invert24To16 (val)
// Divides the 64-bit constant 0x10000000000 (2^40) by val with an unsigned
// 64/32-bit divide and returns the 32-bit quotient.
// In:    val(%esp) = 32-bit divisor, read from the stack
// Out:   %eax = quotient, or 0xFFFFFFFF when val <= 0x100 (signed compare)
// Clobb: %ecx, %edx (divl leaves the remainder there), flags
//
.globl C(Invert24To16)
|
||||
C(Invert24To16):
|
||||
|
||||
movl val(%esp),%ecx
|
||||
movl $0x100,%edx // 0x10000000000 as dividend
|
||||
cmpl %edx,%ecx // val compared against 0x100 (high dword of the dividend)
|
||||
jle LOutOfRange // signed <=: covers every divisor whose quotient would overflow 32 bits, and all negative values
|
||||
|
||||
subl %eax,%eax // low dword of the dividend = 0
|
||||
divl %ecx // %eax = %edx:%eax / val
|
||||
|
||||
ret
|
||||
|
||||
LOutOfRange:
|
||||
movl $0xFFFFFFFF,%eax // saturated result for out-of-range divisors
|
||||
ret
|
||||
|
||||
//
// TransformVector (in, out)
// Computes three 3-element dot products of single-precision floats:
//   out[0] = in . vright,  out[1] = in . vup,  out[2] = in . vpn
// In:    in(%esp)  = pointer to the 3 source floats
//        out(%esp) = pointer to the 3 destination floats
// Clobb: %eax, %edx, x87 stack (left balanced on return)
// The trailing comments show the x87 stack after each instruction, st(0) first.
//
// stack parameter offsets
#define in 4
|
||||
#define out 8
|
||||
|
||||
.align 2
|
||||
.globl C(TransformVector)
|
||||
C(TransformVector):
|
||||
movl in(%esp),%eax
|
||||
movl out(%esp),%edx
|
||||
|
||||
flds (%eax) // in[0]
|
||||
fmuls C(vright) // in[0]*vright[0]
|
||||
flds (%eax) // in[0] | in[0]*vright[0]
|
||||
fmuls C(vup) // in[0]*vup[0] | in[0]*vright[0]
|
||||
flds (%eax) // in[0] | in[0]*vup[0] | in[0]*vright[0]
|
||||
fmuls C(vpn) // in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
|
||||
|
||||
flds 4(%eax) // in[1] | ...
|
||||
fmuls C(vright)+4 // in[1]*vright[1] | ...
|
||||
flds 4(%eax) // in[1] | in[1]*vright[1] | ...
|
||||
fmuls C(vup)+4 // in[1]*vup[1] | in[1]*vright[1] | ...
|
||||
flds 4(%eax) // in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
|
||||
fmuls C(vpn)+4 // in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
|
||||
fxch %st(2) // in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
|
||||
|
||||
faddp %st(0),%st(5) // in[1]*vup[1] | in[1]*vpn[1] | ...
|
||||
faddp %st(0),%st(3) // in[1]*vpn[1] | ...
|
||||
faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
|
||||
|
||||
flds 8(%eax) // in[2] | ...
|
||||
fmuls C(vright)+8 // in[2]*vright[2] | ...
|
||||
flds 8(%eax) // in[2] | in[2]*vright[2] | ...
|
||||
fmuls C(vup)+8 // in[2]*vup[2] | in[2]*vright[2] | ...
|
||||
flds 8(%eax) // in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
|
||||
fmuls C(vpn)+8 // in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
|
||||
fxch %st(2) // in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
|
||||
|
||||
faddp %st(0),%st(5) // in[2]*vup[2] | in[2]*vpn[2] | ...
|
||||
faddp %st(0),%st(3) // in[2]*vpn[2] | ...
|
||||
faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
|
||||
|
||||
fstps 8(%edx) // out[2]
|
||||
fstps 4(%edx) // out[1]
|
||||
fstps (%edx) // out[0]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
#define EMINS 4+4
|
||||
#define EMAXS 4+8
|
||||
|
@ -35,7 +121,7 @@ C(BoxOnPlaneSide):
|
|||
jge Lerror
|
||||
flds pl_normal(%edx) // p->normal[0]
|
||||
fld %st(0) // p->normal[0] | p->normal[0]
|
||||
jmp *Ljmptab(,%eax,4)
|
||||
jmp Ljmptab(,%eax,4)
|
||||
|
||||
|
||||
//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
|
||||
|
|
216
source/r_aclipa.S
Normal file
216
source/r_aclipa.S
Normal file
|
@ -0,0 +1,216 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// r_aclipa.s
|
||||
// x86 assembly-language Alias model clipping code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
Ltemp0: .long 0
|
||||
Ltemp1: .long 0
|
||||
|
||||
.text
|
||||
|
||||
#define pfv0 8+4
|
||||
#define pfv1 8+8
|
||||
#define out 8+12
|
||||
|
||||
//
// R_Alias_clip_bottom (pfv0, pfv1, out)
// Builds the vertex where the edge between *pfv0 and *pfv1 meets the clip
// coordinate loaded from r_refdef+rd_aliasvrectbottom, and stores it in *out.
// The two vertices are ordered by fv_v+4 before interpolating. Six
// consecutive 32-bit integer fields starting at fv_v are then interpolated
// (two passes of three) as start + (end - start) * scale, with 0.5 added
// before each value is stored back as an integer.
//
// In:    pfv0(%esp), pfv1(%esp), out(%esp) -- offsets valid after the two
//        pushes below
// Clobb: %eax, %ecx, %edx, flags, static scratch words Ltemp0/Ltemp1;
//        %esi/%edi are saved and restored; x87 stack is left balanced
//
// LDoForwardOrBackward, LDoForward and LDo3Forward are shared: the other
// R_Alias_clip_* entry points jump into them with
//   %esi = pfv0, %edi = pfv1, %eax = clip coordinate
// (and, at LDoForward, %edx/%ecx = start/end coordinate along the clip axis).
// The trailing comments show the x87 stack, st(0) first; "fv0" is the vertex
// in %esi and "fv1" the vertex in %edi.
//
.globl C(R_Alias_clip_bottom)
|
||||
C(R_Alias_clip_bottom):
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl pfv0(%esp),%esi
|
||||
movl pfv1(%esp),%edi
|
||||
|
||||
movl C(r_refdef)+rd_aliasvrectbottom,%eax
|
||||
|
||||
LDoForwardOrBackward:
|
||||
|
||||
movl fv_v+4(%esi),%edx
|
||||
movl fv_v+4(%edi),%ecx
|
||||
|
||||
cmpl %ecx,%edx
|
||||
jl LDoForward // pfv0 coordinate < pfv1 coordinate: keep this order
|
||||
|
||||
movl fv_v+4(%esi),%ecx // otherwise swap the roles of the two vertices
|
||||
movl fv_v+4(%edi),%edx
|
||||
movl pfv0(%esp),%edi
|
||||
movl pfv1(%esp),%esi
|
||||
|
||||
LDoForward:
|
||||
|
||||
subl %edx,%ecx // end coordinate - start coordinate
|
||||
subl %edx,%eax // clip coordinate - start coordinate
|
||||
movl %ecx,Ltemp1
|
||||
movl %eax,Ltemp0
|
||||
fildl Ltemp1
|
||||
fildl Ltemp0
|
||||
movl out(%esp),%edx
|
||||
movl $2,%eax // two passes of three fields each
|
||||
|
||||
fdivp %st(0),%st(1) // scale (presumably Ltemp0 / Ltemp1 -- confirm against GAS fdivp operand order)
|
||||
|
||||
LDo3Forward:
|
||||
fildl fv_v+0(%esi) // fv0v0 | scale
|
||||
fildl fv_v+0(%edi) // fv1v0 | fv0v0 | scale
|
||||
fildl fv_v+4(%esi) // fv0v1 | fv1v0 | fv0v0 | scale
|
||||
fildl fv_v+4(%edi) // fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
|
||||
fildl fv_v+8(%esi) // fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 | scale
|
||||
fildl fv_v+8(%edi) // fv1v2 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv0v0 |
|
||||
// scale
|
||||
fxch %st(5) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0 | fv1v2 |
|
||||
// scale
|
||||
fsubr %st(0),%st(4) // fv0v0 | fv0v2 | fv1v1 | fv0v1 | fv1v0-fv0v0 |
|
||||
// fv1v2 | scale
|
||||
fxch %st(3) // fv0v1 | fv0v2 | fv1v1 | fv0v0 | fv1v0-fv0v0 |
|
||||
// fv1v2 | scale
|
||||
fsubr %st(0),%st(2) // fv0v1 | fv0v2 | fv1v1-fv0v1 | fv0v0 |
|
||||
// fv1v0-fv0v0 | fv1v2 | scale
|
||||
fxch %st(1) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
|
||||
// fv1v0-fv0v0 | fv1v2 | scale
|
||||
fsubr %st(0),%st(5) // fv0v2 | fv0v1 | fv1v1-fv0v1 | fv0v0 |
|
||||
// fv1v0-fv0v0 | fv1v2-fv0v2 | scale
|
||||
fxch %st(6) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
|
||||
// fv1v0-fv0v0 | fv1v2-fv0v2 | fv0v2
|
||||
fmul %st(0),%st(4) // scale | fv0v1 | fv1v1-fv0v1 | fv0v0 |
|
||||
// (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
|
||||
addl $12,%edi
|
||||
fmul %st(0),%st(2) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
|
||||
// (fv1v0-fv0v0)*scale | fv1v2-fv0v2 | fv0v2
|
||||
addl $12,%esi
|
||||
addl $12,%edx // pointers now 12 past the fields in flight; stores below use -12
|
||||
fmul %st(0),%st(5) // scale | fv0v1 | (fv1v1-fv0v1)*scale | fv0v0 |
|
||||
// (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
|
||||
// fv0v2
|
||||
fxch %st(3) // fv0v0 | fv0v1 | (fv1v1-fv0v1)*scale | scale |
|
||||
// (fv1v0-fv0v0)*scale | (fv1v2-fv0v2)*scale |
|
||||
// fv0v2
|
||||
faddp %st(0),%st(4) // fv0v1 | (fv1v1-fv0v1)*scale | scale |
|
||||
// fv0v0+(fv1v0-fv0v0)*scale |
|
||||
// (fv1v2-fv0v2)*scale | fv0v2
|
||||
faddp %st(0),%st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
|
||||
// fv0v0+(fv1v0-fv0v0)*scale |
|
||||
// (fv1v2-fv0v2)*scale | fv0v2
|
||||
fxch %st(4) // fv0v2 | scale | fv0v0+(fv1v0-fv0v0)*scale |
|
||||
// (fv1v2-fv0v2)*scale | fv0v1+(fv1v1-fv0v1)*scale
|
||||
faddp %st(0),%st(3) // scale | fv0v0+(fv1v0-fv0v0)*scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale |
|
||||
// fv0v1+(fv1v1-fv0v1)*scale
|
||||
fxch %st(1) // fv0v0+(fv1v0-fv0v0)*scale | scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale |
|
||||
// fv0v1+(fv1v1-fv0v1)*scale
|
||||
fadds float_point5 // add 0.5 to the interpolated field 0
|
||||
fxch %st(3) // fv0v1+(fv1v1-fv0v1)*scale | scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale |
|
||||
// fv0v0+(fv1v0-fv0v0)*scale
|
||||
fadds float_point5 // add 0.5 to the interpolated field 1
|
||||
fxch %st(2) // fv0v2+(fv1v2-fv0v2)*scale | scale |
|
||||
// fv0v1+(fv1v1-fv0v1)*scale |
|
||||
// fv0v0+(fv1v0-fv0v0)*scale
|
||||
fadds float_point5 // add 0.5 to the interpolated field 2
|
||||
fxch %st(3) // fv0v0+(fv1v0-fv0v0)*scale | scale |
|
||||
// fv0v1+(fv1v1-fv0v1)*scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale
|
||||
fistpl fv_v+0-12(%edx) // scale | fv0v1+(fv1v1-fv0v1)*scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale
|
||||
fxch %st(1) // fv0v1+(fv1v1-fv0v1)*scale | scale |
|
||||
// fv0v2+(fv1v2-fv0v2)*scale
|
||||
fistpl fv_v+4-12(%edx) // scale | fv0v2+(fv1v2-fv0v2)*scale
|
||||
fxch %st(1) // fv0v2+(fv1v2-fv0v2)*sc | scale
|
||||
fistpl fv_v+8-12(%edx) // scale
|
||||
|
||||
decl %eax
|
||||
jnz LDo3Forward // second pass handles the next three fields
|
||||
|
||||
fstp %st(0) // discard scale, leaving the x87 stack balanced
|
||||
|
||||
popl %edi
|
||||
popl %esi
|
||||
|
||||
ret
|
||||
|
||||
|
||||
//
// R_Alias_clip_top (pfv0, pfv1, out)
// Same entry sequence as R_Alias_clip_bottom, but the clip coordinate is
// loaded from r_refdef+rd_aliasvrect+4. Control then joins the shared
// LDoForwardOrBackward path, which does the interpolation, pops %edi/%esi
// and returns to the caller.
//
.globl C(R_Alias_clip_top)
|
||||
C(R_Alias_clip_top):
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl pfv0(%esp),%esi
|
||||
movl pfv1(%esp),%edi
|
||||
|
||||
movl C(r_refdef)+rd_aliasvrect+4,%eax
|
||||
jmp LDoForwardOrBackward // shared code restores %edi/%esi and returns
|
||||
|
||||
|
||||
|
||||
//
// R_Alias_clip_right (pfv0, pfv1, out)
// Clips against the coordinate loaded from r_refdef+rd_aliasvrectright.
// The two vertices are ordered by comparing their fv_v+4 fields, but the
// values handed to LDoForward in %edx/%ecx (start/end) are the fv_v+0
// fields, so the interpolation scale is computed along fv_v+0.
// LRightLeftEntry is shared with R_Alias_clip_left, which enters with
// %esi = pfv0, %edi = pfv1, %eax = clip coordinate and both registers pushed.
//
.globl C(R_Alias_clip_right)
|
||||
C(R_Alias_clip_right):
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl pfv0(%esp),%esi
|
||||
movl pfv1(%esp),%edi
|
||||
|
||||
movl C(r_refdef)+rd_aliasvrectright,%eax
|
||||
|
||||
LRightLeftEntry:
|
||||
|
||||
|
||||
movl fv_v+4(%esi),%edx
|
||||
movl fv_v+4(%edi),%ecx
|
||||
|
||||
cmpl %ecx,%edx // flags from this compare are consumed by the jl below
|
||||
movl fv_v+0(%esi),%edx // movl leaves the flags untouched
|
||||
|
||||
movl fv_v+0(%edi),%ecx
|
||||
jl LDoForward2 // pfv0 fv_v+4 < pfv1 fv_v+4: keep this order
|
||||
|
||||
movl fv_v+0(%esi),%ecx // otherwise swap the roles of the two vertices
|
||||
movl fv_v+0(%edi),%edx
|
||||
movl pfv0(%esp),%edi
|
||||
movl pfv1(%esp),%esi
|
||||
|
||||
LDoForward2:
|
||||
|
||||
jmp LDoForward // shared code interpolates, restores %edi/%esi and returns
|
||||
|
||||
|
||||
//
// R_Alias_clip_left (pfv0, pfv1, out)
// Same entry sequence as R_Alias_clip_right, but the clip coordinate is
// loaded from r_refdef+rd_aliasvrect+0. Control then joins the shared
// LRightLeftEntry path, which eventually pops %edi/%esi and returns to the
// caller.
//
.globl C(R_Alias_clip_left)
|
||||
C(R_Alias_clip_left):
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl pfv0(%esp),%esi
|
||||
movl pfv1(%esp),%edi
|
||||
|
||||
movl C(r_refdef)+rd_aliasvrect+0,%eax
|
||||
jmp LRightLeftEntry // shared code restores %edi/%esi and returns
|
||||
|
||||
|
||||
#endif // id386
|
||||
|
237
source/r_aliasa.S
Normal file
237
source/r_aliasa.S
Normal file
|
@ -0,0 +1,237 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// r_aliasa.s
|
||||
// x86 assembly-language Alias model transform and project code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
Lfloat_1: .single 1.0
|
||||
Ltemp: .long 0
|
||||
Lcoords: .long 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
#define fv 12+4
|
||||
#define pstverts 12+8
|
||||
|
||||
//----------------------------------------------------------------------
// R_AliasTransformAndProjectFinalVerts
//
// Per the C reference kept in the comments of this routine: for each of
// r_anumverts vertices, transform pverts->v by aliastransform, project
// it, compute the light value, and fill in one fv entry.
//
// Stack arguments (offsets are valid only after the three pushes below):
//   fv(%esp)       - output array, stepped by fv_size per vertex
//   pstverts(%esp) - s/t/onseam input array, stepped by stv_size
//
// Register roles inside the loop:
//   %esi = pverts (loaded from r_apverts)   %ebp = pstverts
//   %edi = fv                               %ecx = vertices remaining
//   %edx = zero-extended byte scratch       %eax = scratch / light value
//
// NOTE(review): the loop is bottom-tested (decl/jnz), so the body runs
// at least once; callers presumably guarantee r_anumverts > 0 - confirm.
//----------------------------------------------------------------------
.globl C(R_AliasTransformAndProjectFinalVerts)
|
||||
C(R_AliasTransformAndProjectFinalVerts):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
|
||||
// int i, temp;
|
||||
// float lightcos, *plightnormal, zi;
|
||||
// trivertx_t *pverts;
|
||||
|
||||
// pverts = r_apverts;
|
||||
movl C(r_apverts),%esi
|
||||
|
||||
// for (i=0 ; i<r_anumverts ; i++, fv++, pverts++, pstverts++)
|
||||
// {
|
||||
movl pstverts(%esp),%ebp
|
||||
movl fv(%esp),%edi
|
||||
movl C(r_anumverts),%ecx
|
||||
// clear all of %edx once; the loop only ever writes %dl, so the upper
// 24 bits stay zero and %edx can be used directly as an index
subl %edx,%edx
|
||||
|
||||
Lloop:
|
||||
|
||||
// // transform and project
|
||||
// zi = 1.0 / (DotProduct(pverts->v, aliastransform[2]) +
|
||||
// aliastransform[2][3]);
|
||||
movb (%esi),%dl
|
||||
movb %dl,Lcoords
|
||||
fildl Lcoords // v[0]
|
||||
movb 1(%esi),%dl
|
||||
movb %dl,Lcoords+4
|
||||
fildl Lcoords+4 // v[1] | v[0]
|
||||
movb 2(%esi),%dl
|
||||
movb %dl,Lcoords+8
|
||||
fildl Lcoords+8 // v[2] | v[1] | v[0]
|
||||
|
||||
fld %st(2) // v[0] | v[2] | v[1] | v[0]
|
||||
fmuls C(aliastransform)+32 // accum | v[2] | v[1] | v[0]
|
||||
fld %st(2) // v[1] | accum | v[2] | v[1] | v[0]
|
||||
fmuls C(aliastransform)+36 // accum2 | accum | v[2] | v[1] | v[0]
|
||||
fxch %st(1) // accum | accum2 | v[2] | v[1] | v[0]
|
||||
fadds C(aliastransform)+44 // accum | accum2 | v[2] | v[1] | v[0]
|
||||
fld %st(2) // v[2] | accum | accum2 | v[2] | v[1] | v[0]
|
||||
fmuls C(aliastransform)+40 // accum3 | accum | accum2 | v[2] | v[1] |
|
||||
// v[0]
|
||||
fxch %st(1) // accum | accum3 | accum2 | v[2] | v[1] | v[0]
|
||||
faddp %st(0),%st(2) // accum3 | accum | v[2] | v[1] | v[0]
|
||||
movb tv_lightnormalindex(%esi),%dl
|
||||
movl stv_s(%ebp),%eax
|
||||
movl %eax,fv_v+8(%edi)
|
||||
faddp %st(0),%st(1) // z | v[2] | v[1] | v[0]
|
||||
|
||||
movl stv_t(%ebp),%eax
|
||||
movl %eax,fv_v+12(%edi)
|
||||
|
||||
// // lighting
|
||||
// plightnormal = r_avertexnormals[pverts->lightnormalindex];
|
||||
|
||||
fdivrs Lfloat_1 // zi | v[2] | v[1] | v[0]
|
||||
|
||||
// fv->v[2] = pstverts->s;
|
||||
// fv->v[3] = pstverts->t;
|
||||
// fv->flags = pstverts->onseam;
|
||||
movl stv_onseam(%ebp),%eax
|
||||
movl %eax,fv_flags(%edi)
|
||||
|
||||
movl fv_size(%edi),%eax
|
||||
movl stv_size(%ebp),%eax
|
||||
movl 4(%esi),%eax
|
||||
|
||||
leal (%edx,%edx,2),%eax // index*3
|
||||
|
||||
fxch %st(3) // v[0] | v[2] | v[1] | zi
|
||||
|
||||
// lightcos = DotProduct (plightnormal, r_plightvec);
|
||||
flds C(r_avertexnormals)(,%eax,4)
|
||||
fmuls C(r_plightvec)
|
||||
flds C(r_avertexnormals)+4(,%eax,4)
|
||||
fmuls C(r_plightvec)+4
|
||||
flds C(r_avertexnormals)+8(,%eax,4)
|
||||
fmuls C(r_plightvec)+8
|
||||
fxch %st(1)
|
||||
faddp %st(0),%st(2)
|
||||
fld %st(2) // v[0] | laccum | laccum2 | v[0] | v[2] |
|
||||
// v[1] | zi
|
||||
fmuls C(aliastransform)+0 // xaccum | laccum | laccum2 | v[0] | v[2] |
|
||||
// v[1] | zi
|
||||
fxch %st(2) // laccum2 | laccum | xaccum | v[0] | v[2] |
|
||||
// v[1] | zi
|
||||
faddp %st(0),%st(1) // laccum | xaccum | v[0] | v[2] | v[1] | zi
|
||||
|
||||
// temp = r_ambientlight;
|
||||
// if (lightcos < 0)
|
||||
// {
|
||||
fsts Ltemp
|
||||
movl C(r_ambientlight),%eax
|
||||
movb Ltemp+3,%dl
|
||||
testb $0x80,%dl
|
||||
jz Lsavelight // no need to clamp if only ambient lit, because
|
||||
// r_ambientlight is preclamped
|
||||
|
||||
// temp += (int)(r_shadelight * lightcos);
|
||||
fmuls C(r_shadelight)
|
||||
// FIXME: fast float->int conversion?
|
||||
fistpl Ltemp
|
||||
addl Ltemp,%eax
|
||||
|
||||
// // clamp; because we limited the minimum ambient and shading light, we
|
||||
// // don't have to clamp low light, just bright
|
||||
// if (temp < 0)
|
||||
// temp = 0;
|
||||
jns Lp1
|
||||
subl %eax,%eax
|
||||
|
||||
// }
|
||||
|
||||
Lp1:
|
||||
|
||||
// fv->v[4] = temp;
|
||||
//
|
||||
// // x, y, and z are scaled down by 1/2**31 in the transform, so 1/z is
|
||||
// // scaled up by 1/2**31, and the scaling cancels out for x and y in the
|
||||
// // projection
|
||||
// fv->v[0] = ((DotProduct(pverts->v, aliastransform[0]) +
|
||||
// aliastransform[0][3]) * zi) + aliasxcenter;
|
||||
// fv->v[1] = ((DotProduct(pverts->v, aliastransform[1]) +
|
||||
// aliastransform[1][3]) * zi) + aliasycenter;
|
||||
// fv->v[5] = zi;
|
||||
fxch %st(1) // v[0] | xaccum | v[2] | v[1] | zi
|
||||
fmuls C(aliastransform)+16 // yaccum | xaccum | v[2] | v[1] | zi
|
||||
fxch %st(3) // v[1] | xaccum | v[2] | yaccum | zi
|
||||
fld %st(0) // v[1] | v[1] | xaccum | v[2] | yaccum | zi
|
||||
fmuls C(aliastransform)+4 // xaccum2 | v[1] | xaccum | v[2] | yaccum |zi
|
||||
fxch %st(1) // v[1] | xaccum2 | xaccum | v[2] | yaccum |zi
|
||||
movl %eax,fv_v+16(%edi)
|
||||
fmuls C(aliastransform)+20 // yaccum2 | xaccum2 | xaccum | v[2] | yaccum|
|
||||
// zi
|
||||
fxch %st(2) // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
|
||||
// zi
|
||||
fadds C(aliastransform)+12 // xaccum | xaccum2 | yaccum2 | v[2] | yaccum|
|
||||
// zi
|
||||
fxch %st(4) // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
|
||||
// zi
|
||||
fadds C(aliastransform)+28 // yaccum | xaccum2 | yaccum2 | v[2] | xaccum|
|
||||
// zi
|
||||
fxch %st(3) // v[2] | xaccum2 | yaccum2 | yaccum | xaccum|
|
||||
// zi
|
||||
fld %st(0) // v[2] | v[2] | xaccum2 | yaccum2 | yaccum |
|
||||
// xaccum | zi
|
||||
fmuls C(aliastransform)+8 // xaccum3 | v[2] | xaccum2 | yaccum2 |yaccum|
|
||||
// xaccum | zi
|
||||
fxch %st(1) // v[2] | xaccum3 | xaccum2 | yaccum2 |yaccum|
|
||||
// xaccum | zi
|
||||
fmuls C(aliastransform)+24 // yaccum3 | xaccum3 | xaccum2 | yaccum2 |
|
||||
// yaccum | xaccum | zi
|
||||
fxch %st(5) // xaccum | xaccum3 | xaccum2 | yaccum2 |
|
||||
// yaccum | yaccum3 | zi
|
||||
faddp %st(0),%st(2) // xaccum3 | xaccum | yaccum2 | yaccum |
|
||||
// yaccum3 | zi
|
||||
fxch %st(3) // yaccum | xaccum | yaccum2 | xaccum3 |
|
||||
// yaccum3 | zi
|
||||
faddp %st(0),%st(2) // xaccum | yaccum | xaccum3 | yaccum3 | zi
|
||||
addl $(tv_size),%esi
|
||||
faddp %st(0),%st(2) // yaccum | x | yaccum3 | zi
|
||||
faddp %st(0),%st(2) // x | y | zi
|
||||
addl $(stv_size),%ebp
|
||||
fmul %st(2),%st(0) // x/z | y | zi
|
||||
fxch %st(1) // y | x/z | zi
|
||||
fmul %st(2),%st(0) // y/z | x/z | zi
|
||||
fxch %st(1) // x/z | y/z | zi
|
||||
fadds C(aliasxcenter) // u | y/z | zi
|
||||
fxch %st(1) // y/z | u | zi
|
||||
fadds C(aliasycenter) // v | u | zi
|
||||
fxch %st(2) // zi | u | v
|
||||
// FIXME: fast float->int conversion?
|
||||
fistpl fv_v+20(%edi) // u | v
|
||||
fistpl fv_v+0(%edi) // v
|
||||
fistpl fv_v+4(%edi)
|
||||
|
||||
// }
|
||||
|
||||
addl $(fv_size),%edi
|
||||
decl %ecx
|
||||
jnz Lloop
|
||||
|
||||
popl %esi // restore register variables
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
// Out-of-line path taken when the sign bit of lightcos is clear, i.e.
// only ambient light applies. %eax already holds r_ambientlight; all
// that remains is to discard lightcos (laccum) from the top of the FP
// stack so the stack matches what Lp1 expects, then rejoin the loop.
Lsavelight:
|
||||
fstp %st(0)
|
||||
jmp Lp1
|
||||
|
||||
#endif // id386
|
||||
|
838
source/r_drawa.S
Normal file
838
source/r_drawa.S
Normal file
|
@ -0,0 +1,838 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// r_drawa.s
|
||||
// x86 assembly-language edge clipping and emission code
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
// !!! if these are changed, they must be changed in r_draw.c too !!!
|
||||
#define FULLY_CLIPPED_CACHED 0x80000000
|
||||
#define FRAMECOUNT_MASK 0x7FFFFFFF
|
||||
|
||||
.data
|
||||
|
||||
Ld0: .single 0.0
|
||||
Ld1: .single 0.0
|
||||
Lstack: .long 0
|
||||
Lfp_near_clip: .single NEAR_CLIP
|
||||
Lceilv0: .long 0
|
||||
Lv: .long 0
|
||||
Lu0: .long 0
|
||||
Lv0: .long 0
|
||||
Lzi0: .long 0
|
||||
|
||||
.text
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// edge clipping code
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#define pv0 4+12
|
||||
#define pv1 8+12
|
||||
#define clip 12+12
|
||||
|
||||
.align 4
|
||||
//----------------------------------------------------------------------
// R_ClipEdge
//
// Stack arguments (offsets valid after the three pushes below):
//   pv0(%esp), pv1(%esp) - the two edge vertices (mv_position is read)
//   clip(%esp)           - first clip plane of a cp_next-linked list,
//                          or NULL to emit the edge unclipped
//
// Register roles in the clip loop:
//   %esi = pv0, %edx = pv1, %ebx = current clip plane
//
// The C reference in the comments recurses after clipping; this version
// iterates instead. Each clipped vertex is allocated on the stack
// (subl $(mv_size),%esp) and %esi or %edx is redirected to it. The
// entry value of %esp is saved in the static Lstack so that Ldone can
// drop any number of such temporaries in one move. Because Lstack is a
// single static slot, the routine is not reentrant.
//----------------------------------------------------------------------
.globl C(R_ClipEdge)
|
||||
C(R_ClipEdge):
|
||||
pushl %esi // preserve register variables
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
movl %esp,Lstack // for clearing the stack later
|
||||
|
||||
// float d0, d1, f;
|
||||
// mvertex_t clipvert;
|
||||
|
||||
movl clip(%esp),%ebx
|
||||
movl pv0(%esp),%esi
|
||||
movl pv1(%esp),%edx
|
||||
|
||||
// if (clip)
|
||||
// {
|
||||
testl %ebx,%ebx
|
||||
jz Lemit
|
||||
|
||||
// do
|
||||
// {
|
||||
|
||||
Lcliploop:
|
||||
|
||||
// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
|
||||
// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
|
||||
flds mv_position+0(%esi)
|
||||
fmuls cp_normal+0(%ebx)
|
||||
flds mv_position+4(%esi)
|
||||
fmuls cp_normal+4(%ebx)
|
||||
flds mv_position+8(%esi)
|
||||
fmuls cp_normal+8(%ebx)
|
||||
fxch %st(1)
|
||||
faddp %st(0),%st(2) // d0mul2 | d0add0
|
||||
|
||||
flds mv_position+0(%edx)
|
||||
fmuls cp_normal+0(%ebx)
|
||||
flds mv_position+4(%edx)
|
||||
fmuls cp_normal+4(%ebx)
|
||||
flds mv_position+8(%edx)
|
||||
fmuls cp_normal+8(%ebx)
|
||||
fxch %st(1)
|
||||
faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
|
||||
fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
|
||||
|
||||
faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
|
||||
faddp %st(0),%st(2) // dot0 | dot1
|
||||
|
||||
fsubs cp_dist(%ebx) // d0 | dot1
|
||||
fxch %st(1) // dot1 | d0
|
||||
fsubs cp_dist(%ebx) // d1 | d0
|
||||
fxch %st(1)
|
||||
fstps Ld0
|
||||
fstps Ld1
|
||||
|
||||
// if (d0 >= 0)
|
||||
// {
|
||||
movl Ld0,%eax
|
||||
movl Ld1,%ecx
|
||||
orl %eax,%ecx
|
||||
js Lp2
|
||||
|
||||
// both points are unclipped
|
||||
|
||||
// Common continuation of the clip loop. Reached by fall-through when
// both points are inside the current plane, and by jump from the
// partial-clip paths after %esi or %edx has been redirected to a
// clipvert on the stack. Advancing to clip->next here therefore stands
// in for the recursive R_ClipEdge call shown in the C reference below.
Lcontinue:
|
||||
|
||||
//
|
||||
// R_ClipEdge (&clipvert, pv1, clip->next);
|
||||
// return;
|
||||
// }
|
||||
// } while ((clip = clip->next) != NULL);
|
||||
movl cp_next(%ebx),%ebx
|
||||
testl %ebx,%ebx
|
||||
jnz Lcliploop
|
||||
|
||||
// }
|
||||
|
||||
//// add the edge
|
||||
// R_EmitEdge (pv0, pv1);
|
||||
Lemit:
|
||||
|
||||
//
|
||||
// set integer rounding to ceil mode, set to single precision
|
||||
//
|
||||
// FIXME: do away with by manually extracting integers from floats?
|
||||
// FIXME: set less often
|
||||
fldcw ceil_cw
|
||||
|
||||
// edge_t *edge, *pcheck;
|
||||
// int u_check;
|
||||
// float u, u_step;
|
||||
// vec3_t local, transformed;
|
||||
// float *world;
|
||||
// int v, v2, ceilv0;
|
||||
// float scale, lzi0, u0, v0;
|
||||
// int side;
|
||||
|
||||
// if (r_lastvertvalid)
|
||||
// {
|
||||
cmpl $0,C(r_lastvertvalid)
|
||||
jz LCalcFirst
|
||||
|
||||
// u0 = r_u1;
|
||||
// v0 = r_v1;
|
||||
// lzi0 = r_lzi1;
|
||||
// ceilv0 = r_ceilv1;
|
||||
movl C(r_lzi1),%eax
|
||||
movl C(r_u1),%ecx
|
||||
movl %eax,Lzi0
|
||||
movl %ecx,Lu0
|
||||
movl C(r_v1),%ecx
|
||||
movl C(r_ceilv1),%eax
|
||||
movl %ecx,Lv0
|
||||
movl %eax,Lceilv0
|
||||
jmp LCalcSecond
|
||||
|
||||
// }
|
||||
|
||||
// No cached projection of the previous vertex is available, so project
// pv0 (%esi) now and save u0, v0, lzi0 and ceilv0 in the static
// temporaries Lu0, Lv0, Lzi0 and Lceilv0.
LCalcFirst:
|
||||
|
||||
// else
|
||||
// {
|
||||
// world = &pv0->position[0];
|
||||
|
||||
call LTransformAndProject // v0 | lzi0 | u0
|
||||
|
||||
fsts Lv0
|
||||
fxch %st(2) // u0 | lzi0 | v0
|
||||
fstps Lu0 // lzi0 | v0
|
||||
fstps Lzi0 // v0
|
||||
|
||||
// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
|
||||
// fistpl rounds according to the FPU control word; ceil_cw was loaded
// at Lemit, so this store performs the ceil() of the C reference and
// leaves the FP stack empty
fistpl Lceilv0
|
||||
|
||||
// }
|
||||
|
||||
LCalcSecond:
|
||||
|
||||
// world = &pv1->position[0];
|
||||
movl %edx,%esi
|
||||
|
||||
call LTransformAndProject // v1 | lzi1 | u1
|
||||
|
||||
flds Lu0 // u0 | v1 | lzi1 | u1
|
||||
fxch %st(3) // u1 | v1 | lzi1 | u0
|
||||
flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
|
||||
fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
|
||||
flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
|
||||
fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
|
||||
|
||||
// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
|
||||
fistl C(r_ceilv1)
|
||||
|
||||
fldcw single_cw // put back normal floating-point state
|
||||
|
||||
fsts C(r_v1)
|
||||
fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
|
||||
|
||||
// if (r_lzi1 > lzi0)
|
||||
// lzi0 = r_lzi1;
|
||||
fcom %st(1)
|
||||
fnstsw %ax
|
||||
testb $1,%ah
|
||||
jz LP0
|
||||
fstp %st(0)
|
||||
fld %st(0)
|
||||
LP0:
|
||||
|
||||
fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
|
||||
fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
|
||||
fxch %st(1)
|
||||
fsts C(r_u1)
|
||||
fxch %st(1)
|
||||
|
||||
// if (lzi0 > r_nearzi) // for mipmap finding
|
||||
// r_nearzi = lzi0;
|
||||
fcoms C(r_nearzi)
|
||||
fnstsw %ax
|
||||
testb $0x45,%ah
|
||||
jnz LP1
|
||||
fsts C(r_nearzi)
|
||||
LP1:
|
||||
|
||||
// // for right edges, all we want is the effect on 1/z
|
||||
// if (r_nearzionly)
|
||||
// return;
|
||||
movl C(r_nearzionly),%eax
|
||||
testl %eax,%eax
|
||||
jz LP2
|
||||
// Early exit with no edge emitted. Entered with five values still on
// the FP stack (lzi0 | u1 | v0 | v1 | u0), either because only the
// nearzi effect was wanted (r_nearzionly) or because the edge turned
// out to be horizontal (ceilv0 == r_ceilv1).
//
// cacheoffset == 0x7FFFFFFF is the value the partial-clip paths store
// to mean "do not cache this edge"; in that case it is left alone.
// Otherwise the edge is recorded as fully clipped for this frame:
//   cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK)
LPop5AndDone:
|
||||
movl C(cacheoffset),%eax
|
||||
movl C(r_framecount),%edx
|
||||
cmpl $0x7FFFFFFF,%eax
|
||||
jz LDoPop
|
||||
andl $(FRAMECOUNT_MASK),%edx
|
||||
orl $(FULLY_CLIPPED_CACHED),%edx
|
||||
movl %edx,C(cacheoffset)
|
||||
|
||||
// discard the five pending FP values so the x87 stack is empty on return
LDoPop:
|
||||
fstp %st(0) // u1 | v0 | v1 | u0
|
||||
fstp %st(0) // v0 | v1 | u0
|
||||
fstp %st(0) // v1 | u0
|
||||
fstp %st(0) // u0
|
||||
fstp %st(0)
|
||||
jmp Ldone
|
||||
|
||||
LP2:
|
||||
|
||||
// // create the edge
|
||||
// if (ceilv0 == r_ceilv1)
|
||||
// return; // horizontal edge
|
||||
movl Lceilv0,%ebx
|
||||
movl C(edge_p),%edi
|
||||
movl C(r_ceilv1),%ecx
|
||||
movl %edi,%edx
|
||||
movl C(r_pedge),%esi
|
||||
addl $(et_size),%edx
|
||||
cmpl %ecx,%ebx
|
||||
jz LPop5AndDone
|
||||
|
||||
movl C(r_pedge),%eax
|
||||
movl %eax,et_owner(%edi)
|
||||
|
||||
// side = ceilv0 > r_ceilv1;
|
||||
//
|
||||
// edge->nearzi = lzi0;
|
||||
fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
|
||||
|
||||
// if (side == 1)
|
||||
// {
|
||||
jc LSide0
|
||||
|
||||
LSide1:
|
||||
|
||||
// // leading edge (go from p2 to p1)
|
||||
|
||||
// u_step = ((u0 - r_u1) / (v0 - r_v1));
|
||||
fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
|
||||
fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
|
||||
fdivrp %st(0),%st(2) // v1 | ustep
|
||||
|
||||
// r_emitted = 1;
|
||||
movl $1,C(r_emitted)
|
||||
|
||||
// edge = edge_p++;
|
||||
movl %edx,C(edge_p)
|
||||
|
||||
// pretouch next edge
|
||||
movl (%edx),%eax
|
||||
|
||||
// v2 = ceilv0 - 1;
|
||||
// v = r_ceilv1;
|
||||
movl %ecx,%eax
|
||||
leal -1(%ebx),%ecx
|
||||
movl %eax,%ebx
|
||||
|
||||
// edge->surfs[0] = 0;
|
||||
// edge->surfs[1] = surface_p - surfaces;
|
||||
movl C(surface_p),%eax
|
||||
movl C(surfaces),%esi
|
||||
subl %edx,%edx
|
||||
subl %esi,%eax
|
||||
shrl $(SURF_T_SHIFT),%eax
|
||||
movl %edx,et_surfs(%edi)
|
||||
movl %eax,et_surfs+2(%edi)
|
||||
|
||||
subl %esi,%esi
|
||||
|
||||
// u = r_u1 + ((float)v - r_v1) * u_step;
|
||||
movl %ebx,Lv
|
||||
fildl Lv // v | v1 | ustep
|
||||
fsubp %st(0),%st(1) // v-v1 | ustep
|
||||
fmul %st(1),%st(0) // (v-v1)*ustep | ustep
|
||||
fadds C(r_u1) // u | ustep
|
||||
|
||||
jmp LSideDone
|
||||
|
||||
// }
|
||||
|
||||
LSide0:
|
||||
|
||||
// else
|
||||
// {
|
||||
// // trailing edge (go from p1 to p2)
|
||||
|
||||
// u_step = ((r_u1 - u0) / (r_v1 - v0));
|
||||
fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
|
||||
fxch %st(2) // v1 | v0 | u1-u0 | u0
|
||||
fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
|
||||
fdivrp %st(0),%st(2) // v0 | ustep | u0
|
||||
|
||||
// r_emitted = 1;
|
||||
movl $1,C(r_emitted)
|
||||
|
||||
// edge = edge_p++;
|
||||
movl %edx,C(edge_p)
|
||||
|
||||
// pretouch next edge
|
||||
movl (%edx),%eax
|
||||
|
||||
// v = ceilv0;
|
||||
// v2 = r_ceilv1 - 1;
|
||||
decl %ecx
|
||||
|
||||
// edge->surfs[0] = surface_p - surfaces;
|
||||
// edge->surfs[1] = 0;
|
||||
movl C(surface_p),%eax
|
||||
movl C(surfaces),%esi
|
||||
subl %edx,%edx
|
||||
subl %esi,%eax
|
||||
shrl $(SURF_T_SHIFT),%eax
|
||||
movl %edx,et_surfs+2(%edi)
|
||||
movl %eax,et_surfs(%edi)
|
||||
|
||||
movl $1,%esi
|
||||
|
||||
// u = u0 + ((float)v - v0) * u_step;
|
||||
movl %ebx,Lv
|
||||
fildl Lv // v | v0 | ustep | u0
|
||||
fsubp %st(0),%st(1) // v-v0 | ustep | u0
|
||||
fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
|
||||
faddp %st(0),%st(2) // ustep | u
|
||||
fxch %st(1) // u | ustep
|
||||
|
||||
// }
|
||||
|
||||
LSideDone:
|
||||
|
||||
// edge->u_step = u_step*0x100000;
|
||||
// edge->u = u*0x100000 + 0xFFFFF;
|
||||
|
||||
fmuls fp_1m // u*0x100000 | ustep
|
||||
fxch %st(1) // ustep | u*0x100000
|
||||
fmuls fp_1m // ustep*0x100000 | u*0x100000
|
||||
fxch %st(1) // u*0x100000 | ustep*0x100000
|
||||
fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
|
||||
fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
|
||||
fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
|
||||
fistpl et_u(%edi)
|
||||
|
||||
// // we need to do this to avoid stepping off the edges if a very nearly
|
||||
// // horizontal edge is less than epsilon above a scan, and numeric error
|
||||
// // causes it to incorrectly extend to the scan, and the extension of the
|
||||
// // line goes off the edge of the screen
|
||||
// // FIXME: is this actually needed?
|
||||
// if (edge->u < r_refdef.vrect_x_adj_shift20)
|
||||
// edge->u = r_refdef.vrect_x_adj_shift20;
|
||||
// if (edge->u > r_refdef.vrectright_adj_shift20)
|
||||
// edge->u = r_refdef.vrectright_adj_shift20;
|
||||
movl et_u(%edi),%eax
|
||||
movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
|
||||
cmpl %edx,%eax
|
||||
jl LP4
|
||||
movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
|
||||
cmpl %edx,%eax
|
||||
jng LP5
|
||||
LP4:
|
||||
movl %edx,et_u(%edi)
|
||||
movl %edx,%eax
|
||||
LP5:
|
||||
|
||||
// // sort the edge in normally
|
||||
// u_check = edge->u;
|
||||
//
|
||||
// if (edge->surfs[0])
|
||||
// u_check++; // sort trailers after leaders
|
||||
addl %esi,%eax
|
||||
|
||||
// if (!newedges[v] || newedges[v]->u >= u_check)
|
||||
// {
|
||||
movl C(newedges)(,%ebx,4),%esi
|
||||
testl %esi,%esi
|
||||
jz LDoFirst
|
||||
cmpl %eax,et_u(%esi)
|
||||
jl LNotFirst
|
||||
LDoFirst:
|
||||
|
||||
// edge->next = newedges[v];
|
||||
// newedges[v] = edge;
|
||||
movl %esi,et_next(%edi)
|
||||
movl %edi,C(newedges)(,%ebx,4)
|
||||
|
||||
jmp LSetRemove
|
||||
|
||||
// }
|
||||
|
||||
LNotFirst:
|
||||
|
||||
// else
|
||||
// {
|
||||
// pcheck = newedges[v];
|
||||
//
|
||||
// while (pcheck->next && pcheck->next->u < u_check)
|
||||
// pcheck = pcheck->next;
|
||||
LFindInsertLoop:
|
||||
movl %esi,%edx
|
||||
movl et_next(%esi),%esi
|
||||
testl %esi,%esi
|
||||
jz LInsertFound
|
||||
cmpl %eax,et_u(%esi)
|
||||
jl LFindInsertLoop
|
||||
|
||||
LInsertFound:
|
||||
|
||||
// edge->next = pcheck->next;
|
||||
// pcheck->next = edge;
|
||||
movl %esi,et_next(%edi)
|
||||
movl %edi,et_next(%edx)
|
||||
|
||||
// }
|
||||
|
||||
LSetRemove:
|
||||
|
||||
// edge->nextremove = removeedges[v2];
|
||||
// removeedges[v2] = edge;
|
||||
movl C(removeedges)(,%ecx,4),%eax
|
||||
movl %edi,C(removeedges)(,%ecx,4)
|
||||
movl %eax,et_nextremove(%edi)
|
||||
|
||||
// Common exit for R_ClipEdge. Reloading %esp from Lstack (saved at
// entry, after the three pushes) releases every clipvert that the
// partial-clip paths allocated with subl $(mv_size),%esp, however many
// there were, so the pops below find the saved registers.
Ldone:
|
||||
movl Lstack,%esp // clear temporary variables from stack
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
// at least one point is clipped
|
||||
|
||||
// Entered from the clip loop when the sign bit of (d0 | d1) is set.
// %eax still holds the raw bits of d0. The tests below look only at
// the float sign bit as an integer, so a value of -0.0 is treated as
// negative (clipped).
Lp2:
|
||||
testl %eax,%eax
|
||||
jns Lp1
|
||||
|
||||
// else
|
||||
// {
|
||||
// // point 0 is clipped
|
||||
|
||||
// if (d1 < 0)
|
||||
// {
|
||||
movl Ld1,%eax
|
||||
testl %eax,%eax
|
||||
jns Lp3
|
||||
|
||||
// // both points are clipped
|
||||
// // we do cache fully clipped edges
|
||||
// if (!leftclipped)
|
||||
movl C(r_leftclipped),%eax
|
||||
// NOTE(review): %ecx is not read again on this path before Ldone in
// the visible code, so this load of r_pedge looks unused - confirm
movl C(r_pedge),%ecx
|
||||
testl %eax,%eax
|
||||
jnz Ldone
|
||||
|
||||
// cacheoffset = FULLY_CLIPPED_CACHED | (r_framecount & FRAMECOUNT_MASK);
|
||||
movl C(r_framecount),%eax
|
||||
andl $(FRAMECOUNT_MASK),%eax
|
||||
orl $(FULLY_CLIPPED_CACHED),%eax
|
||||
movl %eax,C(cacheoffset)
|
||||
|
||||
// return;
|
||||
jmp Ldone
|
||||
|
||||
// }
|
||||
|
||||
Lp1:
|
||||
|
||||
// // point 0 is unclipped
|
||||
// if (d1 >= 0)
|
||||
// {
|
||||
// // both points are unclipped
|
||||
// continue;
|
||||
|
||||
// // only point 1 is clipped
|
||||
|
||||
// f = d0 / (d0 - d1);
|
||||
flds Ld0
|
||||
flds Ld1
|
||||
fsubr %st(1),%st(0)
|
||||
|
||||
// // we don't cache partially clipped edges
|
||||
movl $0x7FFFFFFF,C(cacheoffset)
|
||||
|
||||
fdivrp %st(0),%st(1)
|
||||
|
||||
subl $(mv_size),%esp // allocate space for clipvert
|
||||
|
||||
// clipvert.position[0] = pv0->position[0] +
|
||||
// f * (pv1->position[0] - pv0->position[0]);
|
||||
// clipvert.position[1] = pv0->position[1] +
|
||||
// f * (pv1->position[1] - pv0->position[1]);
|
||||
// clipvert.position[2] = pv0->position[2] +
|
||||
// f * (pv1->position[2] - pv0->position[2]);
|
||||
flds mv_position+8(%edx)
|
||||
fsubs mv_position+8(%esi)
|
||||
flds mv_position+4(%edx)
|
||||
fsubs mv_position+4(%esi)
|
||||
flds mv_position+0(%edx)
|
||||
fsubs mv_position+0(%esi) // 0 | 1 | 2
|
||||
|
||||
// replace pv1 with the clip point
|
||||
movl %esp,%edx
|
||||
movl cp_leftedge(%ebx),%eax
|
||||
testb %al,%al
|
||||
|
||||
fmul %st(3),%st(0)
|
||||
fxch %st(1) // 1 | 0 | 2
|
||||
fmul %st(3),%st(0)
|
||||
fxch %st(2) // 2 | 0 | 1
|
||||
fmulp %st(0),%st(3) // 0 | 1 | 2
|
||||
fadds mv_position+0(%esi)
|
||||
fxch %st(1) // 1 | 0 | 2
|
||||
fadds mv_position+4(%esi)
|
||||
fxch %st(2) // 2 | 0 | 1
|
||||
fadds mv_position+8(%esi)
|
||||
fxch %st(1) // 0 | 2 | 1
|
||||
fstps mv_position+0(%esp) // 2 | 1
|
||||
fstps mv_position+8(%esp) // 1
|
||||
fstps mv_position+4(%esp)
|
||||
|
||||
// if (clip->leftedge)
|
||||
// {
|
||||
jz Ltestright
|
||||
|
||||
// r_leftclipped = true;
|
||||
// r_leftexit = clipvert;
|
||||
movl $1,C(r_leftclipped)
|
||||
movl mv_position+0(%esp),%eax
|
||||
movl %eax,C(r_leftexit)+mv_position+0
|
||||
movl mv_position+4(%esp),%eax
|
||||
movl %eax,C(r_leftexit)+mv_position+4
|
||||
movl mv_position+8(%esp),%eax
|
||||
movl %eax,C(r_leftexit)+mv_position+8
|
||||
|
||||
jmp Lcontinue
|
||||
|
||||
// }
|
||||
|
||||
Ltestright:
|
||||
// else if (clip->rightedge)
|
||||
// {
|
||||
testb %ah,%ah
|
||||
jz Lcontinue
|
||||
|
||||
// r_rightclipped = true;
|
||||
// r_rightexit = clipvert;
|
||||
movl $1,C(r_rightclipped)
|
||||
movl mv_position+0(%esp),%eax
|
||||
movl %eax,C(r_rightexit)+mv_position+0
|
||||
movl mv_position+4(%esp),%eax
|
||||
movl %eax,C(r_rightexit)+mv_position+4
|
||||
movl mv_position+8(%esp),%eax
|
||||
movl %eax,C(r_rightexit)+mv_position+8
|
||||
|
||||
// }
|
||||
//
|
||||
// R_ClipEdge (pv0, &clipvert, clip->next);
|
||||
// return;
|
||||
// }
|
||||
jmp Lcontinue
|
||||
|
||||
// }
|
||||
|
||||
Lp3:
|
||||
|
||||
// // only point 0 is clipped
|
||||
// r_lastvertvalid = false;
|
||||
|
||||
movl $0,C(r_lastvertvalid)
|
||||
|
||||
// f = d0 / (d0 - d1);
|
||||
flds Ld0
|
||||
flds Ld1
|
||||
fsubr %st(1),%st(0)
|
||||
|
||||
// // we don't cache partially clipped edges
|
||||
movl $0x7FFFFFFF,C(cacheoffset)
|
||||
|
||||
fdivrp %st(0),%st(1)
|
||||
|
||||
subl $(mv_size),%esp // allocate space for clipvert
|
||||
|
||||
// clipvert.position[0] = pv0->position[0] +
|
||||
// f * (pv1->position[0] - pv0->position[0]);
|
||||
// clipvert.position[1] = pv0->position[1] +
|
||||
// f * (pv1->position[1] - pv0->position[1]);
|
||||
// clipvert.position[2] = pv0->position[2] +
|
||||
// f * (pv1->position[2] - pv0->position[2]);
|
||||
flds mv_position+8(%edx)
|
||||
fsubs mv_position+8(%esi)
|
||||
flds mv_position+4(%edx)
|
||||
fsubs mv_position+4(%esi)
|
||||
flds mv_position+0(%edx)
|
||||
fsubs mv_position+0(%esi) // 0 | 1 | 2
|
||||
|
||||
movl cp_leftedge(%ebx),%eax
|
||||
testb %al,%al
|
||||
|
||||
fmul %st(3),%st(0)
|
||||
fxch %st(1) // 1 | 0 | 2
|
||||
fmul %st(3),%st(0)
|
||||
fxch %st(2) // 2 | 0 | 1
|
||||
fmulp %st(0),%st(3) // 0 | 1 | 2
|
||||
fadds mv_position+0(%esi)
|
||||
fxch %st(1) // 1 | 0 | 2
|
||||
fadds mv_position+4(%esi)
|
||||
fxch %st(2) // 2 | 0 | 1
|
||||
fadds mv_position+8(%esi)
|
||||
fxch %st(1) // 0 | 2 | 1
|
||||
fstps mv_position+0(%esp) // 2 | 1
|
||||
fstps mv_position+8(%esp) // 1
|
||||
fstps mv_position+4(%esp)
|
||||
|
||||
// replace pv0 with the clip point
|
||||
movl %esp,%esi
|
||||
|
||||
// if (clip->leftedge)
|
||||
// {
|
||||
jz Ltestright2
|
||||
|
||||
// r_leftclipped = true;
|
||||
// r_leftenter = clipvert;
|
||||
movl $1,C(r_leftclipped)
|
||||
movl mv_position+0(%esp),%eax
|
||||
movl %eax,C(r_leftenter)+mv_position+0
|
||||
movl mv_position+4(%esp),%eax
|
||||
movl %eax,C(r_leftenter)+mv_position+4
|
||||
movl mv_position+8(%esp),%eax
|
||||
movl %eax,C(r_leftenter)+mv_position+8
|
||||
|
||||
jmp Lcontinue
|
||||
|
||||
// }
|
||||
|
||||
Ltestright2:
|
||||
// else if (clip->rightedge)
|
||||
// {
|
||||
testb %ah,%ah
|
||||
jz Lcontinue
|
||||
|
||||
// r_rightclipped = true;
|
||||
// r_rightenter = clipvert;
|
||||
movl $1,C(r_rightclipped)
|
||||
movl mv_position+0(%esp),%eax
|
||||
movl %eax,C(r_rightenter)+mv_position+0
|
||||
movl mv_position+4(%esp),%eax
|
||||
movl %eax,C(r_rightenter)+mv_position+4
|
||||
movl mv_position+8(%esp),%eax
|
||||
movl %eax,C(r_rightenter)+mv_position+8
|
||||
|
||||
// }
|
||||
jmp Lcontinue
|
||||
|
||||
//----------------------------------------------------------------------
// LTransformAndProject - local subroutine, entered with call
//
// In:    %esi = vec3_t point to transform and project
// Out:   three values left on the FP stack: v | lzi | u
//        (st(0) = screen v, st(1) = 1/z, st(2) = screen u)
// Clobb: %eax (used by fnstsw for the FP compares), flags
// Keeps: %edx preserved; no other integer register is written
//
// z is raised to NEAR_CLIP if it is smaller, u is clamped to
// [r_refdef.fvrectx_adj, r_refdef.fvrectright_adj] and v to
// [r_refdef.fvrecty_adj, r_refdef.fvrectbottom_adj].
//----------------------------------------------------------------------
// %esi = vec3_t point to transform and project
|
||||
// %edx preserved
|
||||
LTransformAndProject:
|
||||
|
||||
// // transform and project
|
||||
// VectorSubtract (world, modelorg, local);
|
||||
flds mv_position+0(%esi)
|
||||
fsubs C(modelorg)+0
|
||||
flds mv_position+4(%esi)
|
||||
fsubs C(modelorg)+4
|
||||
flds mv_position+8(%esi)
|
||||
fsubs C(modelorg)+8
|
||||
fxch %st(2) // local[0] | local[1] | local[2]
|
||||
|
||||
// TransformVector (local, transformed);
|
||||
//
|
||||
// if (transformed[2] < NEAR_CLIP)
|
||||
// transformed[2] = NEAR_CLIP;
|
||||
//
|
||||
// lzi0 = 1.0 / transformed[2];
|
||||
fld %st(0) // local[0] | local[0] | local[1] | local[2]
|
||||
fmuls C(vpn)+0 // zm0 | local[0] | local[1] | local[2]
|
||||
fld %st(1) // local[0] | zm0 | local[0] | local[1] |
|
||||
// local[2]
|
||||
fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
|
||||
fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
|
||||
fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
|
||||
fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
|
||||
// local[2]
|
||||
fmuls C(vpn)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
|
||||
// local[2]
|
||||
fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
|
||||
// local[1] | local[2]
|
||||
fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
|
||||
// local[1] | local[2]
|
||||
fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
|
||||
// xm1 | local[2]
|
||||
fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
|
||||
// xm1 | local[2]
|
||||
fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
|
||||
// xm1 | local[2]
|
||||
faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
|
||||
fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
|
||||
faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
|
||||
faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
|
||||
fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
|
||||
fmuls C(vpn)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
|
||||
fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
|
||||
fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
|
||||
fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
|
||||
fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
|
||||
fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
|
||||
faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
|
||||
fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
|
||||
faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
|
||||
fxch %st(1) // ym2 | zm4 | xm4 | ym3
|
||||
faddp %st(0),%st(3) // zm4 | xm4 | ym4
|
||||
|
||||
fcoms Lfp_near_clip
|
||||
fnstsw %ax
|
||||
testb $1,%ah
|
||||
jz LNoClip
|
||||
fstp %st(0)
|
||||
flds Lfp_near_clip
|
||||
|
||||
LNoClip:
|
||||
|
||||
fdivrs float_1 // lzi0 | x | y
|
||||
fxch %st(1) // x | lzi0 | y
|
||||
|
||||
// // FIXME: build x/yscale into transform?
|
||||
// scale = xscale * lzi0;
|
||||
// u0 = (xcenter + scale*transformed[0]);
|
||||
flds C(xscale) // xscale | x | lzi0 | y
|
||||
fmul %st(2),%st(0) // scale | x | lzi0 | y
|
||||
fmulp %st(0),%st(1) // scale*x | lzi0 | y
|
||||
fadds C(xcenter) // u0 | lzi0 | y
|
||||
|
||||
// if (u0 < r_refdef.fvrectx_adj)
|
||||
// u0 = r_refdef.fvrectx_adj;
|
||||
// if (u0 > r_refdef.fvrectright_adj)
|
||||
// u0 = r_refdef.fvrectright_adj;
|
||||
// FIXME: use integer compares of floats?
|
||||
fcoms C(r_refdef)+rd_fvrectx_adj
|
||||
fnstsw %ax
|
||||
testb $1,%ah
|
||||
jz LClampP0
|
||||
fstp %st(0)
|
||||
flds C(r_refdef)+rd_fvrectx_adj
|
||||
LClampP0:
|
||||
fcoms C(r_refdef)+rd_fvrectright_adj
|
||||
fnstsw %ax
|
||||
testb $0x45,%ah
|
||||
jnz LClampP1
|
||||
fstp %st(0)
|
||||
flds C(r_refdef)+rd_fvrectright_adj
|
||||
LClampP1:
|
||||
|
||||
fld %st(1) // lzi0 | u0 | lzi0 | y
|
||||
|
||||
// scale = yscale * lzi0;
|
||||
// v0 = (ycenter - scale*transformed[1]);
|
||||
fmuls C(yscale) // scale | u0 | lzi0 | y
|
||||
fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
|
||||
fxch %st(2) // scale*y | lzi0 | u0
|
||||
fsubrs C(ycenter) // v0 | lzi0 | u0
|
||||
|
||||
// if (v0 < r_refdef.fvrecty_adj)
|
||||
// v0 = r_refdef.fvrecty_adj;
|
||||
// if (v0 > r_refdef.fvrectbottom_adj)
|
||||
// v0 = r_refdef.fvrectbottom_adj;
|
||||
// FIXME: use integer compares of floats?
|
||||
fcoms C(r_refdef)+rd_fvrecty_adj
|
||||
fnstsw %ax
|
||||
testb $1,%ah
|
||||
jz LClampP2
|
||||
fstp %st(0)
|
||||
flds C(r_refdef)+rd_fvrecty_adj
|
||||
LClampP2:
|
||||
fcoms C(r_refdef)+rd_fvrectbottom_adj
|
||||
fnstsw %ax
|
||||
testb $0x45,%ah
|
||||
jnz LClampP3
|
||||
fstp %st(0)
|
||||
flds C(r_refdef)+rd_fvrectbottom_adj
|
||||
LClampP3:
|
||||
ret
|
||||
|
||||
#endif // id386
|
||||
|
750
source/r_edgea.S
Normal file
750
source/r_edgea.S
Normal file
|
@ -0,0 +1,750 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// r_edgea.s
|
||||
// x86 assembly-language edge-processing code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
Ltemp: .long 0
|
||||
float_1_div_0100000h: .long 0x35800000 // 1.0/(float)0x100000
|
||||
float_point_999: .single 0.999
|
||||
float_1_point_001: .single 1.001
|
||||
|
||||
.text
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Stack offsets of the two arguments. Each one is read after a different
// number of pushes, which is why the second term differs (8 = two pushes
// done, 12 = three pushes done).
#define edgestoadd	4+8
#define edgelist	8+12

// label at the first byte of the edge code (R_EdgeCodeEnd closes the range)
.globl C(R_EdgeCodeStart)
C(R_EdgeCodeStart):

//--------------------------------------------------------------------
// R_InsertNewEdges (edgestoadd, edgelist)
//
// Follows the et_next chain that starts at edgestoadd and links each of
// those edges into the doubly linked (et_prev/et_next) list that edgelist
// belongs to, in front of the first list edge whose et_u is >= the new
// edge's et_u (signed compare).
//
// The list cursor is never rewound between insertions, so the edges in
// edgestoadd presumably arrive sorted by ascending u -- TODO confirm with
// the caller. The search has no end-of-list test; it relies on the list
// ending in an edge whose u stops the scan (presumably a sentinel).
//
// Registers:
//	%edx		next edge still to be inserted (0 ends the loop)
//	%edi		edge being inserted, %eax = its u
//	%ecx / %esi	list cursor, alternating between the unrolled steps
//	%ebx		scratch
//--------------------------------------------------------------------
.globl C(R_InsertNewEdges)
C(R_InsertNewEdges):
	pushl	%edi
	pushl	%esi
	movl	edgestoadd(%esp),%edx
	pushl	%ebx
	movl	edgelist(%esp),%ecx

LInsertNext:
	movl	et_u(%edx),%eax			// u of the edge to insert
	movl	%edx,%edi

// search loop, unrolled four times
LScanList:
	movl	et_u(%ecx),%ebx
	movl	et_next(%ecx),%esi
	cmpl	%ebx,%eax
	jle	LLinkBeforeEcx			// new u <= cursor u: insert before %ecx
	movl	et_u(%esi),%ebx
	movl	et_next(%esi),%ecx
	cmpl	%ebx,%eax
	jle	LLinkBeforeEsi			// insert before %esi
	movl	et_u(%ecx),%ebx
	movl	et_next(%ecx),%esi
	cmpl	%ebx,%eax
	jle	LLinkBeforeEcx
	movl	et_u(%esi),%ebx
	movl	et_next(%esi),%ecx
	cmpl	%ebx,%eax
	jg	LScanList

// link %edi in front of %esi
LLinkBeforeEsi:
	movl	et_next(%edx),%edx		// fetch the next edge to add before
						// et_next of this one is overwritten
	movl	et_prev(%esi),%ebx
	movl	%esi,et_next(%edi)
	movl	%ebx,et_prev(%edi)
	movl	%edi,et_next(%ebx)
	movl	%edi,et_prev(%esi)
	movl	%esi,%ecx			// resume the next search at %esi

	testl	%edx,%edx
	jnz	LInsertNext
	jmp	LInsertDone

	.align 4
// link %edi in front of %ecx; %ecx already is the resume point
LLinkBeforeEcx:
	movl	et_next(%edx),%edx
	movl	et_prev(%ecx),%ebx
	movl	%ecx,et_next(%edi)
	movl	%ebx,et_prev(%edi)
	movl	%edi,et_next(%ebx)
	movl	%edi,et_prev(%ecx)

	testl	%edx,%edx
	jnz	LInsertNext

LInsertDone:
	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi

	ret
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Stack offset of the argument; it is read after %ebx has been pushed.
#define predge	4+4

//--------------------------------------------------------------------
// R_RemoveEdges (pedge)
//
// Unlinks pedge, and every further edge reached through et_nextremove,
// from the doubly linked et_prev/et_next list:
//
//	do {
//		pedge->next->prev = pedge->prev;
//		pedge->prev->next = pedge->next;
//	} while ((pedge = pedge->nextremove) != NULL);
//
// The loop is unrolled twice; the current edge alternates between %eax
// and %ebx. pedge itself must be non-NULL (it is dereferenced before any
// test). Clobbers %eax, %ecx, %edx.
//--------------------------------------------------------------------
.globl C(R_RemoveEdges)
C(R_RemoveEdges):
	pushl	%ebx
	movl	predge(%esp),%eax

Lre_unlink:
	// current edge in %eax
	movl	et_next(%eax),%ecx
	movl	et_prev(%eax),%edx
	movl	et_nextremove(%eax),%ebx
	movl	%edx,et_prev(%ecx)
	movl	%ecx,et_next(%edx)
	testl	%ebx,%ebx
	jz	Lre_exit

	// current edge in %ebx
	movl	et_next(%ebx),%ecx
	movl	et_prev(%ebx),%edx
	movl	et_nextremove(%ebx),%eax
	movl	%edx,et_prev(%ecx)
	movl	%ecx,et_next(%edx)
	testl	%eax,%eax
	jnz	Lre_unlink

Lre_exit:
	popl	%ebx

	ret
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Stack offset of the argument; it is read after one push, hence 4+4.
#define pedgelist	4+4

//--------------------------------------------------------------------
// R_StepActiveU (pedge)
//
// Walks the active edge list from pedge, adding et_u_step to et_u of
// every edge. Whenever an edge's new u is below the u of the edge in
// front of it (signed compare), the edge is unlinked and re-inserted
// further back so the list stays sorted by u.
//
// The walk stops when the out-of-order edge is edge_aftertail (the
// original note says its u of -1 is what triggers this), or when the
// edge following a re-inserted one is edge_tail.
//
// Registers in the stepping loop (unrolled four times, roles alternate):
//	%edx / %esi	current edge / its successor
//	%eax / %edi	u of the edge just stepped / u of the previous edge
// At LUnsorted: %edx = out-of-order edge, %eax = its u, %esi = its
// successor.
//--------------------------------------------------------------------
.globl C(R_StepActiveU)
C(R_StepActiveU):
	pushl	%edi
	movl	pedgelist(%esp),%edx
	pushl	%esi				// preserve register variables
	pushl	%ebx

	movl	et_prev(%edx),%esi

LStepResync:
	movl	et_u(%esi),%edi			// u of the edge before %edx

LStepLoop:
	movl	et_u(%edx),%eax
	addl	et_u_step(%edx),%eax		// u += u_step
	movl	et_next(%edx),%esi
	movl	%eax,et_u(%edx)
	cmpl	%edi,%eax
	jl	LUnsorted

	movl	et_u(%esi),%edi
	addl	et_u_step(%esi),%edi
	movl	et_next(%esi),%edx
	movl	%edi,et_u(%esi)
	cmpl	%eax,%edi
	jl	LUnsortedAtEsi

	movl	et_u(%edx),%eax
	addl	et_u_step(%edx),%eax
	movl	et_next(%edx),%esi
	movl	%eax,et_u(%edx)
	cmpl	%edi,%eax
	jl	LUnsorted

	movl	et_u(%esi),%edi
	addl	et_u_step(%esi),%edi
	movl	et_next(%esi),%edx
	movl	%edi,et_u(%esi)
	cmpl	%eax,%edi
	jge	LStepLoop

// the offender is in %esi here: move everything into the register roles
// LUnsorted expects
LUnsortedAtEsi:
	movl	%edx,%ebx
	movl	%edi,%eax
	movl	%esi,%edx
	movl	%ebx,%esi

LUnsorted:
	movl	et_prev(%edx),%ecx
	movl	et_next(%edx),%ebx

// finished if the out-of-order edge is edge_aftertail
	cmpl	$(C(edge_aftertail)),%edx
	jz	LStepDone

// unlink the edge; %edi = first candidate for the backwards search
	movl	et_prev(%ecx),%edi
	movl	%ecx,et_prev(%esi)
	movl	%ebx,et_next(%ecx)

// search backwards (two edges per pass) for the first edge with u <= %eax
LFindSlot:
	movl	et_prev(%edi),%ecx
	movl	et_u(%edi),%ebx
	cmpl	%ebx,%eax
	jge	LRelink

	movl	et_prev(%ecx),%edi
	movl	et_u(%ecx),%ebx
	cmpl	%ebx,%eax
	jl	LFindSlot

	movl	%ecx,%edi

// insert the edge right after %edi
LRelink:
	movl	et_next(%edi),%ebx
	movl	%edi,et_prev(%edx)
	movl	%ebx,et_next(%edx)
	movl	%edx,et_next(%edi)
	movl	%edx,et_prev(%ebx)

// carry on with the edge that followed the one just moved
	movl	%esi,%edx
	movl	et_prev(%esi),%esi

	cmpl	$(C(edge_tail)),%edx
	jnz	LStepResync

LStepDone:
	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi

	ret
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Kept from the original ("loaded before any pushes"); the routine below
// does not reference it and takes the surface in %esi instead.
#define surf	4

//--------------------------------------------------------------------
// TrailingEdge -- internal helper of R_GenerateSpans
//
// In:	%esi = surf (surface that ends at this edge)
//	%ebx = edge
//	%ebp = span_p (next free span)
// Out:	%ebp advanced by espan_t_size if a span was emitted
// Clobbers %eax, %ecx, %edx, %esi.
//
//	if (--surf->spanstate != 0)
//		return;				// inverted edge pair
//	r_bmodelactive -= surf->insubmodel;
//	if (surf == surfaces[1].next)		// surf is the top surface
//	{
//		iu = edge->u >> 20;
//		surf->next->last_u = iu;
//		if (iu > surf->last_u)
//			emit a span for surf covering last_u .. iu
//	}
//	unlink surf from the surface stack
//
// The absolute address in the instruction before LPatch0 is a placeholder
// that R_SurfacePatch overwrites; LPatch0 must stay directly behind it.
//--------------------------------------------------------------------
	.align 4
TrailingEdge:
	movl	st_spanstate(%esi),%eax
	decl	%eax
	movl	%eax,st_spanstate(%esi)		// movl leaves the flags of decl intact
	jnz	LTrailRet			// edge inversion: nothing else to do

	movl	st_insubmodel(%esi),%ecx
	movl	0x12345678,%edx			// surfaces[1].st_next
LPatch0:
	movl	C(r_bmodelactive),%eax
	subl	%ecx,%eax
	movl	%eax,C(r_bmodelactive)
	cmpl	%esi,%edx
	jnz	LTrailUnlink			// surface isn't on top, just remove

// the current top surface is going away
	movl	et_u(%ebx),%eax
	shrl	$20,%eax			// iu = integral pixel u
	movl	st_last_u(%esi),%edx
	movl	st_next(%esi),%ecx
	movl	%eax,st_last_u(%ecx)		// surf->next->last_u = iu;
	cmpl	%edx,%eax
	jle	LTrailUnlink			// iu <= surf->last_u, nothing to emit

	subl	%edx,%eax
	movl	%edx,espan_t_u(%ebp)		// span->u = surf->last_u;
	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
	movl	C(current_iv),%eax
	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
	movl	st_spans(%esi),%eax
	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf->spans;
	movl	%ebp,st_spans(%esi)		// surf->spans = span;
	addl	$(espan_t_size),%ebp

// remove the surface from the surface stack
LTrailUnlink:
	movl	st_next(%esi),%edx
	movl	st_prev(%esi),%esi

	movl	%edx,st_next(%esi)
	movl	%esi,st_prev(%edx)

LTrailRet:
	ret
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
// R_GenerateSpans
//
// Walks the edge list from edge_head.next up to edge_tail, maintaining
// the surface stack (a doubly linked list whose head/sentinel is
// surfaces[1]) and emitting a span whenever the top surface changes.
//
// Each edge's et_surfs packs two surface indices: the low 16 bits name
// the surface that ends at this edge (handled by TrailingEdge), the high
// 16 bits the surface that starts at it (handled at Lgs_leading). An
// index of 0 means "none".
//
// Register roles:
//	%ebx	current edge
//	%ebp	span_p (next free span), stored back on exit
//	%edi	surf  -- the surface being started by a leading edge
//	%esi	surf2 -- cursor in the surface stack (the trailing surface
//		while TrailingEdge runs)
//
// The instructions directly in front of LPatch2, LPatch3 and LPatch4
// carry a 0x12345678 placeholder that R_SurfacePatch overwrites; each
// label must stay immediately behind its instruction.
//--------------------------------------------------------------------

// trailing edge only: TrailingEdge returns straight to Lgs_nextedge
Lgs_trailing:
	pushl	$Lgs_nextedge
	jmp	TrailingEdge


.globl C(R_GenerateSpans)
C(R_GenerateSpans):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

// clear active surfaces to just the background surface
	movl	C(surfaces),%eax
	movl	C(edge_head_u_shift20),%edx
	addl	$(st_size),%eax			// %eax = &surfaces[1]
// %ebp = span_p throughout
	movl	C(span_p),%ebp

	movl	$0,C(r_bmodelactive)

	movl	%eax,st_next(%eax)
	movl	%eax,st_prev(%eax)
	movl	%edx,st_last_u(%eax)
	movl	C(edge_head)+et_next,%ebx	// edge = edge_head.next

// generate spans
	cmpl	$(C(edge_tail)),%ebx		// done if empty list
	jz	Lgs_lastspan

Lgs_edgeloop:

	movl	et_surfs(%ebx),%edi
	movl	C(surfaces),%eax
	movl	%edi,%esi
	andl	$0xFFFF0000,%edi		// leading surface index << 16
	andl	$0xFFFF,%esi			// trailing surface index
	jz	Lgs_leading			// not a trailing edge

// it has a left surface, so a surface is going away for this span
	shll	$(SURF_T_SHIFT),%esi
	addl	%eax,%esi			// %esi = &surfaces[trailing index]
	testl	%edi,%edi
	jz	Lgs_trailing

// both leading and trailing
	call	TrailingEdge

// ---------------------------------------------------------------
// handle a leading edge
// ---------------------------------------------------------------

Lgs_leading:
	shrl	$16-SURF_T_SHIFT,%edi
	movl	C(surfaces),%eax
	addl	%eax,%edi			// %edi = surf = &surfaces[leading index]
	movl	0x12345678,%esi			// surf2 = surfaces[1].next;
LPatch2:
	movl	st_spanstate(%edi),%edx
	movl	st_insubmodel(%edi),%eax
	testl	%eax,%eax
	jnz	Lbmodel_leading

// handle a leading non-bmodel edge

// don't start a span if this is an inverted span, with the end edge preceding
// the start edge (that is, we've already seen the end edge)
	testl	%edx,%edx
	jnz	Lxl_done

//	if (surf->key < surf2->key)
//		goto newtop;
	incl	%edx
	movl	st_key(%edi),%eax
	movl	%edx,st_spanstate(%edi)
	movl	st_key(%esi),%ecx
	cmpl	%ecx,%eax
	jl	Lnewtop

// main sorting loop to search through surface stack until insertion point
// found. Always terminates because background surface is sentinel
//	do
//	{
//		surf2 = surf2->next;
//	} while (surf->key >= surf2->key);
Lsortloopnb:
	movl	st_next(%esi),%esi
	movl	st_key(%esi),%ecx
	cmpl	%ecx,%eax
	jge	Lsortloopnb

	jmp	LInsertAndExit


// handle a leading bmodel edge
	.align 4
Lbmodel_leading:

// don't start a span if this is an inverted span, with the end edge preceding
// the start edge (that is, we've already seen the end edge)
	testl	%edx,%edx
	jnz	Lxl_done

	movl	C(r_bmodelactive),%ecx
	incl	%edx
	incl	%ecx
	movl	%edx,st_spanstate(%edi)
	movl	%ecx,C(r_bmodelactive)

//	if (surf->key < surf2->key)
//		goto newtop;
	movl	st_key(%edi),%eax
	movl	st_key(%esi),%ecx
	cmpl	%ecx,%eax
	jl	Lnewtop

//	if ((surf->key == surf2->key) && surf->insubmodel)
//		decide by 1/z whether surf becomes the new top
	jz	Lzcheck_for_newtop

// main sorting loop to search through surface stack until insertion point
// found. Always terminates because background surface is sentinel
//	do
//	{
//		surf2 = surf2->next;
//	} while (surf->key > surf2->key);
Lsortloop:
	movl	st_next(%esi),%esi
	movl	st_key(%esi),%ecx
	cmpl	%ecx,%eax
	jg	Lsortloop

	jne	LInsertAndExit			// surf->key < surf2->key: insert here

// keys are equal: do 1/z sorting to see if we've arrived in the right position
	movl	et_u(%ebx),%eax
	subl	$0xFFFFF,%eax
	movl	%eax,Ltemp
	fildl	Ltemp

	fmuls	float_1_div_0100000h		// fu = (float)(edge->u - 0xFFFFF) *
						//      (1.0 / 0x100000);

	fld	%st(0)				// fu | fu
	fmuls	st_d_zistepu(%edi)		// fu*surf->d_zistepu | fu
	flds	C(fv)				// fv | fu*surf->d_zistepu | fu
	fmuls	st_d_zistepv(%edi)		// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
	fadds	st_d_ziorigin(%edi)		// fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu

	flds	st_d_zistepu(%esi)		// surf2->d_zistepu |
						//  fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu
	fmul	%st(3),%st(0)			// fu*surf2->d_zistepu |
						//  fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu
	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
						//  fu*surf2->d_zistepu |
						//  fv*surf->d_zistepv | fu
	faddp	%st(0),%st(2)			// fu*surf2->d_zistepu | newzi | fu

	flds	C(fv)				// fv | fu*surf2->d_zistepu | newzi | fu
	fmuls	st_d_zistepv(%esi)		// fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu
	fld	%st(2)				// newzi | fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu
	fmuls	float_point_999			// newzibottom | fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu

	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
						//  newzibottom | newzi | fu
	fadds	st_d_ziorigin(%esi)		// fu*surf2->d_zistepu + surf2->d_ziorigin |
						//  fv*surf2->d_zistepv | newzibottom | newzi |
						//  fu
	faddp	%st(0),%st(1)			// testzi | newzibottom | newzi | fu
	fxch	%st(1)				// newzibottom | testzi | newzi | fu

//	if (newzibottom >= testzi)
//		goto Lgotposition;

	fcomp	%st(1)				// testzi | newzi | fu

	fxch	%st(1)				// newzi | testzi | fu
	fmuls	float_1_point_001		// newzitop | testzi | fu
	fxch	%st(1)				// testzi | newzitop | fu

	fnstsw	%ax
	testb	$0x01,%ah			// C0 clear: newzibottom >= testzi
	jz	Lgotposition_fpop3

//	if (newzitop >= testzi)
//	{

	fcomp	%st(1)				// newzitop | fu
	fnstsw	%ax
	testb	$0x45,%ah			// C3,C2,C0 all clear: testzi > newzitop
	jz	Lsortloop_fpop2

//		if (surf->d_zistepu >= surf2->d_zistepu)
//			goto Lgotposition;

	flds	st_d_zistepu(%edi)		// surf->d_zistepu | newzitop | fu
	fcomps	st_d_zistepu(%esi)		// newzitop | fu
	fnstsw	%ax
	testb	$0x01,%ah
	jz	Lgotposition_fpop2

	fstp	%st(0)				// clear the FP stack
	fstp	%st(0)
	movl	st_key(%edi),%eax		// fnstsw clobbered %ax: reload the key
	jmp	Lsortloop


Lgotposition_fpop3:
	fstp	%st(0)
Lgotposition_fpop2:
	fstp	%st(0)
	fstp	%st(0)
	jmp	LInsertAndExit


// emit a span (surf obscures the current top, surf2)

Lnewtop_fpop3:
	fstp	%st(0)
Lnewtop_fpop2:
	fstp	%st(0)
	fstp	%st(0)
						// no need to reload the key here:
						// Lnewtop overwrites %eax at once

Lnewtop:
	movl	et_u(%ebx),%eax
	movl	st_last_u(%esi),%edx
	shrl	$20,%eax			// iu = integral pixel u
	movl	%eax,st_last_u(%edi)		// surf->last_u = iu;
	cmpl	%edx,%eax
	jle	LInsertAndExit			// iu <= surf2->last_u, so nothing to emit

	subl	%edx,%eax
	movl	%edx,espan_t_u(%ebp)		// span->u = surf2->last_u;

	movl	%eax,espan_t_count(%ebp)	// span->count = iu - span->u;
	movl	C(current_iv),%eax
	movl	%eax,espan_t_v(%ebp)		// span->v = current_iv;
	movl	st_spans(%esi),%eax
	movl	%eax,espan_t_pnext(%ebp)	// span->pnext = surf2->spans;
	movl	%ebp,st_spans(%esi)		// surf2->spans = span;
	addl	$(espan_t_size),%ebp

LInsertAndExit:
// insert before surf2
	movl	%esi,st_next(%edi)		// surf->next = surf2;
	movl	st_prev(%esi),%eax
	movl	%eax,st_prev(%edi)		// surf->prev = surf2->prev;
	movl	%edi,st_prev(%esi)		// surf2->prev = surf;
	movl	%edi,st_next(%eax)		// surf2->prev->next = surf;

// ---------------------------------------------------------------
// leading edge done
// ---------------------------------------------------------------

// ---------------------------------------------------------------
// see if there are any more edges
// ---------------------------------------------------------------

Lgs_nextedge:
	movl	et_next(%ebx),%ebx
	cmpl	$(C(edge_tail)),%ebx
	jnz	Lgs_edgeloop

// clean up at the right edge
Lgs_lastspan:

// now that we've reached the right edge of the screen, we're done with any
// unfinished surfaces, so emit a span for whatever's on top
	movl	0x12345678,%esi			// surfaces[1].st_next
LPatch3:
	movl	C(edge_tail_u_shift20),%eax
	xorl	%ecx,%ecx			// 0, stored into every spanstate below
	movl	st_last_u(%esi),%edx
	subl	%edx,%eax			// count = edge_tail_u_shift20 - last_u
	jle	Lgs_resetspanstate

	movl	%edx,espan_t_u(%ebp)
	movl	%eax,espan_t_count(%ebp)
	movl	C(current_iv),%eax
	movl	%eax,espan_t_v(%ebp)
	movl	st_spans(%esi),%eax
	movl	%eax,espan_t_pnext(%ebp)
	movl	%ebp,st_spans(%esi)
	addl	$(espan_t_size),%ebp

// reset spanstate for all surfaces in the surface stack
Lgs_resetspanstate:
	movl	%ecx,st_spanstate(%esi)
	movl	st_next(%esi),%esi
	cmpl	$0x12345678,%esi		// &surfaces[1]
LPatch4:
	jnz	Lgs_resetspanstate

// store the final span_p
	movl	%ebp,C(span_p)

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret


// ---------------------------------------------------------------
// leading edge of an inverted span (spanstate was already non-zero):
// just count it and move on to the next edge
// ---------------------------------------------------------------
	.align 4
Lxl_done:
	incl	%edx
	movl	%edx,st_spanstate(%edi)

	jmp	Lgs_nextedge


// ---------------------------------------------------------------
// 1/z sorting for bmodels in the same leaf: surf->key == surf2->key
// on the very first comparison, so decide whether surf is the new top
// ---------------------------------------------------------------
	.align 4
Lzcheck_for_newtop:
	movl	et_u(%ebx),%eax
	subl	$0xFFFFF,%eax
	movl	%eax,Ltemp
	fildl	Ltemp

	fmuls	float_1_div_0100000h		// fu = (float)(edge->u - 0xFFFFF) *
						//      (1.0 / 0x100000);

	fld	%st(0)				// fu | fu
	fmuls	st_d_zistepu(%edi)		// fu*surf->d_zistepu | fu
	flds	C(fv)				// fv | fu*surf->d_zistepu | fu
	fmuls	st_d_zistepv(%edi)		// fv*surf->d_zistepv | fu*surf->d_zistepu | fu
	fxch	%st(1)				// fu*surf->d_zistepu | fv*surf->d_zistepv | fu
	fadds	st_d_ziorigin(%edi)		// fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu

	flds	st_d_zistepu(%esi)		// surf2->d_zistepu |
						//  fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu
	fmul	%st(3),%st(0)			// fu*surf2->d_zistepu |
						//  fu*surf->d_zistepu + surf->d_ziorigin |
						//  fv*surf->d_zistepv | fu
	fxch	%st(1)				// fu*surf->d_zistepu + surf->d_ziorigin |
						//  fu*surf2->d_zistepu |
						//  fv*surf->d_zistepv | fu
	faddp	%st(0),%st(2)			// fu*surf2->d_zistepu | newzi | fu

	flds	C(fv)				// fv | fu*surf2->d_zistepu | newzi | fu
	fmuls	st_d_zistepv(%esi)		// fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu
	fld	%st(2)				// newzi | fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu
	fmuls	float_point_999			// newzibottom | fv*surf2->d_zistepv |
						//  fu*surf2->d_zistepu | newzi | fu

	fxch	%st(2)				// fu*surf2->d_zistepu | fv*surf2->d_zistepv |
						//  newzibottom | newzi | fu
	fadds	st_d_ziorigin(%esi)		// fu*surf2->d_zistepu + surf2->d_ziorigin |
						//  fv*surf2->d_zistepv | newzibottom | newzi |
						//  fu
	faddp	%st(0),%st(1)			// testzi | newzibottom | newzi | fu
	fxch	%st(1)				// newzibottom | testzi | newzi | fu

//	if (newzibottom >= testzi)
//		goto newtop;

	fcomp	%st(1)				// testzi | newzi | fu

	fxch	%st(1)				// newzi | testzi | fu
	fmuls	float_1_point_001		// newzitop | testzi | fu
	fxch	%st(1)				// testzi | newzitop | fu

	fnstsw	%ax
	testb	$0x01,%ah
	jz	Lnewtop_fpop3

//	if (newzitop >= testzi)
//	{

	fcomp	%st(1)				// newzitop | fu
	fnstsw	%ax
	testb	$0x45,%ah
	jz	Lsortloop_fpop2

//		if (surf->d_zistepu >= surf2->d_zistepu)
//			goto newtop;

	flds	st_d_zistepu(%edi)		// surf->d_zistepu | newzitop | fu
	fcomps	st_d_zistepu(%esi)		// newzitop | fu
	fnstsw	%ax
	testb	$0x01,%ah
	jz	Lnewtop_fpop2

// not the new top: drop the two remaining FP values and continue with the
// ordinary key search
Lsortloop_fpop2:
	fstp	%st(0)				// clear the FP stack
	fstp	%st(0)
	movl	st_key(%edi),%eax		// fnstsw clobbered %ax: reload the key
	jmp	Lsortloop
|
||||
|
||||
|
||||
.globl C(R_EdgeCodeEnd)
|
||||
C(R_EdgeCodeEnd):
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface array address code patching routine
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
// Writes addresses derived from the current value of `surfaces` over the
// 32-bit 0x12345678 placeholders that end immediately before the LPatch*
// labels in the edge code:
//	LPatch4-4		<- surfaces + st_size		  (&surfaces[1])
//	LPatch0/2/3-4		<- surfaces + st_size + st_next	  (&surfaces[1].next)
// Takes no stack arguments. Clobbers %eax and %edx.
	.align 4
.globl C(R_SurfacePatch)
C(R_SurfacePatch):

	movl	C(surfaces),%eax
	leal	st_size(%eax),%edx		// &surfaces[1]
	movl	%edx,LPatch4-4

	addl	$(st_size+st_next),%eax		// &surfaces[1].next
	movl	%eax,LPatch0-4
	movl	%eax,LPatch2-4
	movl	%eax,LPatch3-4

	ret
|
||||
|
||||
#endif // id386
|
||||
|
64
source/r_varsa.S
Normal file
64
source/r_varsa.S
Normal file
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// r_varsa.s
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
#include "d_ifacea.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
//-------------------------------------------------------
|
||||
// ASM-only variables
|
||||
//-------------------------------------------------------
|
||||
// Small single-precision constants, exported for use by other .S files.
.globl float_1, float_particle_z_clip, float_point5
|
||||
.globl float_minus_1, float_0
|
||||
float_0: .single 0.0
|
||||
float_1: .single 1.0
|
||||
float_minus_1: .single -1.0
|
||||
// PARTICLE_Z_CLIP is a preprocessor constant from one of the headers included above
float_particle_z_clip: .single PARTICLE_Z_CLIP
|
||||
float_point5: .single 0.5
|
||||
|
||||
// Power-of-two scale factors as floats: 1m = 2^20, 64k = 2^16.
.globl fp_16, fp_64k, fp_1m, fp_64kx64k
|
||||
.globl fp_1m_minus_1
|
||||
.globl fp_8
|
||||
fp_1m: .single 1048576.0
|
||||
fp_1m_minus_1: .single 1048575.0
|
||||
fp_64k: .single 65536.0
|
||||
fp_8: .single 8.0
|
||||
fp_16: .single 16.0
|
||||
fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000 = 2^31 as an IEEE-754 bit pattern (32k*64k, despite the label name)
|
||||
|
||||
|
||||
// Raw IEEE-754 single bit patterns: 0.0, +2^31 (0x4f000000) and -2^31 (0xcf000000).
.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
|
||||
FloatZero: .long 0
|
||||
Float2ToThe31nd: .long 0x4f000000
|
||||
FloatMinus2ToThe31nd: .long 0xcf000000
|
||||
|
||||
// Integer counter shared with the edge code in r_edgea.S: R_GenerateSpans
// zeroes it, a leading bmodel edge increments it, and TrailingEdge subtracts
// the surface's insubmodel value from it.
.globl C(r_bmodelactive)
|
||||
C(r_bmodelactive): .long 0
|
||||
|
||||
#endif // id386
|
||||
|
218
source/snd_mixa.S
Normal file
218
source/snd_mixa.S
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// snd_mixa.s
|
||||
// x86 assembly-language sound code
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.text
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// 8-bit sound-mixing code
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
// Stack offsets of the three arguments once all four registers below have
// been pushed (16 bytes).
#define ch	4+16
#define sc	8+16
#define count	12+16

//----------------------------------------------------------------------
// SND_PaintChannelFrom8 (ch, sc, count)
//
// Mixes `count` 8-bit samples of sc, starting at ch->pos, into
// paintbuffer[0 .. count-1] through the per-volume lookup rows of
// snd_scaletable, and advances ch->pos by count:
//
//	lscale = snd_scaletable[ch->leftvol >> 3];
//	rscale = snd_scaletable[ch->rightvol >> 3];
//	sfx = sc->data + ch->pos;
//	ch->pos += count;
//	for (i=0 ; i < count ; i++)
//	{
//		data = sfx[i];
//		paintbuffer[i].left += lscale[data];
//		paintbuffer[i].right += rscale[data];
//	}
//
// Differences from that C outline, all visible in the code below:
//  - the volume clamp is an unsigned compare, so a negative volume is
//    treated as 255; the clamped value is not written back to ch
//  - samples are processed from the last one down to the first, two per
//    loop pass, after an odd leftover sample has been handled on its own
//  - the byte for the following step is fetched one step early, so one
//    byte in front of sfx[0] is read (never used)
//
// count <= 0 mixes nothing. Without the explicit test the loop would start
// with a zero or negative counter, never hit the "== 0" exit and write
// below paintbuffer.
//
// Registers in the loop:
//	%eax / %edx	lscale / rscale row base
//	%ebx		current sample byte, zero-extended
//	%ecx		samples left (also the paintbuffer index + 1)
//	%esi		sfx
//	%edi / %ebp	left / right accumulator
//----------------------------------------------------------------------
.globl C(SND_PaintChannelFrom8)
C(SND_PaintChannelFrom8):
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx
	pushl	%ebp

	movl	ch(%esp),%ebx
	movl	sc(%esp),%esi

// clamp both volumes to 255 (unsigned compare)
	movl	ch_leftvol(%ebx),%eax
	movl	ch_rightvol(%ebx),%edx
	cmpl	$255,%eax
	jna	LLeftSet
	movl	$255,%eax
LLeftSet:
	cmpl	$255,%edx
	jna	LRightSet
	movl	$255,%edx
LRightSet:

// (vol & 0xF8) << 7 is the byte offset of row (vol >> 3): each row holds
// 256 four-byte entries
	andl	$0xF8,%eax
	addl	$(sfxc_data),%esi
	andl	$0xF8,%edx
	movl	ch_pos(%ebx),%edi
	movl	count(%esp),%ecx
	addl	%edi,%esi			// sfx = sc->data + ch->pos
	shll	$7,%eax
	addl	%ecx,%edi
	shll	$7,%edx
	movl	%edi,ch_pos(%ebx)		// ch->pos += count
	addl	$(C(snd_scaletable)),%eax
	addl	$(C(snd_scaletable)),%edx

// nothing to mix for count <= 0 (ch->pos has already been updated, as in
// the C outline)
	testl	%ecx,%ecx
	jle	LDone

	subl	%ebx,%ebx
	movb	-1(%esi,%ecx,1),%bl		// data = sfx[count-1]

	testl	$1,%ecx
	jz	LMix8Loop

// odd count: mix the last sample on its own so the loop sees an even count
	movl	(%eax,%ebx,4),%edi
	movl	(%edx,%ebx,4),%ebp
	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)
	movb	-2(%esi,%ecx,1),%bl

	decl	%ecx
	jz	LDone

// two samples per pass, indices %ecx-1 and %ecx-2
LMix8Loop:

	movl	(%eax,%ebx,4),%edi
	movl	(%edx,%ebx,4),%ebp
	addl	C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size),%edi
	addl	C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size),%ebp
	movb	-2(%esi,%ecx,1),%bl
	movl	%edi,C(paintbuffer)+psp_left-psp_size(,%ecx,psp_size)
	movl	%ebp,C(paintbuffer)+psp_right-psp_size(,%ecx,psp_size)

	movl	(%eax,%ebx,4),%edi
	movl	(%edx,%ebx,4),%ebp
	movb	-3(%esi,%ecx,1),%bl		// prefetch for the next pass
	addl	C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size),%edi
	addl	C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size),%ebp
	movl	%edi,C(paintbuffer)+psp_left-psp_size*2(,%ecx,psp_size)
	movl	%ebp,C(paintbuffer)+psp_right-psp_size*2(,%ecx,psp_size)

	subl	$2,%ecx
	jnz	LMix8Loop

LDone:
	popl	%ebp
	popl	%ebx
	popl	%edi
	popl	%esi

	ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Transfer of stereo buffer to 16-bit DMA buffer code
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------
// Snd_WriteLinearBlastStereo16 (void)
//
// Takes its inputs from globals: converts snd_linear_count 32-bit samples
// at snd_p into 16-bit samples at snd_out. Each sample is scaled by
// snd_vol, shifted right by 8 (arithmetic) and clamped to
// -32768 .. 32767:
//
//	for (i=0 ; i < snd_linear_count ; i+=2)
//	{
//		val = (snd_p[i]*snd_vol)>>8;		// clamp, store snd_out[i]
//		val = (snd_p[i+1]*snd_vol)>>8;		// clamp, store snd_out[i+1]
//	}
//
// Samples are handled in pairs from the end of the buffer towards the
// start, and each pair is written with a single 32-bit store.
//
// snd_linear_count is expected to be even and positive. A count <= 0
// returns at once, and the loop ends as soon as the counter drops to zero
// or below, so an odd count can no longer make it run away (it still
// touches one element in front of each buffer). The globals snd_p and
// snd_out are only read; advancing snd_p is left to the caller.
//
// Registers: %ecx = samples left, %ebx = snd_p, %esi = snd_vol,
// %edi = snd_out, %eax / %edx = first / second sample of the pair.
//----------------------------------------------------------------------
.globl C(Snd_WriteLinearBlastStereo16)
C(Snd_WriteLinearBlastStereo16):
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

	movl	C(snd_linear_count),%ecx
	movl	C(snd_p),%ebx
	movl	C(snd_vol),%esi
	movl	C(snd_out),%edi

	testl	%ecx,%ecx			// nothing to convert
	jle	LWLBDone

LWLBLoopTop:

// first sample of the pair: index %ecx-2
	movl	-8(%ebx,%ecx,4),%eax
	imull	%esi,%eax
	sarl	$8,%eax
	cmpl	$0x7FFF,%eax
	jg	LClampHigh
	cmpl	$0xFFFF8000,%eax		// -32768
	jnl	LClampDone
	movl	$0xFFFF8000,%eax
	jmp	LClampDone
LClampHigh:
	movl	$0x7FFF,%eax
LClampDone:

// second sample of the pair: index %ecx-1
	movl	-4(%ebx,%ecx,4),%edx
	imull	%esi,%edx
	sarl	$8,%edx
	cmpl	$0x7FFF,%edx
	jg	LClampHigh2
	cmpl	$0xFFFF8000,%edx
	jnl	LClampDone2
	movl	$0xFFFF8000,%edx
	jmp	LClampDone2
LClampHigh2:
	movl	$0x7FFF,%edx
LClampDone2:

// pack both 16-bit results into one dword: second sample in the high half
	shll	$16,%edx
	andl	$0xFFFF,%eax
	orl	%eax,%edx
	movl	%edx,-4(%edi,%ecx,2)

	subl	$2,%ecx
	jg	LWLBLoopTop

LWLBDone:
	popl	%ebx
	popl	%edi
	popl	%esi

	ret
|
||||
|
||||
|
||||
#endif // id386
|
||||
|
172
source/surf16.S
Normal file
172
source/surf16.S
Normal file
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// surf16.s
|
||||
// x86 assembly-language 16 bpp surface block drawing code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
#if id386
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface block drawer
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
.data
|
||||
|
||||
// Pass counter of R_DrawSurfaceBlock16: initialised from blocksize and
// decremented once per pass until it reaches zero.
k: .long 0
|
||||
// Entry address picked from blockjumptable16; R_DrawSurfaceBlock16 jumps
// through it on every pass.
loopentry: .long 0
|
||||
|
||||
.align 4
|
||||
// Entry points into the code from block16.h, read at byte offset
// blocksize*2 - 4: blocksize 2, 4, 8 and 16 select entries 0, 1, 3 and 7.
// The zero entries are never selected for those sizes.
blockjumptable16:
|
||||
.long LEnter2_16
|
||||
.long LEnter4_16
|
||||
.long 0, LEnter8_16
|
||||
.long 0, 0, 0, LEnter16_16
|
||||
|
||||
|
||||
.text
|
||||
|
||||
.align 4
|
||||
.globl C(R_Surf16Start)
|
||||
C(R_Surf16Start):
|
||||
|
||||
// R_DrawSurfaceBlock16 (void)
//
// Takes no stack arguments; everything comes from globals (blocksize,
// prowdestbase, pbasesource, sourcesstep, sourcetstep, lightleft,
// lightright, lightleftstep, lightrightstep, surfrowbytes, blockdivshift,
// blockdivmask). It runs `blocksize` passes. Each pass computes a light
// step, zeroes %eax/%ecx, jumps into the code from block16.h at the entry
// chosen for blocksize, then advances the source, destination and light
// globals for the next pass.
//
// The body of each pass lives in block16.h, which is not shown here, so
// the exact register contract at the jump is not visible. The values live
// at the jump are: %edx = lightleft, %ebp = light step, %esi = pbasesource,
// %edi = prowdestbase, %ebx = sourcesstep, %eax = %ecx = 0.
// NOTE(review): %ebx is loaded only once before the loop, so block16.h
// presumably preserves it or does not need it after the first pass -- confirm.
.align 4
|
||||
.globl C(R_DrawSurfaceBlock16)
|
||||
C(R_DrawSurfaceBlock16):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
movl C(blocksize),%eax
|
||||
movl C(prowdestbase),%edi
|
||||
movl C(pbasesource),%esi
|
||||
movl C(sourcesstep),%ebx
|
||||
// table byte offset = blocksize*2 - 4 (blocksize 2/4/8/16 -> entry 0/1/3/7)
movl blockjumptable16-4(,%eax,2),%ecx
|
||||
movl %eax,k
|
||||
movl %ecx,loopentry
|
||||
movl C(lightleft),%edx
|
||||
movl C(lightright),%ebp
|
||||
|
||||
Lblockloop16:
|
||||
|
||||
// %ebp = (lightright - lightleft) >> blockdivshift (arithmetic shift)
subl %edx,%ebp
|
||||
movb C(blockdivshift),%cl
|
||||
sarl %cl,%ebp
|
||||
// if the shifted value is negative and has any blockdivmask bit set, add 1
// (presumably a rounding correction for negative steps -- TODO confirm)
jns Lp1_16
|
||||
testl C(blockdivmask),%ebp
|
||||
jz Lp1_16
|
||||
incl %ebp
|
||||
Lp1_16:
|
||||
|
||||
subl %eax,%eax
|
||||
subl %ecx,%ecx // high words must be 0 in loop for addressing
|
||||
|
||||
// enter the block16.h code at the entry point selected for blocksize
jmp *loopentry
|
||||
|
||||
.align 4
|
||||
|
||||
#include "block16.h"
|
||||
|
||||
// end of one pass: reload the working values from memory and advance them
movl C(pbasesource),%esi
|
||||
movl C(lightleft),%edx
|
||||
movl C(lightright),%ebp
|
||||
movl C(sourcetstep),%eax
|
||||
movl C(lightrightstep),%ecx
|
||||
movl C(prowdestbase),%edi
|
||||
|
||||
addl %eax,%esi // pbasesource += sourcetstep
|
||||
addl %ecx,%ebp // lightright += lightrightstep
|
||||
|
||||
movl C(lightleftstep),%eax
|
||||
movl C(surfrowbytes),%ecx
|
||||
|
||||
addl %eax,%edx // lightleft += lightleftstep
|
||||
addl %ecx,%edi // prowdestbase += surfrowbytes
|
||||
|
||||
movl %esi,C(pbasesource)
|
||||
movl %ebp,C(lightright)
|
||||
movl k,%eax
|
||||
movl %edx,C(lightleft)
|
||||
decl %eax // sets ZF for the jnz below; the movl stores in between leave flags alone
|
||||
movl %edi,C(prowdestbase)
|
||||
movl %eax,k
|
||||
jnz Lblockloop16
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
.globl C(R_Surf16End)
|
||||
C(R_Surf16End):
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Code patching routines
|
||||
//----------------------------------------------------------------------
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// Patch-site table consumed by R_Surf16Patch: 16 addresses, each four bytes
// before an LBPatchN label, i.e. the last 32-bit field of the instruction
// that precedes the label.
// NOTE(review): LBPatch0..LBPatch15 are not defined in the visible part of
// this file; presumably they come from block16.h - confirm.
LPatchTable16:
|
||||
.long LBPatch0-4
|
||||
.long LBPatch1-4
|
||||
.long LBPatch2-4
|
||||
.long LBPatch3-4
|
||||
.long LBPatch4-4
|
||||
.long LBPatch5-4
|
||||
.long LBPatch6-4
|
||||
.long LBPatch7-4
|
||||
.long LBPatch8-4
|
||||
.long LBPatch9-4
|
||||
.long LBPatch10-4
|
||||
.long LBPatch11-4
|
||||
.long LBPatch12-4
|
||||
.long LBPatch13-4
|
||||
.long LBPatch14-4
|
||||
.long LBPatch15-4
|
||||
|
||||
.text
|
||||
|
||||
//
// R_Surf16Patch
// Stores the current value of the global colormap into the 32-bit field at
// each of the 16 addresses listed in LPatchTable16.
// Takes no arguments; preserves ebx; clobbers eax, ecx and edx.
//
.align 4
|
||||
.globl C(R_Surf16Patch)
|
||||
C(R_Surf16Patch):
|
||||
pushl %ebx
|
||||
|
||||
movl C(colormap),%eax
|
||||
movl $LPatchTable16,%ebx
|
||||
// 16 = number of entries in LPatchTable16
movl $16,%ecx
|
||||
LPatchLoop16:
|
||||
// edx = next patch address; overwrite the field there with colormap
movl (%ebx),%edx
|
||||
addl $4,%ebx
|
||||
movl %eax,(%edx)
|
||||
decl %ecx
|
||||
jnz LPatchLoop16
|
||||
|
||||
popl %ebx
|
||||
|
||||
ret
|
||||
|
||||
|
||||
#endif // id386
|
783
source/surf8.S
Normal file
783
source/surf8.S
Normal file
|
@ -0,0 +1,783 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// surf8.s
|
||||
// x86 assembly-language 8 bpp surface block drawing code.
|
||||
//
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
#include "asm_draw.h"
|
||||
|
||||
#if id386
|
||||
|
||||
.data
|
||||
|
||||
// Outer-loop counter shared by the four R_DrawSurfaceBlock8_mip* routines:
// each one loads it from r_numvblocks and decrements it once per pass of
// its Lv_loop_mip* loop.
sb_v: .long 0
|
||||
|
||||
.text
|
||||
|
||||
// R_Surf8Start is an empty global label placed before the 8 bpp drawing code.
// NOTE(review): presumably paired with R_Surf8End to delimit the
// self-modified code region - confirm against the C side.
.align 4
|
||||
.globl C(R_Surf8Start)
|
||||
C(R_Surf8Start):
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface block drawer for mip level 0
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//
// R_DrawSurfaceBlock8_mip0
// Reads no stack arguments; inputs are the globals r_lightptr, r_numvblocks,
// prowdestbase, pbasesource, r_lightwidth, sourcetstep, surfrowbytes,
// r_sourcemax and r_stepback. lightright, lightrightstep, lightdelta and
// lightdeltastep are used as scratch, and r_lightptr is advanced by
// r_lightwidth entries per outer pass. prowdestbase and pbasesource are only
// read; the advanced edi/esi values are never stored back.
// Each row reads 16 source bytes at 0..15(%esi) and writes 16 bytes at
// 0..15(%edi) as four dwords.
// Pushes/pops ebp, edi, esi and ebx; eax, ecx and edx are clobbered.
// The 0x12345678 displacements are placeholders: R_Surf8Patch overwrites the
// four bytes before each LBPatchN label with the value of colormap.
//
.align 4
|
||||
.globl C(R_DrawSurfaceBlock8_mip0)
|
||||
C(R_DrawSurfaceBlock8_mip0):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
// for (v=0 ; v<numvblocks ; v++)
|
||||
// {
|
||||
movl C(r_lightptr),%ebx
|
||||
movl C(r_numvblocks),%eax
|
||||
|
||||
movl %eax,sb_v
|
||||
movl C(prowdestbase),%edi
|
||||
|
||||
movl C(pbasesource),%esi
|
||||
|
||||
Lv_loop_mip0:
|
||||
|
||||
// lightleft = lightptr[0];
|
||||
// lightright = lightptr[1];
|
||||
// lightdelta = (lightleft - lightright) & 0xFFFFF;
|
||||
movl (%ebx),%eax // lightleft
|
||||
movl 4(%ebx),%edx // lightright
|
||||
|
||||
movl %eax,%ebp
|
||||
movl C(r_lightwidth),%ecx
|
||||
|
||||
movl %edx,C(lightright)
|
||||
subl %edx,%ebp
|
||||
|
||||
andl $0xFFFFF,%ebp
|
||||
leal (%ebx,%ecx,4),%ebx
|
||||
|
||||
// lightptr += lightwidth;
|
||||
movl %ebx,C(r_lightptr)
|
||||
|
||||
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
|
||||
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
|
||||
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
|
||||
// 0xF0000000;
|
||||
movl 4(%ebx),%ecx // lightptr[1]
|
||||
movl (%ebx),%ebx // lightptr[0]
|
||||
|
||||
subl %eax,%ebx
|
||||
subl %edx,%ecx
|
||||
|
||||
sarl $4,%ecx
|
||||
// the top nibble of ebp doubles as the row counter: it starts at 0xF and
// lightdeltastep adds 0xF to it every row, so the add at the bottom of the
// loop carries 15 times and the loop body runs for 16 rows
orl $0xF0000000,%ebp
|
||||
|
||||
sarl $4,%ebx
|
||||
movl %ecx,C(lightrightstep)
|
||||
|
||||
subl %ecx,%ebx
|
||||
andl $0xFFFFF,%ebx
|
||||
|
||||
orl $0xF0000000,%ebx
|
||||
subl %ecx,%ecx // high word must be 0 in loop for addressing
|
||||
|
||||
movl %ebx,C(lightdeltastep)
|
||||
subl %ebx,%ebx // high word must be 0 in loop for addressing
|
||||
|
||||
// One row. Source bytes are consumed from offset 15 down to 0 while edx
// walks from lightright in steps of ebp (the row's lightdelta >> 4). Each
// lookup index has the source byte in its low byte and dh (bits 8-15 of the
// running light value) in its high byte; ebx and ecx alternate as index
// registers and the results are packed four at a time into eax.
Lblockloop8_mip0:
|
||||
movl %ebp,C(lightdelta)
|
||||
movb 14(%esi),%cl
|
||||
|
||||
sarl $4,%ebp
|
||||
movb %dh,%bh
|
||||
|
||||
movb 15(%esi),%bl
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch0:
|
||||
movb 13(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch1:
|
||||
movb 12(%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
// move the two finished bytes to the upper half of eax so al/ah can take
// the next pair
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
addl %ebp,%edx
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch2:
|
||||
|
||||
movb 11(%esi),%bl
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch3:
|
||||
|
||||
movb 10(%esi),%cl
|
||||
movl %eax,12(%edi)
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch4:
|
||||
movb 9(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch5:
|
||||
movb 8(%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
addl %ebp,%edx
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch6:
|
||||
|
||||
movb 7(%esi),%bl
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch7:
|
||||
|
||||
movb 6(%esi),%cl
|
||||
movl %eax,8(%edi)
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch8:
|
||||
movb 5(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch9:
|
||||
movb 4(%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
addl %ebp,%edx
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch10:
|
||||
|
||||
movb 3(%esi),%bl
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch11:
|
||||
|
||||
movb 2(%esi),%cl
|
||||
movl %eax,4(%edi)
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch12:
|
||||
movb 1(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch13:
|
||||
movb (%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch14:
|
||||
movl C(lightright),%edx
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch15:
|
||||
movl C(lightdelta),%ebp
|
||||
|
||||
movl %eax,(%edi)
|
||||
|
||||
addl C(sourcetstep),%esi
|
||||
addl C(surfrowbytes),%edi
|
||||
|
||||
addl C(lightrightstep),%edx
|
||||
// the carry out of this add is the loop condition tested by jc below; the
// movl in between leaves the flags untouched
addl C(lightdeltastep),%ebp
|
||||
|
||||
movl %edx,C(lightright)
|
||||
jc Lblockloop8_mip0
|
||||
|
||||
// if (pbasesource >= r_sourcemax)
|
||||
// pbasesource -= stepback;
|
||||
|
||||
cmpl C(r_sourcemax),%esi
|
||||
jb LSkip_mip0
|
||||
subl C(r_stepback),%esi
|
||||
LSkip_mip0:
|
||||
|
||||
movl C(r_lightptr),%ebx
|
||||
decl sb_v
|
||||
|
||||
jnz Lv_loop_mip0
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface block drawer for mip level 1
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//
// R_DrawSurfaceBlock8_mip1
// Reads no stack arguments; inputs are the globals r_lightptr, r_numvblocks,
// prowdestbase, pbasesource, r_lightwidth, sourcetstep, surfrowbytes,
// r_sourcemax and r_stepback. lightright, lightrightstep, lightdelta and
// lightdeltastep are used as scratch, and r_lightptr is advanced by
// r_lightwidth entries per outer pass. prowdestbase and pbasesource are only
// read; the advanced edi/esi values are never stored back.
// Each row reads 8 source bytes at 0..7(%esi) and writes 8 bytes at
// 0..7(%edi) as two dwords; light steps use a shift of 3.
// Pushes/pops ebp, edi, esi and ebx; eax, ecx and edx are clobbered.
// The 0x12345678 displacements are placeholders: R_Surf8Patch overwrites the
// four bytes before each LBPatchN label with the value of colormap.
//
.align 4
|
||||
.globl C(R_DrawSurfaceBlock8_mip1)
|
||||
C(R_DrawSurfaceBlock8_mip1):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
// for (v=0 ; v<numvblocks ; v++)
|
||||
// {
|
||||
movl C(r_lightptr),%ebx
|
||||
movl C(r_numvblocks),%eax
|
||||
|
||||
movl %eax,sb_v
|
||||
movl C(prowdestbase),%edi
|
||||
|
||||
movl C(pbasesource),%esi
|
||||
|
||||
Lv_loop_mip1:
|
||||
|
||||
// lightleft = lightptr[0];
|
||||
// lightright = lightptr[1];
|
||||
// lightdelta = (lightleft - lightright) & 0xFFFFF;
|
||||
movl (%ebx),%eax // lightleft
|
||||
movl 4(%ebx),%edx // lightright
|
||||
|
||||
movl %eax,%ebp
|
||||
movl C(r_lightwidth),%ecx
|
||||
|
||||
movl %edx,C(lightright)
|
||||
subl %edx,%ebp
|
||||
|
||||
andl $0xFFFFF,%ebp
|
||||
leal (%ebx,%ecx,4),%ebx
|
||||
|
||||
// lightptr += lightwidth;
|
||||
movl %ebx,C(r_lightptr)
|
||||
|
||||
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
|
||||
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
|
||||
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
|
||||
// 0xF0000000;
|
||||
movl 4(%ebx),%ecx // lightptr[1]
|
||||
movl (%ebx),%ebx // lightptr[0]
|
||||
|
||||
subl %eax,%ebx
|
||||
subl %edx,%ecx
|
||||
|
||||
sarl $3,%ecx
|
||||
// the top nibble of ebp doubles as the row counter: it starts at 0x7 and
// lightdeltastep adds 0xF to it every row, so the add at the bottom of the
// loop carries 7 times and the loop body runs for 8 rows
orl $0x70000000,%ebp
|
||||
|
||||
sarl $3,%ebx
|
||||
movl %ecx,C(lightrightstep)
|
||||
|
||||
subl %ecx,%ebx
|
||||
andl $0xFFFFF,%ebx
|
||||
|
||||
orl $0xF0000000,%ebx
|
||||
subl %ecx,%ecx // high word must be 0 in loop for addressing
|
||||
|
||||
movl %ebx,C(lightdeltastep)
|
||||
subl %ebx,%ebx // high word must be 0 in loop for addressing
|
||||
|
||||
// One row. Source bytes are consumed from offset 7 down to 0 while edx
// walks from lightright in steps of ebp (the row's lightdelta >> 3). Each
// lookup index has the source byte in its low byte and dh (bits 8-15 of the
// running light value) in its high byte.
Lblockloop8_mip1:
|
||||
movl %ebp,C(lightdelta)
|
||||
movb 6(%esi),%cl
|
||||
|
||||
sarl $3,%ebp
|
||||
movb %dh,%bh
|
||||
|
||||
movb 7(%esi),%bl
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch22:
|
||||
movb 5(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch23:
|
||||
movb 4(%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
// move the two finished bytes to the upper half of eax so al/ah can take
// the next pair
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
addl %ebp,%edx
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch24:
|
||||
|
||||
movb 3(%esi),%bl
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch25:
|
||||
|
||||
movb 2(%esi),%cl
|
||||
movl %eax,4(%edi)
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch26:
|
||||
movb 1(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch27:
|
||||
movb (%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch28:
|
||||
movl C(lightright),%edx
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch29:
|
||||
movl C(lightdelta),%ebp
|
||||
|
||||
movl %eax,(%edi)
|
||||
movl C(sourcetstep),%eax
|
||||
|
||||
addl %eax,%esi
|
||||
movl C(surfrowbytes),%eax
|
||||
|
||||
addl %eax,%edi
|
||||
movl C(lightrightstep),%eax
|
||||
|
||||
addl %eax,%edx
|
||||
movl C(lightdeltastep),%eax
|
||||
|
||||
// the carry out of this add is the loop condition tested by jc below; the
// movl in between leaves the flags untouched
addl %eax,%ebp
|
||||
movl %edx,C(lightright)
|
||||
|
||||
jc Lblockloop8_mip1
|
||||
|
||||
// if (pbasesource >= r_sourcemax)
|
||||
// pbasesource -= stepback;
|
||||
|
||||
cmpl C(r_sourcemax),%esi
|
||||
jb LSkip_mip1
|
||||
subl C(r_stepback),%esi
|
||||
LSkip_mip1:
|
||||
|
||||
movl C(r_lightptr),%ebx
|
||||
decl sb_v
|
||||
|
||||
jnz Lv_loop_mip1
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface block drawer for mip level 2
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//
// R_DrawSurfaceBlock8_mip2
// Reads no stack arguments; inputs are the globals r_lightptr, r_numvblocks,
// prowdestbase, pbasesource, r_lightwidth, sourcetstep, surfrowbytes,
// r_sourcemax and r_stepback. lightright, lightrightstep, lightdelta and
// lightdeltastep are used as scratch, and r_lightptr is advanced by
// r_lightwidth entries per outer pass. prowdestbase and pbasesource are only
// read; the advanced edi/esi values are never stored back.
// Each row reads 4 source bytes at 0..3(%esi) and writes one dword at
// (%edi); light steps use a shift of 2.
// Pushes/pops ebp, edi, esi and ebx; eax, ecx and edx are clobbered.
// The 0x12345678 displacements are placeholders: R_Surf8Patch overwrites the
// four bytes before each LBPatchN label with the value of colormap.
//
.align 4
|
||||
.globl C(R_DrawSurfaceBlock8_mip2)
|
||||
C(R_DrawSurfaceBlock8_mip2):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
// for (v=0 ; v<numvblocks ; v++)
|
||||
// {
|
||||
movl C(r_lightptr),%ebx
|
||||
movl C(r_numvblocks),%eax
|
||||
|
||||
movl %eax,sb_v
|
||||
movl C(prowdestbase),%edi
|
||||
|
||||
movl C(pbasesource),%esi
|
||||
|
||||
Lv_loop_mip2:
|
||||
|
||||
// lightleft = lightptr[0];
|
||||
// lightright = lightptr[1];
|
||||
// lightdelta = (lightleft - lightright) & 0xFFFFF;
|
||||
movl (%ebx),%eax // lightleft
|
||||
movl 4(%ebx),%edx // lightright
|
||||
|
||||
movl %eax,%ebp
|
||||
movl C(r_lightwidth),%ecx
|
||||
|
||||
movl %edx,C(lightright)
|
||||
subl %edx,%ebp
|
||||
|
||||
andl $0xFFFFF,%ebp
|
||||
leal (%ebx,%ecx,4),%ebx
|
||||
|
||||
// lightptr += lightwidth;
|
||||
movl %ebx,C(r_lightptr)
|
||||
|
||||
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
|
||||
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
|
||||
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
|
||||
// 0xF0000000;
|
||||
movl 4(%ebx),%ecx // lightptr[1]
|
||||
movl (%ebx),%ebx // lightptr[0]
|
||||
|
||||
subl %eax,%ebx
|
||||
subl %edx,%ecx
|
||||
|
||||
sarl $2,%ecx
|
||||
// the top nibble of ebp doubles as the row counter: it starts at 0x3 and
// lightdeltastep adds 0xF to it every row, so the add at the bottom of the
// loop carries 3 times and the loop body runs for 4 rows
orl $0x30000000,%ebp
|
||||
|
||||
sarl $2,%ebx
|
||||
movl %ecx,C(lightrightstep)
|
||||
|
||||
subl %ecx,%ebx
|
||||
|
||||
andl $0xFFFFF,%ebx
|
||||
|
||||
orl $0xF0000000,%ebx
|
||||
subl %ecx,%ecx // high word must be 0 in loop for addressing
|
||||
|
||||
movl %ebx,C(lightdeltastep)
|
||||
subl %ebx,%ebx // high word must be 0 in loop for addressing
|
||||
|
||||
// One row. Source bytes are consumed from offset 3 down to 0 while edx
// walks from lightright in steps of ebp (the row's lightdelta >> 2). Each
// lookup index has the source byte in its low byte and dh (bits 8-15 of the
// running light value) in its high byte.
Lblockloop8_mip2:
|
||||
movl %ebp,C(lightdelta)
|
||||
movb 2(%esi),%cl
|
||||
|
||||
sarl $2,%ebp
|
||||
movb %dh,%bh
|
||||
|
||||
movb 3(%esi),%bl
|
||||
addl %ebp,%edx
|
||||
|
||||
movb %dh,%ch
|
||||
addl %ebp,%edx
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch18:
|
||||
movb 1(%esi),%bl
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch19:
|
||||
movb (%esi),%cl
|
||||
|
||||
movb %dh,%bh
|
||||
addl %ebp,%edx
|
||||
|
||||
// move the two finished bytes to the upper half of eax so al/ah can take
// the next pair
rorl $16,%eax
|
||||
movb %dh,%ch
|
||||
|
||||
movb 0x12345678(%ebx),%ah
|
||||
LBPatch20:
|
||||
movl C(lightright),%edx
|
||||
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch21:
|
||||
movl C(lightdelta),%ebp
|
||||
|
||||
movl %eax,(%edi)
|
||||
movl C(sourcetstep),%eax
|
||||
|
||||
addl %eax,%esi
|
||||
movl C(surfrowbytes),%eax
|
||||
|
||||
addl %eax,%edi
|
||||
movl C(lightrightstep),%eax
|
||||
|
||||
addl %eax,%edx
|
||||
movl C(lightdeltastep),%eax
|
||||
|
||||
// the carry out of this add is the loop condition tested by jc below; the
// movl in between leaves the flags untouched
addl %eax,%ebp
|
||||
movl %edx,C(lightright)
|
||||
|
||||
jc Lblockloop8_mip2
|
||||
|
||||
// if (pbasesource >= r_sourcemax)
|
||||
// pbasesource -= stepback;
|
||||
|
||||
cmpl C(r_sourcemax),%esi
|
||||
jb LSkip_mip2
|
||||
subl C(r_stepback),%esi
|
||||
LSkip_mip2:
|
||||
|
||||
movl C(r_lightptr),%ebx
|
||||
decl sb_v
|
||||
|
||||
jnz Lv_loop_mip2
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Surface block drawer for mip level 3
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
//
// R_DrawSurfaceBlock8_mip3
// Reads no stack arguments; inputs are the globals r_lightptr, r_numvblocks,
// prowdestbase, pbasesource, r_lightwidth, sourcetstep, surfrowbytes,
// r_sourcemax and r_stepback. lightright, lightrightstep, lightdelta and
// lightdeltastep are used as scratch, and r_lightptr is advanced by
// r_lightwidth entries per outer pass. prowdestbase and pbasesource are only
// read; the advanced edi/esi values are never stored back.
// Unlike the other mip levels there is no inner row loop: each outer pass
// contains two straight-line rows, each reading 2 source bytes at
// 0..1(%esi) and storing 2 bytes at 0..1(%edi) one byte at a time. Light
// steps use a shift of 1.
// Pushes/pops ebp, edi, esi and ebx; eax, ecx and edx are clobbered.
// The 0x12345678 displacements are placeholders: R_Surf8Patch overwrites the
// four bytes before each LBPatchN label with the value of colormap.
//
.align 4
|
||||
.globl C(R_DrawSurfaceBlock8_mip3)
|
||||
C(R_DrawSurfaceBlock8_mip3):
|
||||
pushl %ebp // preserve caller's stack frame
|
||||
pushl %edi
|
||||
pushl %esi // preserve register variables
|
||||
pushl %ebx
|
||||
|
||||
// for (v=0 ; v<numvblocks ; v++)
|
||||
// {
|
||||
movl C(r_lightptr),%ebx
|
||||
movl C(r_numvblocks),%eax
|
||||
|
||||
movl %eax,sb_v
|
||||
movl C(prowdestbase),%edi
|
||||
|
||||
movl C(pbasesource),%esi
|
||||
|
||||
Lv_loop_mip3:
|
||||
|
||||
// lightleft = lightptr[0];
|
||||
// lightright = lightptr[1];
|
||||
// lightdelta = (lightleft - lightright) & 0xFFFFF;
|
||||
movl (%ebx),%eax // lightleft
|
||||
movl 4(%ebx),%edx // lightright
|
||||
|
||||
movl %eax,%ebp
|
||||
movl C(r_lightwidth),%ecx
|
||||
|
||||
movl %edx,C(lightright)
|
||||
subl %edx,%ebp
|
||||
|
||||
andl $0xFFFFF,%ebp
|
||||
leal (%ebx,%ecx,4),%ebx
|
||||
|
||||
movl %ebp,C(lightdelta)
|
||||
// lightptr += lightwidth;
|
||||
movl %ebx,C(r_lightptr)
|
||||
|
||||
// lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
|
||||
// lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
|
||||
// lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
|
||||
// 0xF0000000;
|
||||
movl 4(%ebx),%ecx // lightptr[1]
|
||||
movl (%ebx),%ebx // lightptr[0]
|
||||
|
||||
subl %eax,%ebx
|
||||
subl %edx,%ecx
|
||||
|
||||
sarl $1,%ecx
|
||||
|
||||
sarl $1,%ebx
|
||||
movl %ecx,C(lightrightstep)
|
||||
|
||||
subl %ecx,%ebx
|
||||
andl $0xFFFFF,%ebx
|
||||
|
||||
// ebp = lightdelta >> 1: the light step between the two bytes of row one
sarl $1,%ebp
|
||||
orl $0xF0000000,%ebx
|
||||
|
||||
movl %ebx,C(lightdeltastep)
|
||||
subl %ebx,%ebx // high word must be 0 in loop for addressing
|
||||
|
||||
// first row: byte 1 is looked up with the light value at lightright, byte 0
// with the value one step further on
movb 1(%esi),%bl
|
||||
subl %ecx,%ecx // high word must be 0 in loop for addressing
|
||||
|
||||
movb %dh,%bh
|
||||
movb (%esi),%cl
|
||||
|
||||
addl %ebp,%edx
|
||||
movb %dh,%ch
|
||||
|
||||
movb 0x12345678(%ebx),%al
|
||||
LBPatch16:
|
||||
movl C(lightright),%edx
|
||||
|
||||
movb %al,1(%edi)
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch17:
|
||||
|
||||
movb %al,(%edi)
|
||||
movl C(sourcetstep),%eax
|
||||
|
||||
addl %eax,%esi
|
||||
movl C(surfrowbytes),%eax
|
||||
|
||||
addl %eax,%edi
|
||||
movl C(lightdeltastep),%eax
|
||||
|
||||
// second row: ebp = (lightdelta + lightdeltastep) >> 1 and
// edx = lightright + lightrightstep, then the same two lookups
movl C(lightdelta),%ebp
|
||||
movb (%esi),%cl
|
||||
|
||||
addl %eax,%ebp
|
||||
movl C(lightrightstep),%eax
|
||||
|
||||
sarl $1,%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
movb %dh,%bh
|
||||
movb 1(%esi),%bl
|
||||
|
||||
addl %ebp,%edx
|
||||
movb %dh,%ch
|
||||
|
||||
movb 0x12345678(%ebx),%al
|
||||
LBPatch30:
|
||||
movl C(sourcetstep),%edx
|
||||
|
||||
movb %al,1(%edi)
|
||||
movb 0x12345678(%ecx),%al
|
||||
LBPatch31:
|
||||
|
||||
movb %al,(%edi)
|
||||
movl C(surfrowbytes),%ebp
|
||||
|
||||
addl %edx,%esi
|
||||
addl %ebp,%edi
|
||||
|
||||
// if (pbasesource >= r_sourcemax)
|
||||
// pbasesource -= stepback;
|
||||
|
||||
cmpl C(r_sourcemax),%esi
|
||||
jb LSkip_mip3
|
||||
subl C(r_stepback),%esi
|
||||
LSkip_mip3:
|
||||
|
||||
movl C(r_lightptr),%ebx
|
||||
decl sb_v
|
||||
|
||||
jnz Lv_loop_mip3
|
||||
|
||||
popl %ebx // restore register variables
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp // restore the caller's stack frame
|
||||
ret
|
||||
|
||||
|
||||
// R_Surf8End is an empty global label placed after the last 8 bpp block
// drawer.
// NOTE(review): presumably paired with R_Surf8Start to delimit the
// self-modified code region - confirm against the C side.
.globl C(R_Surf8End)
|
||||
C(R_Surf8End):
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Code patching routines
|
||||
//----------------------------------------------------------------------
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// Patch-site table consumed by R_Surf8Patch: 32 addresses, each four bytes
// before an LBPatchN label, i.e. the 32-bit displacement (written as
// 0x12345678 in the source) of the lookup instruction that precedes the
// label. LBPatch0-15 are in the mip0 drawer, 22-29 in mip1, 18-21 in mip2
// and 16, 17, 30, 31 in mip3.
LPatchTable8:
|
||||
.long LBPatch0-4
|
||||
.long LBPatch1-4
|
||||
.long LBPatch2-4
|
||||
.long LBPatch3-4
|
||||
.long LBPatch4-4
|
||||
.long LBPatch5-4
|
||||
.long LBPatch6-4
|
||||
.long LBPatch7-4
|
||||
.long LBPatch8-4
|
||||
.long LBPatch9-4
|
||||
.long LBPatch10-4
|
||||
.long LBPatch11-4
|
||||
.long LBPatch12-4
|
||||
.long LBPatch13-4
|
||||
.long LBPatch14-4
|
||||
.long LBPatch15-4
|
||||
.long LBPatch16-4
|
||||
.long LBPatch17-4
|
||||
.long LBPatch18-4
|
||||
.long LBPatch19-4
|
||||
.long LBPatch20-4
|
||||
.long LBPatch21-4
|
||||
.long LBPatch22-4
|
||||
.long LBPatch23-4
|
||||
.long LBPatch24-4
|
||||
.long LBPatch25-4
|
||||
.long LBPatch26-4
|
||||
.long LBPatch27-4
|
||||
.long LBPatch28-4
|
||||
.long LBPatch29-4
|
||||
.long LBPatch30-4
|
||||
.long LBPatch31-4
|
||||
|
||||
.text
|
||||
|
||||
//
// R_Surf8Patch
// Stores the current value of the global colormap into the 32-bit field at
// each of the 32 addresses listed in LPatchTable8, replacing the 0x12345678
// placeholder displacements in the R_DrawSurfaceBlock8_mip* routines.
// Takes no arguments; preserves ebx; clobbers eax, ecx and edx.
//
.align 4
|
||||
.globl C(R_Surf8Patch)
|
||||
C(R_Surf8Patch):
|
||||
pushl %ebx
|
||||
|
||||
movl C(colormap),%eax
|
||||
movl $LPatchTable8,%ebx
|
||||
// 32 = number of entries in LPatchTable8
movl $32,%ecx
|
||||
LPatchLoop8:
|
||||
// edx = next patch address; overwrite the displacement there with colormap
movl (%ebx),%edx
|
||||
addl $4,%ebx
|
||||
movl %eax,(%edx)
|
||||
decl %ecx
|
||||
jnz LPatchLoop8
|
||||
|
||||
popl %ebx
|
||||
|
||||
ret
|
||||
|
||||
#endif // id386
|
|
@ -7,3 +7,8 @@ void
|
|||
Draw_EndDisc(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Cmd_ForwardToServer: empty stub; takes no arguments and does nothing.
 * NOTE(review): presumably a placeholder so that references to this symbol
 * still link in this build - confirm.
 */
void
|
||||
Cmd_ForwardToServer (void)
|
||||
{
|
||||
}
|
||||
|
|
95
source/sys_dosa.S
Normal file
95
source/sys_dosa.S
Normal file
|
@ -0,0 +1,95 @@
|
|||
//
|
||||
// sys_dosa.s
|
||||
// x86 assembly-language DOS-dependent routines.
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
|
||||
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// 32-byte scratch buffer that receives the x87 environment image written by
// fnstenv.
fpenv:
|
||||
.long 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
//
// MaskExceptions
// Saves the x87 environment to fpenv, sets the low six bits of its first
// dword and reloads it. In the x87 environment image that dword holds the
// control word, whose bits 0-5 are the exception mask bits.
// Takes no arguments and touches no general-purpose registers.
//
.globl C(MaskExceptions)
|
||||
C(MaskExceptions):
|
||||
fnstenv fpenv
|
||||
orl $0x3F,fpenv
|
||||
fldenv fpenv
|
||||
|
||||
ret
|
||||
|
||||
// Disabled by "#if 0", so this routine is never assembled. It would save
// the x87 environment, clear bits 0-4 of its first dword (the mask 0xFFFFFFE0
// leaves bit 5 unchanged) and reload it.
#if 0
|
||||
.globl C(unmaskexceptions)
|
||||
C(unmaskexceptions):
|
||||
fnstenv fpenv
|
||||
andl $0xFFFFFFE0,fpenv
|
||||
fldenv fpenv
|
||||
|
||||
ret
|
||||
#endif
|
||||
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// x87 control-word slots. They are exported under their bare names (no C()
// wrapper). Sys_SetFPCW fills cw, full_cw, single_cw and ceil_cw;
// Sys_PushFPCW_SetHigh fills pushed_cw.
.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
|
||||
// control word with precision field 00 and rounding field 10
ceil_cw: .long 0
|
||||
// control word with precision field 00 and rounding field 11
single_cw: .long 0
|
||||
// control word with precision field 11 and rounding field 00
full_cw: .long 0
|
||||
// control word as captured by fnstcw in Sys_SetFPCW
cw: .long 0
|
||||
// single save slot used by Sys_PushFPCW_SetHigh / Sys_PopFPCW
pushed_cw: .long 0
|
||||
|
||||
.text
|
||||
|
||||
// Sys_LowFPPrecision: loads the x87 control word from single_cw.
// Takes no arguments and touches no general-purpose registers.
.globl C(Sys_LowFPPrecision)
|
||||
C(Sys_LowFPPrecision):
|
||||
fldcw single_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_HighFPPrecision: loads the x87 control word from full_cw.
// Takes no arguments and touches no general-purpose registers.
.globl C(Sys_HighFPPrecision)
|
||||
C(Sys_HighFPPrecision):
|
||||
fldcw full_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_PushFPCW_SetHigh: stores the current x87 control word in pushed_cw,
// then loads full_cw. pushed_cw is a single slot, so a second call before
// Sys_PopFPCW overwrites the previously saved value.
.globl C(Sys_PushFPCW_SetHigh)
|
||||
C(Sys_PushFPCW_SetHigh):
|
||||
fnstcw pushed_cw
|
||||
fldcw full_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_PopFPCW: reloads the x87 control word from pushed_cw, the slot written
// by Sys_PushFPCW_SetHigh.
.globl C(Sys_PopFPCW)
|
||||
C(Sys_PopFPCW):
|
||||
fldcw pushed_cw
|
||||
|
||||
ret
|
||||
|
||||
//
// Sys_SetFPCW
// Captures the current x87 control word in cw and derives three variants
// from it: full_cw, single_cw and ceil_cw. ah is bits 8-15 of the value, so
// "andb $0xF0,%ah" clears bits 8-11 (precision control in bits 8-9, rounding
// control in bits 10-11) before the wanted field values are or-ed in.
// When id386 is 0 the modifications are compiled out and all three slots
// receive the unmodified control word.
// Takes no arguments; clobbers eax.
//
.globl C(Sys_SetFPCW)
|
||||
C(Sys_SetFPCW):
|
||||
fnstcw cw
|
||||
movl cw,%eax
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x03,%ah // round mode, 64-bit precision
|
||||
#endif
|
||||
movl %eax,full_cw
|
||||
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x0C,%ah // chop mode, single precision
|
||||
#endif
|
||||
movl %eax,single_cw
|
||||
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x08,%ah // ceil mode, single precision
|
||||
#endif
|
||||
movl %eax,ceil_cw
|
||||
|
||||
ret
|
||||
|
115
source/sys_wina.S
Normal file
115
source/sys_wina.S
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
Copyright (C) 1996-1997 Id Software, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
*/
|
||||
//
|
||||
// sys_wina.s
|
||||
// x86 assembly-language Win-dependent routines.
|
||||
|
||||
#include "asm_i386.h"
|
||||
#include "quakeasm.h"
|
||||
|
||||
//@@@ should be id386-dependent, and have an equivalent C path
|
||||
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// 32-byte scratch buffer that receives the x87 environment image written by
// fnstenv.
fpenv:
|
||||
.long 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
.text
|
||||
|
||||
//
// MaskExceptions
// Saves the x87 environment to fpenv, sets the low six bits of its first
// dword and reloads it. In the x87 environment image that dword holds the
// control word, whose bits 0-5 are the exception mask bits.
// Takes no arguments and touches no general-purpose registers.
//
.globl C(MaskExceptions)
|
||||
C(MaskExceptions):
|
||||
fnstenv fpenv
|
||||
orl $0x3F,fpenv
|
||||
fldenv fpenv
|
||||
|
||||
ret
|
||||
|
||||
// Disabled by "#if 0", so this routine is never assembled. It would save
// the x87 environment, clear bits 0-4 of its first dword (the mask 0xFFFFFFE0
// leaves bit 5 unchanged) and reload it.
#if 0
|
||||
.globl C(unmaskexceptions)
|
||||
C(unmaskexceptions):
|
||||
fnstenv fpenv
|
||||
andl $0xFFFFFFE0,fpenv
|
||||
fldenv fpenv
|
||||
|
||||
ret
|
||||
#endif
|
||||
|
||||
.data
|
||||
|
||||
.align 4
|
||||
// x87 control-word slots. They are exported under their bare names (no C()
// wrapper). Sys_SetFPCW fills cw, full_cw, single_cw and ceil_cw;
// Sys_PushFPCW_SetHigh fills pushed_cw.
.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
|
||||
// control word with precision field 00 and rounding field 10
ceil_cw: .long 0
|
||||
// control word with precision field 00 and rounding field 11
single_cw: .long 0
|
||||
// control word with precision field 11 and rounding field 00
full_cw: .long 0
|
||||
// control word as captured by fnstcw in Sys_SetFPCW
cw: .long 0
|
||||
// single save slot used by Sys_PushFPCW_SetHigh / Sys_PopFPCW
pushed_cw: .long 0
|
||||
|
||||
.text
|
||||
|
||||
// Sys_LowFPPrecision: loads the x87 control word from single_cw.
// Takes no arguments and touches no general-purpose registers.
.globl C(Sys_LowFPPrecision)
|
||||
C(Sys_LowFPPrecision):
|
||||
fldcw single_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_HighFPPrecision: loads the x87 control word from full_cw.
// Takes no arguments and touches no general-purpose registers.
.globl C(Sys_HighFPPrecision)
|
||||
C(Sys_HighFPPrecision):
|
||||
fldcw full_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_PushFPCW_SetHigh: stores the current x87 control word in pushed_cw,
// then loads full_cw. pushed_cw is a single slot, so a second call before
// Sys_PopFPCW overwrites the previously saved value.
.globl C(Sys_PushFPCW_SetHigh)
|
||||
C(Sys_PushFPCW_SetHigh):
|
||||
fnstcw pushed_cw
|
||||
fldcw full_cw
|
||||
|
||||
ret
|
||||
|
||||
// Sys_PopFPCW: reloads the x87 control word from pushed_cw, the slot written
// by Sys_PushFPCW_SetHigh.
.globl C(Sys_PopFPCW)
|
||||
C(Sys_PopFPCW):
|
||||
fldcw pushed_cw
|
||||
|
||||
ret
|
||||
|
||||
//
// Sys_SetFPCW
// Captures the current x87 control word in cw and derives three variants
// from it: full_cw, single_cw and ceil_cw. ah is bits 8-15 of the value, so
// "andb $0xF0,%ah" clears bits 8-11 (precision control in bits 8-9, rounding
// control in bits 10-11) before the wanted field values are or-ed in.
// When id386 is 0 the modifications are compiled out and all three slots
// receive the unmodified control word.
// Takes no arguments; clobbers eax.
//
.globl C(Sys_SetFPCW)
|
||||
C(Sys_SetFPCW):
|
||||
fnstcw cw
|
||||
movl cw,%eax
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x03,%ah // round mode, 64-bit precision
|
||||
#endif
|
||||
movl %eax,full_cw
|
||||
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x0C,%ah // chop mode, single precision
|
||||
#endif
|
||||
movl %eax,single_cw
|
||||
|
||||
#if id386
|
||||
andb $0xF0,%ah
|
||||
orb $0x08,%ah // ceil mode, single precision
|
||||
#endif
|
||||
movl %eax,ceil_cw
|
||||
|
||||
ret
|
||||
|
Loading…
Reference in a new issue