2006-02-24 04:48:15 +00:00
|
|
|
;*
|
|
|
|
;* tmap.nas
|
|
|
|
;* The texture-mapping inner loops in pure assembly language.
|
|
|
|
;*
|
|
|
|
;*---------------------------------------------------------------------------
|
2006-06-11 01:37:00 +00:00
|
|
|
;* Copyright 1998-2006 Randy Heit
|
2006-02-24 04:48:15 +00:00
|
|
|
;* All rights reserved.
|
|
|
|
;*
|
|
|
|
;* Redistribution and use in source and binary forms, with or without
|
|
|
|
;* modification, are permitted provided that the following conditions
|
|
|
|
;* are met:
|
|
|
|
;*
|
|
|
|
;* 1. Redistributions of source code must retain the above copyright
|
|
|
|
;* notice, this list of conditions and the following disclaimer.
|
|
|
|
;* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
;* notice, this list of conditions and the following disclaimer in the
|
|
|
|
;* documentation and/or other materials provided with the distribution.
|
|
|
|
;* 3. The name of the author may not be used to endorse or promote products
|
|
|
|
;* derived from this software without specific prior written permission.
|
|
|
|
;*
|
|
|
|
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
;*---------------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
|
|
|
|
BITS 32
|
|
|
|
|
About a week's worth of changes here. As a heads-up, I wouldn't be
surprised if this doesn't build in Linux right now. The CMakeLists.txt
were checked with MinGW and NMake, but how they fair under Linux is an
unknown to me at this time.
- Converted most sprintf (and all wsprintf) calls to either mysnprintf or
FStrings, depending on the situation.
- Changed the strings in the wbstartstruct to be FStrings.
- Changed myvsnprintf() to output nothing if count is greater than INT_MAX.
This is so that I can use a series of mysnprintf() calls and advance the
pointer for each one. Once the pointer goes beyond the end of the buffer,
the count will go negative, but since it's an unsigned type it will be
seen as excessively huge instead. This should not be a problem, as there's
no reason for ZDoom to be using text buffers larger than 2 GB anywhere.
- Ripped out the disabled bit from FGameConfigFile::MigrateOldConfig().
- Changed CalcMapName() to return an FString instead of a pointer to a static
buffer.
- Changed startmap in d_main.cpp into an FString.
- Changed CheckWarpTransMap() to take an FString& as the first argument.
- Changed d_mapname in g_level.cpp into an FString.
- Changed DoSubstitution() in ct_chat.cpp to place the substitutions in an
FString.
- Fixed: The MAPINFO parser wrote into the string buffer to construct a map
name when given a Hexen map number. This was fine with the old scanner
code, but only a happy coincidence prevents it from crashing with the new
code
- Added the 'B' conversion specifier to StringFormat::VWorker() for printing
binary numbers.
- Added CMake support for building with MinGW, MSYS, and NMake. Linux support
is probably broken until I get around to booting into Linux again. Niceties
provided over the existing Makefiles they're replacing:
* All command-line builds can use the same build system, rather than having
a separate one for MinGW and another for Linux.
* Microsoft's NMake tool is supported as a target.
* Progress meters.
* Parallel makes work from a fresh checkout without needing to be primed
first with a single-threaded make.
* Porting to other architectures should be simplified, whenever that day
comes.
- Replaced the makewad tool with zipdir. This handles the dependency tracking
itself instead of generating an external makefile to do it, since I couldn't
figure out how to generate a makefile with an external tool and include it
with a CMake-generated makefile. Where makewad used a master list of files
to generate the package file, zipdir just zips the entire contents of one or
more directories.
- Added the gdtoa package from netlib's fp library so that ZDoom's printf-style
formatting can be entirely independant of the CRT.
SVN r1082 (trunk)
2008-07-23 04:57:26 +00:00
|
|
|
%include "valgrind.inc"
|
2008-06-08 02:31:30 +00:00
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
; Segment/section definition macros.
|
|
|
|
|
|
|
|
SECTION .data
|
|
|
|
|
|
|
|
%define SPACEFILLER4 (0x44444444)
|
|
|
|
|
|
|
|
; If you change this in r_draw.c, be sure to change it here, too!
|
|
|
|
FUZZTABLE equ 50
|
|
|
|
|
2008-02-28 05:24:06 +00:00
|
|
|
%ifndef M_TARGET_LINUX
|
|
|
|
|
|
|
|
%define ylookup _ylookup
|
|
|
|
%define centery _centery
|
|
|
|
%define fuzzpos _fuzzpos
|
|
|
|
%define fuzzoffset _fuzzoffset
|
|
|
|
%define NormalLight _NormalLight
|
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of
registers AMD64 provides, this routine still needs to be written as self-
modifying code for maximum performance. The additional registers do allow
for further optimization over the x86 version by allowing all four pixels
to be in flight at the same time. The end result is that AMD64 ASM is about
2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM.
(For further comparison, AMD64 C and x86 C are practically the same for
this function.) Should I port any more assembly to AMD64, mvlineasm4 is the
most likely candidate, but it's not used enough at this point to bother.
Also, this may or may not work with Linux at the moment, since it doesn't
have the eh_handler metadata. Win64 is easier, since I just need to
structure the function prologue and epilogue properly and use some
assembler directives/macros to automatically generate the metadata. And
that brings up another point: You need YASM to assemble the AMD64 code,
because NASM doesn't support the Win64 metadata directives.
- Added an SSE version of DoBlending. This is strictly C intrinsics.
VC++ still throws around unneccessary register moves. GCC seems to be
pretty close to optimal, requiring only about 2 cycles/color. They're
both faster than my hand-written MMX routine, so I don't need to feel
bad about not hand-optimizing this for x64 builds.
- Removed an extra instruction from DoBlending_MMX, transposed two
instructions, and unrolled it once, shaving off about 80 cycles from the
time required to blend 256 palette entries. Why? Because I tried writing
a C version of the routine using compiler intrinsics and was appalled by
all the extra movq's VC++ added to the code. GCC was better, but still
generated extra instructions. I only wanted a C version because I can't
use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit
of a pain. (It's a pain because Linux and Windows have different calling
conventions, and you need to maintain extra metadata for functions.) So,
the assembly version stays and the C version stays out.
- Removed all the pixel doubling r_detail modes, since the one platform they
were intended to assist (486) actually sees very little benefit from them.
- Rewrote CheckMMX in C and renamed it to CheckCPU.
- Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache
only for AMD processors, so we must not use it on other architectures, or
we end up overwriting the L1 cache line size with 0 or some other number
we don't actually understand.
SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
|
|
|
%define viewheight _viewheight
|
2008-02-28 05:24:06 +00:00
|
|
|
%define fuzzviewheight _fuzzviewheight
|
|
|
|
%define CPU _CPU
|
|
|
|
|
|
|
|
%define dc_pitch _dc_pitch
|
|
|
|
%define dc_colormap _dc_colormap
|
|
|
|
%define dc_color _dc_color
|
|
|
|
%define dc_iscale _dc_iscale
|
|
|
|
%define dc_texturefrac _dc_texturefrac
|
|
|
|
%define dc_srcblend _dc_srcblend
|
|
|
|
%define dc_destblend _dc_destblend
|
|
|
|
%define dc_source _dc_source
|
|
|
|
%define dc_yl _dc_yl
|
|
|
|
%define dc_yh _dc_yh
|
|
|
|
%define dc_x _dc_x
|
|
|
|
%define dc_count _dc_count
|
|
|
|
%define dc_dest _dc_dest
|
|
|
|
%define dc_destorg _dc_destorg
|
|
|
|
|
|
|
|
%define Col2RGB8 _Col2RGB8
|
|
|
|
%define RGB32k _RGB32k
|
|
|
|
|
|
|
|
%define dc_ctspan _dc_ctspan
|
|
|
|
%define dc_temp _dc_temp
|
|
|
|
|
|
|
|
%define ds_xstep _ds_xstep
|
|
|
|
%define ds_ystep _ds_ystep
|
|
|
|
%define ds_colormap _ds_colormap
|
|
|
|
%define ds_source _ds_source
|
|
|
|
%define ds_x1 _ds_x1
|
|
|
|
%define ds_x2 _ds_x2
|
|
|
|
%define ds_xfrac _ds_xfrac
|
|
|
|
%define ds_yfrac _ds_yfrac
|
|
|
|
%define ds_y _ds_y
|
|
|
|
|
|
|
|
%define ds_cursource _ds_cursource
|
|
|
|
%define ds_curcolormap _ds_curcolormap
|
|
|
|
|
|
|
|
%define R_SetSpanSource_ASM _R_SetSpanSource_ASM
|
|
|
|
%define R_SetSpanSize_ASM _R_SetSpanSize_ASM
|
|
|
|
%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM
|
|
|
|
%define R_SetupShadedCol _R_SetupShadedCol
|
|
|
|
%define R_SetupAddCol _R_SetupAddCol
|
|
|
|
%define R_SetupAddClampCol _R_SetupAddClampCol
|
|
|
|
|
|
|
|
%endif
|
2006-02-24 04:48:15 +00:00
|
|
|
|
|
|
|
EXTERN ylookup
|
|
|
|
EXTERN centery
|
|
|
|
EXTERN fuzzpos
|
|
|
|
EXTERN fuzzoffset
|
|
|
|
EXTERN NormalLight
|
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of
registers AMD64 provides, this routine still needs to be written as self-
modifying code for maximum performance. The additional registers do allow
for further optimization over the x86 version by allowing all four pixels
to be in flight at the same time. The end result is that AMD64 ASM is about
2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM.
(For further comparison, AMD64 C and x86 C are practically the same for
this function.) Should I port any more assembly to AMD64, mvlineasm4 is the
most likely candidate, but it's not used enough at this point to bother.
Also, this may or may not work with Linux at the moment, since it doesn't
have the eh_handler metadata. Win64 is easier, since I just need to
structure the function prologue and epilogue properly and use some
assembler directives/macros to automatically generate the metadata. And
that brings up another point: You need YASM to assemble the AMD64 code,
because NASM doesn't support the Win64 metadata directives.
- Added an SSE version of DoBlending. This is strictly C intrinsics.
VC++ still throws around unneccessary register moves. GCC seems to be
pretty close to optimal, requiring only about 2 cycles/color. They're
both faster than my hand-written MMX routine, so I don't need to feel
bad about not hand-optimizing this for x64 builds.
- Removed an extra instruction from DoBlending_MMX, transposed two
instructions, and unrolled it once, shaving off about 80 cycles from the
time required to blend 256 palette entries. Why? Because I tried writing
a C version of the routine using compiler intrinsics and was appalled by
all the extra movq's VC++ added to the code. GCC was better, but still
generated extra instructions. I only wanted a C version because I can't
use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit
of a pain. (It's a pain because Linux and Windows have different calling
conventions, and you need to maintain extra metadata for functions.) So,
the assembly version stays and the C version stays out.
- Removed all the pixel doubling r_detail modes, since the one platform they
were intended to assist (486) actually sees very little benefit from them.
- Rewrote CheckMMX in C and renamed it to CheckCPU.
- Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache
only for AMD processors, so we must not use it on other architectures, or
we end up overwriting the L1 cache line size with 0 or some other number
we don't actually understand.
SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
|
|
|
EXTERN viewheight
|
2006-02-24 04:48:15 +00:00
|
|
|
EXTERN fuzzviewheight
|
|
|
|
EXTERN CPU
|
|
|
|
|
|
|
|
EXTERN dc_pitch
|
|
|
|
EXTERN dc_colormap
|
2008-02-27 03:11:35 +00:00
|
|
|
EXTERN dc_color
|
2006-02-24 04:48:15 +00:00
|
|
|
EXTERN dc_iscale
|
|
|
|
EXTERN dc_texturefrac
|
2008-02-28 05:24:06 +00:00
|
|
|
EXTERN dc_srcblend
|
|
|
|
EXTERN dc_destblend
|
2006-02-24 04:48:15 +00:00
|
|
|
EXTERN dc_source
|
|
|
|
EXTERN dc_yl
|
|
|
|
EXTERN dc_yh
|
|
|
|
EXTERN dc_x
|
|
|
|
EXTERN dc_count
|
|
|
|
EXTERN dc_dest
|
|
|
|
EXTERN dc_destorg
|
|
|
|
|
|
|
|
EXTERN dc_ctspan
|
|
|
|
EXTERN dc_temp
|
|
|
|
|
2008-02-27 03:11:35 +00:00
|
|
|
EXTERN Col2RGB8
|
|
|
|
EXTERN RGB32k
|
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
EXTERN ds_xstep
|
|
|
|
EXTERN ds_ystep
|
|
|
|
EXTERN ds_colormap
|
|
|
|
EXTERN ds_source
|
|
|
|
EXTERN ds_x1
|
|
|
|
EXTERN ds_x2
|
|
|
|
EXTERN ds_xfrac
|
|
|
|
EXTERN ds_yfrac
|
|
|
|
EXTERN ds_y
|
|
|
|
|
|
|
|
GLOBAL ds_cursource
|
|
|
|
GLOBAL ds_curcolormap
|
|
|
|
|
|
|
|
|
|
|
|
ds_cursource:
|
|
|
|
DD 0
|
|
|
|
|
|
|
|
ds_curcolormap:
|
|
|
|
DD 0
|
|
|
|
|
|
|
|
|
|
|
|
; Local stuff:
|
|
|
|
lastAddress DD 0
|
|
|
|
pixelcount DD 0
|
|
|
|
|
|
|
|
SECTION .text
|
|
|
|
|
|
|
|
|
|
|
|
GLOBAL @R_SetSpanSource_ASM@4
|
|
|
|
GLOBAL R_SetSpanSource_ASM
|
|
|
|
|
|
|
|
R_SetSpanSource_ASM:
|
|
|
|
mov ecx,[esp+4]
|
|
|
|
|
|
|
|
@R_SetSpanSource_ASM@4:
|
|
|
|
mov [spreada+2],ecx
|
|
|
|
mov [spreadb+2],ecx
|
|
|
|
mov [spreadc+2],ecx
|
|
|
|
mov [spreadd+2],ecx
|
|
|
|
mov [spreade+2],ecx
|
|
|
|
mov [spreadf+2],ecx
|
|
|
|
mov [spreadg+2],ecx
|
|
|
|
|
|
|
|
mov [mspreada+2],ecx
|
|
|
|
mov [mspreadb+2],ecx
|
|
|
|
mov [mspreadc+2],ecx
|
|
|
|
mov [mspreadd+2],ecx
|
|
|
|
mov [mspreade+2],ecx
|
|
|
|
mov [mspreadf+2],ecx
|
|
|
|
mov [mspreadg+2],ecx
|
2008-06-08 02:31:30 +00:00
|
|
|
|
|
|
|
selfmod spreada, mspreadg+6
|
2006-02-24 04:48:15 +00:00
|
|
|
|
|
|
|
mov [ds_cursource],ecx
|
|
|
|
ret
|
|
|
|
|
|
|
|
GLOBAL @R_SetSpanColormap_ASM@4
|
|
|
|
GLOBAL R_SetSpanColormap_ASM
|
|
|
|
|
|
|
|
R_SetSpanColormap_ASM:
|
|
|
|
mov ecx,[esp+4]
|
|
|
|
|
|
|
|
@R_SetSpanColormap_ASM@4:
|
|
|
|
mov [spmapa+2],ecx
|
|
|
|
mov [spmapb+2],ecx
|
|
|
|
mov [spmapc+2],ecx
|
|
|
|
mov [spmapd+2],ecx
|
|
|
|
mov [spmape+2],ecx
|
|
|
|
mov [spmapf+2],ecx
|
|
|
|
mov [spmapg+2],ecx
|
|
|
|
|
|
|
|
mov [mspmapa+2],ecx
|
|
|
|
mov [mspmapb+2],ecx
|
|
|
|
mov [mspmapc+2],ecx
|
|
|
|
mov [mspmapd+2],ecx
|
|
|
|
mov [mspmape+2],ecx
|
|
|
|
mov [mspmapf+2],ecx
|
|
|
|
mov [mspmapg+2],ecx
|
2008-06-08 02:31:30 +00:00
|
|
|
|
|
|
|
selfmod spmapa, mspmapg+6
|
2006-02-24 04:48:15 +00:00
|
|
|
|
|
|
|
mov [ds_curcolormap],ecx
|
|
|
|
ret
|
|
|
|
|
|
|
|
GLOBAL R_SetSpanSize_ASM
|
|
|
|
|
|
|
|
EXTERN SetTiltedSpanSize
|
|
|
|
|
|
|
|
R_SetSpanSize_ASM:
|
|
|
|
mov edx,[esp+4]
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
call SetTiltedSpanSize
|
|
|
|
|
|
|
|
mov [dsy1+2],dl
|
|
|
|
mov [dsy2+2],dl
|
|
|
|
|
|
|
|
mov [dsx1+2],cl
|
|
|
|
mov [dsx2+2],cl
|
|
|
|
mov [dsx3+2],cl
|
|
|
|
mov [dsx4+2],cl
|
|
|
|
mov [dsx5+2],cl
|
|
|
|
mov [dsx6+2],cl
|
|
|
|
mov [dsx7+2],cl
|
|
|
|
|
|
|
|
mov [dmsy1+2],dl
|
|
|
|
mov [dmsy2+2],dl
|
|
|
|
|
|
|
|
mov [dmsx1+2],cl
|
|
|
|
mov [dmsx2+2],cl
|
|
|
|
mov [dmsx3+2],cl
|
|
|
|
mov [dmsx4+2],cl
|
|
|
|
mov [dmsx5+2],cl
|
|
|
|
mov [dmsx6+2],cl
|
|
|
|
mov [dmsx7+2],cl
|
|
|
|
|
|
|
|
push ecx
|
|
|
|
add ecx,edx
|
|
|
|
mov eax,1
|
|
|
|
shl eax,cl
|
|
|
|
dec eax
|
|
|
|
mov [dsm1+2],eax
|
|
|
|
mov [dsm5+1],eax
|
|
|
|
mov [dsm6+1],eax
|
|
|
|
mov [dsm7+1],eax
|
|
|
|
|
|
|
|
mov [dmsm1+2],eax
|
|
|
|
mov [dmsm5+1],eax
|
|
|
|
mov [dmsm6+1],eax
|
|
|
|
mov [dmsm7+1],eax
|
|
|
|
pop ecx
|
|
|
|
ror eax,cl
|
|
|
|
mov [dsm2+2],eax
|
|
|
|
mov [dsm3+2],eax
|
|
|
|
mov [dsm4+2],eax
|
|
|
|
|
|
|
|
mov [dmsm2+2],eax
|
|
|
|
mov [dmsm3+2],eax
|
|
|
|
mov [dmsm4+2],eax
|
|
|
|
and eax,0xffff
|
|
|
|
not eax
|
|
|
|
mov [dsm8+2],eax
|
|
|
|
mov [dsm9+2],eax
|
|
|
|
|
|
|
|
mov [dmsm8+2],eax
|
|
|
|
mov [dmsm9+2],eax
|
|
|
|
|
|
|
|
neg dl
|
|
|
|
mov [dsy3+2],dl
|
|
|
|
mov [dsy4+2],dl
|
|
|
|
|
|
|
|
mov [dmsy3+2],dl
|
|
|
|
mov [dmsy4+2],dl
|
|
|
|
|
2008-06-08 02:31:30 +00:00
|
|
|
selfmod dsy1, dmsm7+6
|
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
aret: ret
|
|
|
|
|
2010-08-13 03:14:05 +00:00
|
|
|
%ifdef M_TARGET_MACHO
|
|
|
|
SECTION .text align=64
|
|
|
|
%else
|
2006-02-24 04:48:15 +00:00
|
|
|
SECTION .rtext progbits alloc exec write align=64
|
2010-08-13 03:14:05 +00:00
|
|
|
%endif
|
|
|
|
|
|
|
|
%ifdef M_TARGET_MACHO
|
2010-08-13 22:12:42 +00:00
|
|
|
GLOBAL _rtext_tmap_start
|
|
|
|
_rtext_tmap_start:
|
2010-08-13 03:14:05 +00:00
|
|
|
%endif
|
2006-02-24 04:48:15 +00:00
|
|
|
|
2008-06-08 02:31:30 +00:00
|
|
|
rtext_start:
|
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
GLOBAL @R_DrawSpanP_ASM@0
|
|
|
|
GLOBAL _R_DrawSpanP_ASM
|
|
|
|
GLOBAL R_DrawSpanP_ASM
|
|
|
|
|
|
|
|
; eax: scratch
|
|
|
|
; ebx: zero
|
|
|
|
; ecx: yfrac at top end, xfrac int part at low end
|
|
|
|
; edx: xfrac frac part at top end
|
|
|
|
; edi: dest
|
|
|
|
; ebp: scratch
|
|
|
|
; esi: count
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
@R_DrawSpanP_ASM@0:
|
|
|
|
_R_DrawSpanP_ASM:
|
|
|
|
R_DrawSpanP_ASM:
|
|
|
|
mov eax,[ds_x2]
|
|
|
|
mov ecx,[ds_x1]
|
|
|
|
sub eax,ecx
|
2007-02-02 23:41:51 +00:00
|
|
|
jl near rdspret ; count < 0: nothing to do, so leave
|
2006-02-24 04:48:15 +00:00
|
|
|
|
|
|
|
push ebx
|
|
|
|
push edi
|
|
|
|
push ebp
|
|
|
|
push esi
|
|
|
|
|
|
|
|
mov edi,ecx
|
|
|
|
add edi,[dc_destorg]
|
|
|
|
mov ecx,[ds_y]
|
|
|
|
add edi,[ylookup+ecx*4]
|
|
|
|
mov edx,[ds_xstep]
|
|
|
|
dsy1: shl edx,6
|
|
|
|
mov ebp,[ds_xstep]
|
|
|
|
dsy3: shr ebp,26
|
|
|
|
xor ebx,ebx
|
|
|
|
lea esi,[eax+1]
|
|
|
|
mov [ds_xstep],edx
|
|
|
|
mov edx,[ds_ystep]
|
|
|
|
mov ecx,[ds_xfrac]
|
|
|
|
dsy4: shr ecx,26
|
|
|
|
dsm8: and edx,0xffffffc0
|
|
|
|
or ebp,edx
|
|
|
|
mov [ds_ystep],ebp
|
|
|
|
mov ebp,[ds_yfrac]
|
|
|
|
mov edx,[ds_xfrac]
|
|
|
|
dsy2: shl edx,6
|
|
|
|
dsm9: and ebp,0xffffffc0
|
|
|
|
or ecx,ebp
|
|
|
|
shr esi,1
|
|
|
|
jnc dseven1
|
|
|
|
|
|
|
|
; do odd pixel
|
|
|
|
|
|
|
|
mov ebp,ecx
|
|
|
|
dsx1: rol ebp,6
|
|
|
|
dsm1: and ebp,0xfff
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
spreada mov bl,[ebp+SPACEFILLER4]
|
|
|
|
spmapa mov bl,[ebx+SPACEFILLER4]
|
|
|
|
mov [edi],bl
|
|
|
|
inc edi
|
|
|
|
|
|
|
|
dseven1 shr esi,1
|
|
|
|
jnc dsrest
|
|
|
|
|
|
|
|
; do two more pixels
|
|
|
|
mov ebp,ecx
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
dsm2: and ebp,0xfc00003f
|
|
|
|
dsx2: rol ebp,6
|
|
|
|
mov eax,ecx
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
spreadb mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
|
|
dsx3: rol eax,6
|
|
|
|
dsm6: and eax,0xfff
|
|
|
|
spmapb mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
|
|
mov [edi],bl ;store texel1
|
|
|
|
add edi,2
|
|
|
|
spreadc mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
|
|
spmapc mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
|
|
mov [edi-1],bl ;store texel2
|
|
|
|
|
|
|
|
; do the rest
|
|
|
|
|
|
|
|
dsrest test esi,esi
|
|
|
|
jz near dsdone
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
dsloop mov ebp,ecx
|
|
|
|
spstep1d add edx,[ds_xstep]
|
|
|
|
spstep2d adc ecx,[ds_ystep]
|
|
|
|
dsm3: and ebp,0xfc00003f
|
|
|
|
dsx4: rol ebp,6
|
|
|
|
mov eax,ecx
|
|
|
|
spstep1e add edx,[ds_xstep]
|
|
|
|
spstep2e adc ecx,[ds_ystep]
|
|
|
|
spreadd mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
|
|
dsx5: rol eax,6
|
|
|
|
dsm5: and eax,0xfff
|
|
|
|
spmapd mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
|
|
mov [edi],bl ;store texel1
|
|
|
|
mov ebp,ecx
|
|
|
|
spreade mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
|
|
spstep1f add edx,[ds_xstep]
|
|
|
|
spstep2f adc ecx,[ds_ystep]
|
|
|
|
dsm4: and ebp,0xfc00003f
|
|
|
|
dsx6: rol ebp,6
|
|
|
|
spmape mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
|
|
mov eax,ecx
|
|
|
|
mov [edi+1],bl ;store texel2
|
|
|
|
spreadf mov bl,[ebp+SPACEFILLER4] ;read texel3
|
|
|
|
spmapf mov bl,[ebx+SPACEFILLER4] ;map texel3
|
|
|
|
add edi,4
|
|
|
|
dsx7: rol eax,6
|
|
|
|
dsm7: and eax,0xfff
|
|
|
|
mov [edi-2],bl ;store texel3
|
|
|
|
spreadg mov bl,[eax+SPACEFILLER4] ;read texel4
|
|
|
|
spstep1g add edx,[ds_xstep]
|
|
|
|
spstep2g adc ecx,[ds_ystep]
|
|
|
|
spmapg mov bl,[ebx+SPACEFILLER4] ;map texel4
|
|
|
|
dec esi
|
|
|
|
mov [edi-1],bl ;store texel4
|
|
|
|
jnz near dsloop
|
|
|
|
|
|
|
|
dsdone pop esi
|
|
|
|
pop ebp
|
|
|
|
pop edi
|
|
|
|
pop ebx
|
|
|
|
|
|
|
|
rdspret ret
|
|
|
|
|
|
|
|
; This is the same as the previous routine, except it doesn't draw pixels
|
|
|
|
; where the texture's color value is 0.
|
|
|
|
|
|
|
|
GLOBAL @R_DrawSpanMaskedP_ASM@0
|
|
|
|
GLOBAL _R_DrawSpanMaskedP_ASM
|
|
|
|
GLOBAL R_DrawSpanMaskedP_ASM
|
|
|
|
|
|
|
|
; eax: scratch
|
|
|
|
; ebx: zero
|
|
|
|
; ecx: yfrac at top end, xfrac int part at low end
|
|
|
|
; edx: xfrac frac part at top end
|
|
|
|
; edi: dest
|
|
|
|
; ebp: scratch
|
|
|
|
; esi: count
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
@R_DrawSpanMaskedP_ASM@0:
|
|
|
|
_R_DrawSpanMaskedP_ASM:
|
|
|
|
R_DrawSpanMaskedP_ASM:
|
|
|
|
mov eax,[ds_x2]
|
|
|
|
mov ecx,[ds_x1]
|
|
|
|
sub eax,ecx
|
|
|
|
jl rdspret ; count < 0: nothing to do, so leave
|
|
|
|
|
|
|
|
push ebx
|
|
|
|
push edi
|
|
|
|
push ebp
|
|
|
|
push esi
|
|
|
|
|
|
|
|
mov edi,ecx
|
|
|
|
add edi,[dc_destorg]
|
|
|
|
mov ecx,[ds_y]
|
|
|
|
add edi,[ylookup+ecx*4]
|
|
|
|
mov edx,[ds_xstep]
|
|
|
|
dmsy1: shl edx,6
|
|
|
|
mov ebp,[ds_xstep]
|
|
|
|
dmsy3: shr ebp,26
|
|
|
|
xor ebx,ebx
|
|
|
|
lea esi,[eax+1]
|
|
|
|
mov [ds_xstep],edx
|
|
|
|
mov edx,[ds_ystep]
|
|
|
|
mov ecx,[ds_xfrac]
|
|
|
|
dmsy4: shr ecx,26
|
|
|
|
dmsm8: and edx,0xffffffc0
|
|
|
|
or ebp,edx
|
|
|
|
mov [ds_ystep],ebp
|
|
|
|
mov ebp,[ds_yfrac]
|
|
|
|
mov edx,[ds_xfrac]
|
|
|
|
dmsy2: shl edx,6
|
|
|
|
dmsm9: and ebp,0xffffffc0
|
|
|
|
or ecx,ebp
|
|
|
|
shr esi,1
|
|
|
|
jnc dmseven1
|
|
|
|
|
|
|
|
; do odd pixel
|
|
|
|
|
|
|
|
mov ebp,ecx
|
|
|
|
dmsx1: rol ebp,6
|
|
|
|
dmsm1: and ebp,0xfff
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
mspreada mov bl,[ebp+SPACEFILLER4]
|
|
|
|
cmp bl,0
|
|
|
|
je mspskipa
|
|
|
|
mspmapa mov bl,[ebx+SPACEFILLER4]
|
|
|
|
mov [edi],bl
|
|
|
|
mspskipa: inc edi
|
|
|
|
|
|
|
|
dmseven1 shr esi,1
|
|
|
|
jnc dmsrest
|
|
|
|
|
|
|
|
; do two more pixels
|
|
|
|
mov ebp,ecx
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
dmsm2: and ebp,0xfc00003f
|
|
|
|
dmsx2: rol ebp,6
|
|
|
|
mov eax,ecx
|
|
|
|
add edx,[ds_xstep]
|
|
|
|
adc ecx,[ds_ystep]
|
|
|
|
mspreadb mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
|
|
dmsx3: rol eax,6
|
|
|
|
dmsm6: and eax,0xfff
|
|
|
|
cmp bl,0
|
|
|
|
je mspskipb
|
|
|
|
mspmapb mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
|
|
mov [edi],bl ;store texel1
|
|
|
|
mspskipb add edi,2
|
|
|
|
mspreadc mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
|
|
cmp bl,0
|
|
|
|
je dmsrest
|
|
|
|
mspmapc mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
|
|
mov [edi-1],bl ;store texel2
|
|
|
|
|
|
|
|
; do the rest
|
|
|
|
|
|
|
|
dmsrest test esi,esi
|
|
|
|
jz near dmsdone
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
dmsloop mov ebp,ecx
|
|
|
|
mspstep1d add edx,[ds_xstep]
|
|
|
|
mspstep2d adc ecx,[ds_ystep]
|
|
|
|
dmsm3: and ebp,0xfc00003f
|
|
|
|
dmsx4: rol ebp,6
|
|
|
|
mov eax,ecx
|
|
|
|
mspstep1e add edx,[ds_xstep]
|
|
|
|
mspstep2e adc ecx,[ds_ystep]
|
|
|
|
mspreadd mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
|
|
dmsx5: rol eax,6
|
|
|
|
dmsm5: and eax,0xfff
|
|
|
|
cmp bl,0
|
|
|
|
mov ebp,ecx
|
|
|
|
je mspreade
|
|
|
|
mspmapd mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
|
|
mov [edi],bl ;store texel1
|
|
|
|
mspreade mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
|
|
mspstep1f add edx,[ds_xstep]
|
|
|
|
mspstep2f adc ecx,[ds_ystep]
|
|
|
|
dmsm4: and ebp,0xfc00003f
|
|
|
|
dmsx6: rol ebp,6
|
|
|
|
cmp bl,0
|
|
|
|
mov eax,ecx
|
|
|
|
je mspreadf
|
|
|
|
mspmape mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
|
|
mov [edi+1],bl ;store texel2
|
|
|
|
mspreadf mov bl,[ebp+SPACEFILLER4] ;read texel3
|
|
|
|
add edi,4
|
|
|
|
dmsx7: rol eax,6
|
|
|
|
dmsm7: and eax,0xfff
|
|
|
|
cmp bl,0
|
|
|
|
je mspreadg
|
|
|
|
mspmapf mov bl,[ebx+SPACEFILLER4] ;map texel3
|
|
|
|
mov [edi-2],bl ;store texel3
|
|
|
|
mspreadg mov bl,[eax+SPACEFILLER4] ;read texel4
|
|
|
|
mspstep1g add edx,[ds_xstep]
|
|
|
|
mspstep2g adc ecx,[ds_ystep]
|
|
|
|
cmp bl,0
|
|
|
|
je mspskipg
|
|
|
|
mspmapg mov bl,[ebx+SPACEFILLER4] ;map texel4
|
|
|
|
mov [edi-1],bl ;store texel4
|
|
|
|
mspskipg dec esi
|
|
|
|
jnz near dmsloop
|
|
|
|
|
|
|
|
dmsdone pop esi
|
|
|
|
pop ebp
|
|
|
|
pop edi
|
|
|
|
pop ebx
|
|
|
|
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* R_DrawColumnP
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @R_DrawColumnP_ASM@0
|
|
|
|
GLOBAL _R_DrawColumnP_ASM
|
|
|
|
GLOBAL R_DrawColumnP_ASM
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
R_DrawColumnP_ASM:
|
|
|
|
_R_DrawColumnP_ASM:
|
|
|
|
@R_DrawColumnP_ASM@0:
|
|
|
|
|
|
|
|
; count = dc_yh - dc_yl;
|
|
|
|
|
|
|
|
mov ecx,[dc_count]
|
|
|
|
test ecx,ecx
|
|
|
|
jle near rdcpret ; count <= 0: nothing to do, so leave
|
|
|
|
|
|
|
|
push ebp ; save registers
|
|
|
|
push ebx
|
|
|
|
push edi
|
|
|
|
push esi
|
|
|
|
|
|
|
|
; dest = ylookup[dc_yl] + dc_x + dc_destorg;
|
|
|
|
|
|
|
|
mov edi,[dc_dest]
|
|
|
|
mov ebp,ecx
|
|
|
|
mov ebx,[dc_texturefrac] ; ebx = frac
|
|
|
|
rdcp1: sub edi,SPACEFILLER4
|
|
|
|
mov ecx,ebx
|
|
|
|
shr ecx,16
|
|
|
|
mov esi,[dc_source]
|
|
|
|
mov edx,[dc_iscale]
|
|
|
|
mov eax,[dc_colormap]
|
|
|
|
|
|
|
|
cmp BYTE [CPU+66],byte 5
|
|
|
|
jg rdcploop2
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
; The registers should now look like this:
|
|
|
|
;
|
|
|
|
; [31 .. 16][15 .. 8][7 .. 0]
|
|
|
|
; eax [colormap ]
|
|
|
|
; ebx [yi ][yf ]
|
|
|
|
; ecx [scratch ]
|
|
|
|
; edx [dyi ][dyf ]
|
|
|
|
; esi [source texture column ]
|
|
|
|
; edi [destination screen pointer ]
|
|
|
|
; ebp [counter ]
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
|
|
; Note the partial register stalls on anything better than a Pentium
|
|
|
|
; That's why there are two versions of this loop.
|
|
|
|
|
|
|
|
rdcploop:
|
|
|
|
mov cl,[esi+ecx] ; Fetch texel
|
|
|
|
xor ch,ch
|
|
|
|
add ebx,edx ; increment frac
|
|
|
|
rdcp2: add edi,SPACEFILLER4 ; increment destination pointer
|
|
|
|
mov cl,[eax+ecx] ; colormap texel
|
|
|
|
mov [edi],cl ; Store texel
|
|
|
|
mov ecx,ebx
|
|
|
|
shr ecx,16
|
|
|
|
dec ebp
|
|
|
|
jnz rdcploop ; loop
|
|
|
|
|
|
|
|
pop esi
|
|
|
|
pop edi
|
|
|
|
pop ebx
|
|
|
|
pop ebp
|
|
|
|
rdcpret:
|
|
|
|
ret
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rdcploop2:
|
|
|
|
movzx ecx,byte [esi+ecx] ; Fetch texel
|
|
|
|
add ebx,edx ; increment frac
|
|
|
|
mov cl,[eax+ecx] ; colormap texel
|
|
|
|
rdcp3: add edi,SPACEFILLER4 ; increment destination pointer
|
|
|
|
mov [edi],cl ; Store texel
|
|
|
|
mov ecx,ebx
|
|
|
|
shr ecx,16
|
|
|
|
dec ebp
|
|
|
|
jnz rdcploop2 ; loop
|
|
|
|
|
|
|
|
pop esi
|
|
|
|
pop edi
|
|
|
|
pop ebx
|
|
|
|
pop ebp
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* R_DrawFuzzColumnP
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @R_DrawFuzzColumnP_ASM@0
|
|
|
|
GLOBAL _R_DrawFuzzColumnP_ASM
|
|
|
|
GLOBAL R_DrawFuzzColumnP_ASM
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
R_DrawFuzzColumnP_ASM:
|
|
|
|
_R_DrawFuzzColumnP_ASM:
|
|
|
|
@R_DrawFuzzColumnP_ASM@0:
|
|
|
|
|
|
|
|
; Adjust borders. Low...
|
|
|
|
mov eax,[dc_yl]
|
|
|
|
push ebx
|
|
|
|
push esi
|
|
|
|
push edi
|
|
|
|
push ebp
|
|
|
|
|
|
|
|
cmp eax,0
|
|
|
|
jg .ylok
|
|
|
|
|
|
|
|
mov eax,1
|
|
|
|
nop
|
|
|
|
|
|
|
|
; ...and high.
|
|
|
|
.ylok mov edx,[fuzzviewheight]
|
|
|
|
mov esi,[dc_yh]
|
|
|
|
cmp esi,edx
|
|
|
|
jle .yhok
|
|
|
|
|
|
|
|
mov esi,edx
|
|
|
|
nop
|
|
|
|
|
|
|
|
.yhok mov edx,[dc_x]
|
|
|
|
sub esi,eax ; esi = count
|
|
|
|
js near .dfcdone ; Zero length (or less)
|
|
|
|
|
|
|
|
mov edi,[ylookup+eax*4]
|
|
|
|
mov ebx,edx
|
|
|
|
add edi,[dc_destorg]
|
|
|
|
mov eax,[NormalLight]
|
|
|
|
mov ecx,[fuzzpos]
|
|
|
|
add edi,ebx
|
|
|
|
add eax,256*6
|
|
|
|
inc esi
|
|
|
|
mov ebp,[dc_pitch]
|
|
|
|
mov edx,FUZZTABLE
|
|
|
|
test ecx,ecx
|
|
|
|
je .fuzz0
|
|
|
|
|
|
|
|
;
|
|
|
|
; esi = count
|
|
|
|
; edi = dest
|
|
|
|
; ecx = fuzzpos
|
|
|
|
; eax = colormap 6
|
|
|
|
;
|
|
|
|
|
|
|
|
; first loop: end with fuzzpos or count 0, whichever happens first
|
|
|
|
|
|
|
|
sub edx,ecx ; edx = # of entries left in fuzzoffset
|
|
|
|
mov ebx,esi
|
|
|
|
cmp esi,edx
|
|
|
|
jle .enuf
|
|
|
|
mov esi,edx
|
|
|
|
.enuf sub ebx,esi
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
push ebx
|
|
|
|
xor ebx,ebx
|
|
|
|
|
|
|
|
.loop1 inc ecx
|
|
|
|
mov bl,[edi+edx]
|
|
|
|
dec esi
|
|
|
|
mov bl,[eax+ebx]
|
|
|
|
mov [edi],bl
|
|
|
|
lea edi,[edi+ebp]
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
jnz .loop1
|
|
|
|
|
|
|
|
; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those
|
|
|
|
|
|
|
|
pop esi
|
|
|
|
cmp ecx,FUZZTABLE
|
|
|
|
jl .savefuzzpos
|
|
|
|
xor ecx,ecx
|
|
|
|
nop
|
|
|
|
.fuzz0 cmp esi,FUZZTABLE
|
|
|
|
jl .chunked
|
|
|
|
|
|
|
|
.oloop lea edx,[esi-FUZZTABLE]
|
|
|
|
mov esi,FUZZTABLE
|
|
|
|
push edx
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
|
|
|
|
.iloop inc ecx
|
|
|
|
mov bl,[edi+edx]
|
|
|
|
dec esi
|
|
|
|
mov bl,[eax+ebx]
|
|
|
|
mov [edi],bl
|
|
|
|
lea edi,[edi+ebp]
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
jnz .iloop
|
|
|
|
|
|
|
|
pop esi
|
|
|
|
xor ecx,ecx
|
|
|
|
cmp esi,FUZZTABLE
|
|
|
|
jge .oloop
|
|
|
|
|
|
|
|
; third loop: Do whatever is left
|
|
|
|
|
2007-12-11 02:38:38 +00:00
|
|
|
.chunked:
|
2006-02-24 04:48:15 +00:00
|
|
|
test esi,esi
|
|
|
|
jle .savefuzzpos
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
nop
|
|
|
|
|
|
|
|
.loop3 inc ecx
|
|
|
|
mov bl,[edi+edx]
|
|
|
|
dec esi
|
|
|
|
mov bl,[eax+ebx]
|
|
|
|
mov [edi],bl
|
|
|
|
lea edi,[edi+ebp]
|
|
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
jnz .loop3
|
|
|
|
|
|
|
|
.savefuzzpos:
|
|
|
|
mov [fuzzpos],ecx
|
|
|
|
.dfcdone:
|
|
|
|
pop ebp
|
|
|
|
pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* R_DrawColumnHorizP_ASM
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @R_DrawColumnHorizP_ASM@0
|
|
|
|
GLOBAL _R_DrawColumnHorizP_ASM
|
|
|
|
GLOBAL R_DrawColumnHorizP_ASM
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
@R_DrawColumnHorizP_ASM@0:
|
|
|
|
R_DrawColumnHorizP_ASM:
|
|
|
|
_R_DrawColumnHorizP_ASM:
|
|
|
|
|
|
|
|
; count = dc_yh - dc_yl;
|
|
|
|
|
|
|
|
mov eax,[dc_yh]
|
|
|
|
mov ecx,[dc_yl]
|
|
|
|
sub eax,ecx
|
|
|
|
mov edx,[dc_x]
|
|
|
|
|
|
|
|
jl near .leave ; count < 0: nothing to do, so leave
|
|
|
|
|
|
|
|
push ebp ; save registers
|
|
|
|
push ebx
|
|
|
|
push edi
|
|
|
|
push esi
|
|
|
|
|
|
|
|
inc eax ; make 0 count mean 0 pixels
|
|
|
|
and edx,3
|
|
|
|
push eax
|
|
|
|
mov esi,[dc_ctspan+edx*4]
|
|
|
|
lea eax,[dc_temp+ecx*4+edx] ; eax = top of column in buffer
|
|
|
|
mov ebp,[dc_yh]
|
|
|
|
mov [esi],ecx
|
|
|
|
mov [esi+4],ebp
|
|
|
|
add esi,8
|
|
|
|
mov edi,[dc_source]
|
|
|
|
mov [dc_ctspan+edx*4],esi
|
|
|
|
mov esi,[dc_iscale]
|
|
|
|
mov ecx,[dc_texturefrac] ; ecx = frac
|
|
|
|
mov dl,[edi] ; load cache
|
|
|
|
mov ebx,[esp]
|
|
|
|
and ebx,0xfffffff8
|
|
|
|
jnz .mthan8
|
|
|
|
|
|
|
|
; Register usage in the following code is:
|
|
|
|
;
|
|
|
|
; eax: dest
|
|
|
|
; edi: source
|
|
|
|
; ecx: frac (16.16)
|
|
|
|
; esi: fracstep (16.16)
|
|
|
|
; ebx: add1
|
|
|
|
; ebp: add2
|
|
|
|
; dl: texel1
|
|
|
|
; dh: texel2
|
|
|
|
;[esp] count
|
|
|
|
|
|
|
|
; there are fewer than 8 pixels to draw
|
|
|
|
|
|
|
|
mov ebx,[esp]
|
|
|
|
.lthan8 shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
; do one pixel before loop (little opportunity for pairing)
|
|
|
|
|
|
|
|
mov ebp,ecx ; copy frac to ebx
|
|
|
|
add ecx,esi ; increment frac
|
|
|
|
shr ebp,16 ; shift frac over to low end
|
|
|
|
add eax,4
|
|
|
|
mov dl,[edi+ebp]
|
|
|
|
mov [eax-4],dl
|
|
|
|
|
|
|
|
.even test ebx,ebx
|
|
|
|
jz near .done
|
|
|
|
|
|
|
|
.loop2 mov [esp],ebx ; save counter
|
|
|
|
mov ebx,ecx ; copy frac for texel1 to ebx
|
|
|
|
shr ebx,16 ; shift frac for texel1 to low end
|
|
|
|
add ecx,esi ; increment frac
|
|
|
|
mov ebp,ecx ; copy frac for texel2 to ebp
|
|
|
|
shr ebp,16 ; shift frac for texel2 to low end
|
|
|
|
add ecx,esi ; increment frac
|
|
|
|
mov dl,[edi+ebx] ; read texel1
|
|
|
|
mov ebx,[esp] ; fetch counter
|
|
|
|
mov dh,[edi+ebp] ; read texel2
|
|
|
|
mov [eax],dl ; write texel1
|
|
|
|
mov [eax+4],dh ; write texel2
|
|
|
|
add eax,8 ; increment dest
|
|
|
|
dec ebx ; decrement counter
|
|
|
|
jnz .loop2 ; loop until it hits 0
|
|
|
|
|
|
|
|
jmp .done
|
|
|
|
|
|
|
|
; there are more than 8 pixels to draw. position eax as close to a 32 byte
|
|
|
|
; boundary as possible, then do whatever is left.
|
|
|
|
|
|
|
|
.mthan8 test eax,4
|
|
|
|
jz .try2
|
|
|
|
|
|
|
|
mov ebp,ecx ; frac: in ebp
|
|
|
|
add ecx,esi ; step
|
|
|
|
shr ebp,16 ; frac: shift
|
|
|
|
add eax,4 ; increment dest
|
|
|
|
mov ebx,[esp] ; fetch counter
|
|
|
|
mov dl,[edi+ebp] ; tex: read
|
|
|
|
dec ebx ; decrement counter
|
|
|
|
mov [eax-4],dl ; tex: write
|
|
|
|
mov [esp],ebx ; store counter
|
|
|
|
|
|
|
|
.try2 test eax,8
|
|
|
|
jz .try4
|
|
|
|
|
|
|
|
mov ebx,ecx ; frac1: in ebx
|
|
|
|
add ecx,esi ; step
|
|
|
|
shr ebx,16 ; frac1: shift
|
|
|
|
mov ebp,ecx ; frac2: in ebp
|
|
|
|
shr ebp,16 ; frac2: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
|
|
mov ebx,[esp] ; fetch counter
|
|
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
|
|
mov [eax],dl ; tex1: write
|
|
|
|
mov [eax+4],dh ; tex2: write
|
|
|
|
sub ebx,2 ; decrement counter
|
|
|
|
add eax,8 ; increment dest
|
|
|
|
mov [esp],ebx ; store counter
|
|
|
|
|
|
|
|
.try4 test eax,16
|
|
|
|
jz .try8
|
|
|
|
|
|
|
|
mov ebx,ecx ; frac1: in ebx
|
|
|
|
add ecx,esi ; step
|
|
|
|
shr ebx,16 ; frac1: shift
|
|
|
|
mov ebp,ecx ; frac2: in ebp
|
|
|
|
shr ebp,16 ; frac2: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
|
|
mov ebx,ecx ; frac3: in ebx
|
|
|
|
shr ebx,16 ; frac3: shift
|
|
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov [eax],dl ; tex1: write
|
|
|
|
mov [eax+4],dh ; tex2: write
|
|
|
|
mov ebp,ecx ; frac4: in ebp
|
|
|
|
shr ebp,16 ; frac4: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov dl,[edi+ebx] ; tex3: read
|
|
|
|
mov ebx,[esp] ; fetch counter
|
|
|
|
mov dh,[edi+ebp] ; tex4: read
|
|
|
|
sub ebx,4 ; decrement counter
|
|
|
|
mov [esp],ebx ; store counter
|
|
|
|
mov [eax+8],dl ; tex3: write
|
|
|
|
mov [eax+12],dh ; tex4: write
|
|
|
|
add eax,16 ; increment dest
|
|
|
|
|
|
|
|
.try8 mov ebx,[esp] ; make counter count groups of 8
|
|
|
|
sub esp,4
|
|
|
|
shr ebx,3
|
|
|
|
jmp .tail8
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
.loop8 mov [esp],ebx ; save counter
|
|
|
|
mov ebx,ecx ; frac1: in ebx
|
|
|
|
shr ebx,16 ; frac1: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov ebp,ecx ; frac2: in ebp
|
|
|
|
shr ebp,16 ; frac2: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
|
|
mov ebx,ecx ; frac3: in ebx
|
|
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
|
|
shr ebx,16 ; frac3: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov [eax],dl ; tex1: write
|
|
|
|
mov [eax+4],dh ; tex2: write
|
|
|
|
mov ebp,ecx ; frac4: in ebp
|
|
|
|
shr ebp,16 ; frac4: shift
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov dl,[edi+ebx] ; tex3: read
|
|
|
|
mov ebx,ecx ; frac5: in ebx
|
|
|
|
mov dh,[edi+ebp] ; tex4: read
|
|
|
|
shr ebx,16 ; frac5: shift
|
|
|
|
mov [eax+8],dl ; tex3: write
|
|
|
|
mov [eax+12],dh ; tex4: write
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov ebp,ecx ; frac6: in ebp
|
|
|
|
shr ebp,16 ; frac6: shift
|
|
|
|
mov dl,[edi+ebx] ; tex5: read
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov ebx,ecx ; frac7: in ebx
|
|
|
|
mov [eax+16],dl ; tex5: write
|
|
|
|
shr ebx,16 ; frac7: shift
|
|
|
|
mov dh,[edi+ebp] ; tex6: read
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov ebp,ecx ; frac8: in ebp
|
|
|
|
mov [eax+20],dh ; tex6: write
|
|
|
|
shr ebp,16 ; frac8: shift
|
|
|
|
add eax,32 ; increment dest pointer
|
|
|
|
mov dl,[edi+ebx] ; tex7: read
|
|
|
|
mov ebx,[esp] ; fetch counter
|
|
|
|
mov [eax-8],dl ; tex7: write
|
|
|
|
mov dh,[edi+ebp] ; tex8: read
|
|
|
|
add ecx,esi ; step
|
|
|
|
mov [eax-4],dh ; tex8: write
|
|
|
|
mov dl,[eax] ; load cache
|
|
|
|
dec ebx ; decrement counter
|
|
|
|
.tail8 jnz near .loop8 ; loop if more to do
|
|
|
|
|
|
|
|
pop ebp
|
|
|
|
mov ebx,[esp]
|
|
|
|
and ebx,7
|
|
|
|
jnz near .lthan8
|
|
|
|
|
|
|
|
.done pop eax
|
|
|
|
pop esi
|
|
|
|
pop edi
|
|
|
|
pop ebx
|
|
|
|
pop ebp
|
|
|
|
.leave ret
|
|
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* rt_copy1col_asm
|
|
|
|
;*
|
|
|
|
;* ecx = hx
|
|
|
|
;* edx = sx
|
|
|
|
;* [esp+4] = yl
|
|
|
|
;* [esp+8] = yh
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @rt_copy1col_asm@16
|
|
|
|
GLOBAL _rt_copy1col_asm
|
|
|
|
GLOBAL rt_copy1col_asm
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rt_copy1col_asm:
|
|
|
|
_rt_copy1col_asm:
|
|
|
|
pop eax
|
|
|
|
mov edx,[esp+4*3]
|
|
|
|
mov ecx,[esp+4*2]
|
|
|
|
push edx
|
|
|
|
push ecx
|
|
|
|
mov ecx,[esp+4*2]
|
|
|
|
mov edx,[esp+4*3]
|
|
|
|
push eax
|
|
|
|
|
|
|
|
@rt_copy1col_asm@16:
|
|
|
|
mov eax, [esp+4]
|
|
|
|
push ebx
|
|
|
|
mov ebx, [esp+12]
|
|
|
|
push esi
|
|
|
|
sub ebx, eax
|
|
|
|
push edi
|
|
|
|
js .done
|
|
|
|
|
|
|
|
lea esi,[eax*4]
|
|
|
|
inc ebx ; ebx = count
|
|
|
|
mov eax,edx
|
|
|
|
lea ecx,[dc_temp+ecx+esi] ; ecx = source
|
|
|
|
mov edi,[ylookup+esi]
|
|
|
|
mov esi,[dc_pitch] ; esi = pitch
|
|
|
|
add eax,edi ; eax = dest
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
|
|
|
|
shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
mov dl,[ecx]
|
|
|
|
add ecx,4
|
|
|
|
mov [eax],dl
|
|
|
|
add eax,esi
|
|
|
|
|
|
|
|
.even and ebx,ebx
|
|
|
|
jz .done
|
|
|
|
|
|
|
|
.loop mov dl,[ecx]
|
|
|
|
mov dh,[ecx+4]
|
|
|
|
mov [eax],dl
|
|
|
|
mov [eax+esi],dh
|
|
|
|
add ecx,8
|
|
|
|
lea eax,[eax+esi*2]
|
|
|
|
dec ebx
|
|
|
|
jnz .loop
|
|
|
|
|
|
|
|
.done pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
ret 8
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* rt_copy4cols_asm
|
|
|
|
;*
|
|
|
|
;* ecx = sx
|
|
|
|
;* edx = yl
|
|
|
|
;* [esp+4] = yh
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @rt_copy4cols_asm@12
|
|
|
|
GLOBAL _rt_copy4cols_asm
|
|
|
|
GLOBAL rt_copy4cols_asm
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rt_copy4cols_asm:
|
|
|
|
_rt_copy4cols_asm:
|
|
|
|
pop eax
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
mov edx,[esp+4]
|
|
|
|
push ecx
|
|
|
|
mov ecx,[esp+4]
|
|
|
|
push eax
|
|
|
|
|
|
|
|
@rt_copy4cols_asm@12:
|
|
|
|
push ebx
|
|
|
|
mov ebx,[esp+8]
|
|
|
|
push esi
|
|
|
|
sub ebx,edx
|
|
|
|
push edi
|
|
|
|
js .done
|
|
|
|
|
|
|
|
inc ebx ; ebx = count
|
|
|
|
mov eax,ecx
|
|
|
|
mov esi,[ylookup+edx*4]
|
|
|
|
lea ecx,[dc_temp+edx*4] ; ecx = source
|
|
|
|
mov edx,[dc_pitch] ; edx = pitch
|
|
|
|
add eax,esi ; eax = dest
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
|
|
|
|
shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
mov esi,[ecx]
|
|
|
|
add ecx,4
|
|
|
|
mov [eax],esi
|
|
|
|
add eax,edx
|
|
|
|
|
|
|
|
.even and ebx,ebx
|
|
|
|
jz .done
|
|
|
|
|
|
|
|
.loop mov esi,[ecx]
|
|
|
|
mov edi,[ecx+4]
|
|
|
|
mov [eax],esi
|
|
|
|
mov [eax+edx],edi
|
|
|
|
add ecx,8
|
|
|
|
lea eax,[eax+edx*2]
|
|
|
|
dec ebx
|
|
|
|
jnz .loop
|
|
|
|
|
|
|
|
.done pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
ret 4
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* rt_map1col_asm
|
|
|
|
;*
|
|
|
|
;* ecx = hx
|
|
|
|
;* edx = sx
|
|
|
|
;* [esp+4] = yl
|
|
|
|
;* [esp+8] = yh
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @rt_map1col_asm@16
|
|
|
|
GLOBAL _rt_map1col_asm
|
|
|
|
GLOBAL rt_map1col_asm
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rt_map1col_asm:
|
|
|
|
_rt_map1col_asm:
|
|
|
|
pop eax
|
|
|
|
mov edx,[esp+4*3]
|
|
|
|
mov ecx,[esp+4*2]
|
|
|
|
push edx
|
|
|
|
push ecx
|
|
|
|
mov ecx,[esp+4*2]
|
|
|
|
mov edx,[esp+4*3]
|
|
|
|
push eax
|
|
|
|
|
|
|
|
@rt_map1col_asm@16:
|
|
|
|
mov eax,[esp+4]
|
|
|
|
push ebx
|
|
|
|
mov ebx,[esp+12]
|
|
|
|
push ebp
|
|
|
|
push esi
|
|
|
|
sub ebx, eax
|
|
|
|
push edi
|
|
|
|
js .done
|
|
|
|
|
|
|
|
lea edi,[eax*4]
|
|
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
|
|
inc ebx ; ebx = count
|
|
|
|
mov eax,edx
|
|
|
|
lea ebp,[dc_temp+ecx+edi] ; ebp = source
|
|
|
|
mov ecx,[ylookup+edi]
|
|
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
|
|
add eax,ecx ; eax = dest
|
|
|
|
xor ecx,ecx
|
|
|
|
xor edx,edx
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
|
|
|
|
shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
mov dl,[ebp]
|
|
|
|
add ebp,4
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov [eax],dl
|
|
|
|
add eax,edi
|
|
|
|
|
|
|
|
.even and ebx,ebx
|
|
|
|
jz .done
|
|
|
|
|
|
|
|
.loop mov dl,[ebp]
|
|
|
|
mov cl,[ebp+4]
|
|
|
|
add ebp,8
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax],dl
|
|
|
|
mov [eax+edi],cl
|
|
|
|
dec ebx
|
|
|
|
lea eax,[eax+edi*2]
|
|
|
|
jnz .loop
|
|
|
|
|
|
|
|
.done pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebp
|
|
|
|
pop ebx
|
|
|
|
ret 8
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
;*
|
|
|
|
;* rt_map4cols_asm
|
|
|
|
;*
|
|
|
|
;* rt_map4cols_asm1 is for PPro and above
|
|
|
|
;* rt_map4cols_asm2 is for Pentium and below
|
|
|
|
;*
|
|
|
|
;* ecx = sx
|
|
|
|
;* edx = yl
|
|
|
|
;* [esp+4] = yh
|
|
|
|
;*
|
|
|
|
;*----------------------------------------------------------------------
|
|
|
|
|
|
|
|
GLOBAL @rt_map4cols_asm1@12
|
|
|
|
GLOBAL _rt_map4cols_asm1
|
|
|
|
GLOBAL rt_map4cols_asm1
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rt_map4cols_asm1:
|
|
|
|
_rt_map4cols_asm1:
|
|
|
|
pop eax
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
mov edx,[esp+4]
|
|
|
|
push ecx
|
|
|
|
mov ecx,[esp+4]
|
|
|
|
push eax
|
|
|
|
|
|
|
|
@rt_map4cols_asm1@12:
|
|
|
|
push ebx
|
|
|
|
mov ebx,[esp+8]
|
|
|
|
push ebp
|
|
|
|
push esi
|
|
|
|
sub ebx,edx
|
|
|
|
push edi
|
|
|
|
js near .done
|
|
|
|
|
|
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
|
|
shl edx,2
|
|
|
|
mov eax,ecx
|
|
|
|
inc ebx ; ebx = count
|
|
|
|
mov edi,[ylookup+edx]
|
|
|
|
lea ebp,[dc_temp+edx] ; ebp = source
|
|
|
|
add eax,edi ; eax = dest
|
|
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
xor ecx,ecx
|
|
|
|
xor edx,edx
|
|
|
|
|
|
|
|
shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
mov dl,[ebp]
|
|
|
|
mov cl,[ebp+1]
|
|
|
|
add ebp,4
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax],dl
|
|
|
|
mov [eax+1],cl
|
|
|
|
mov dl,[ebp-2]
|
|
|
|
mov cl,[ebp-1]
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax+2],dl
|
|
|
|
mov [eax+3],cl
|
|
|
|
add eax,edi
|
|
|
|
|
|
|
|
.even and ebx,ebx
|
|
|
|
jz .done
|
|
|
|
|
2007-12-11 02:38:38 +00:00
|
|
|
.loop:
|
2006-02-24 04:48:15 +00:00
|
|
|
mov dl,[ebp]
|
|
|
|
mov cl,[ebp+1]
|
|
|
|
add ebp,8
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax],dl
|
|
|
|
mov [eax+1],cl
|
|
|
|
mov dl,[ebp-6]
|
|
|
|
mov cl,[ebp-5]
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax+2],dl
|
|
|
|
mov [eax+3],cl
|
|
|
|
mov dl,[ebp-4]
|
|
|
|
mov cl,[ebp-3]
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax+edi],dl
|
|
|
|
mov [eax+edi+1],cl
|
|
|
|
mov dl,[ebp-2]
|
|
|
|
mov cl,[ebp-1]
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax+edi+2],dl
|
|
|
|
mov [eax+edi+3],cl
|
|
|
|
lea eax,[eax+edi*2]
|
|
|
|
dec ebx
|
|
|
|
|
|
|
|
jnz .loop
|
|
|
|
|
|
|
|
.done pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebp
|
|
|
|
pop ebx
|
|
|
|
ret 4
|
|
|
|
|
|
|
|
GLOBAL @rt_map4cols_asm2@12
|
|
|
|
GLOBAL _rt_map4cols_asm2
|
|
|
|
GLOBAL rt_map4cols_asm2
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
rt_map4cols_asm2:
|
|
|
|
_rt_map4cols_asm2:
|
|
|
|
pop eax
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
mov edx,[esp+4]
|
|
|
|
push ecx
|
|
|
|
mov ecx,[esp+4]
|
|
|
|
push eax
|
|
|
|
|
|
|
|
@rt_map4cols_asm2@12:
|
|
|
|
push ebx
|
|
|
|
mov ebx,[esp+8]
|
|
|
|
push ebp
|
|
|
|
push esi
|
|
|
|
sub ebx,edx
|
|
|
|
push edi
|
|
|
|
js near .done
|
|
|
|
|
|
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
|
|
shl edx,2
|
|
|
|
mov eax,ecx
|
|
|
|
inc ebx ; ebx = count
|
|
|
|
mov edi,[ylookup+edx]
|
|
|
|
lea ebp,[dc_temp+edx] ; ebp = source
|
|
|
|
add eax,edi ; eax = dest
|
|
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
xor ecx,ecx
|
|
|
|
xor edx,edx
|
|
|
|
|
|
|
|
shr ebx,1
|
|
|
|
jnc .even
|
|
|
|
|
|
|
|
mov dl,[ebp]
|
|
|
|
mov cl,[ebp+1]
|
|
|
|
add ebp,4
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax],dl
|
|
|
|
mov [eax+1],cl
|
|
|
|
mov dl,[ebp-2]
|
|
|
|
mov cl,[ebp-1]
|
|
|
|
mov dl,[esi+edx]
|
|
|
|
mov cl,[esi+ecx]
|
|
|
|
mov [eax+2],dl
|
|
|
|
mov [eax+3],cl
|
|
|
|
add eax,edi
|
|
|
|
|
|
|
|
.even and ebx,ebx
|
|
|
|
jz .done
|
|
|
|
|
2007-12-11 02:38:38 +00:00
|
|
|
.loop:
|
2006-02-24 04:48:15 +00:00
|
|
|
mov dl,[ebp+3]
|
|
|
|
mov ch,[esi+edx]
|
|
|
|
mov dl,[ebp+2]
|
|
|
|
mov cl,[esi+edx]
|
|
|
|
shl ecx,16
|
|
|
|
mov dl,[ebp+1]
|
|
|
|
mov ch,[esi+edx]
|
|
|
|
mov dl,[ebp]
|
|
|
|
mov cl,[esi+edx]
|
|
|
|
mov [eax],ecx
|
|
|
|
add eax,edi
|
|
|
|
|
|
|
|
mov dl,[ebp+7]
|
|
|
|
mov ch,[esi+edx]
|
|
|
|
mov dl,[ebp+6]
|
|
|
|
mov cl,[esi+edx]
|
|
|
|
shl ecx,16
|
|
|
|
mov dl,[ebp+5]
|
|
|
|
mov ch,[esi+edx]
|
|
|
|
mov dl,[ebp+4]
|
|
|
|
mov cl,[esi+edx]
|
|
|
|
mov [eax],ecx
|
|
|
|
add eax,edi
|
|
|
|
add ebp,8
|
|
|
|
dec ebx
|
|
|
|
|
|
|
|
jnz .loop
|
|
|
|
|
|
|
|
.done pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebp
|
|
|
|
pop ebx
|
|
|
|
ret 4
|
|
|
|
|
2008-02-27 03:11:35 +00:00
|
|
|
align 16
|
|
|
|
|
|
|
|
GLOBAL rt_shaded4cols_asm
|
|
|
|
GLOBAL _rt_shaded4cols_asm
|
|
|
|
|
|
|
|
rt_shaded4cols_asm:
|
|
|
|
_rt_shaded4cols_asm:
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
push ebp
|
|
|
|
mov ebp,[esp+16]
|
|
|
|
sub ebp,ecx
|
|
|
|
js near s4nil
|
|
|
|
mov eax,[ylookup+ecx*4]
|
|
|
|
add eax,[dc_destorg] ; eax = destination
|
|
|
|
push ebx
|
|
|
|
push esi
|
|
|
|
inc ebp ; ebp = count
|
|
|
|
add eax,[esp+16]
|
|
|
|
push edi
|
|
|
|
lea esi,[dc_temp+ecx*4] ; esi = source
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
s4loop: movzx edx,byte [esi]
|
|
|
|
movzx ecx,byte [esi+1]
|
|
|
|
s4cm1: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
s4cm2: movzx edi,byte [SPACEFILLER4+ecx] ; colormap
|
|
|
|
shl edx,8
|
|
|
|
movzx ebx,byte [eax]
|
|
|
|
shl edi,8
|
|
|
|
movzx ecx,byte [eax+1]
|
|
|
|
sub ebx,edx
|
|
|
|
sub ecx,edi
|
|
|
|
mov ebx,[Col2RGB8+0x10000+ebx*4]
|
|
|
|
mov ecx,[Col2RGB8+0x10000+ecx*4]
|
|
|
|
s4fg1: add ebx,[SPACEFILLER4+edx*4]
|
|
|
|
s4fg2: add ecx,[SPACEFILLER4+edi*4]
|
|
|
|
or ebx,0x1f07c1f
|
|
|
|
or ecx,0x1f07c1f
|
|
|
|
mov edx,ebx
|
|
|
|
shr ebx,15
|
|
|
|
mov edi,ecx
|
|
|
|
shr ecx,15
|
|
|
|
and edx,ebx
|
|
|
|
and ecx,edi
|
|
|
|
mov bl,[RGB32k+edx]
|
|
|
|
movzx edx,byte [esi+2]
|
|
|
|
mov bh,[RGB32k+ecx]
|
|
|
|
movzx ecx,byte [esi+3]
|
|
|
|
mov [eax],bl
|
|
|
|
mov [eax+1],bh
|
|
|
|
|
|
|
|
s4cm3: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
s4cm4: movzx edi,byte [SPACEFILLER4+ecx] ; colormap
|
|
|
|
shl edx,8
|
|
|
|
movzx ebx,byte [eax+2]
|
|
|
|
shl edi,8
|
|
|
|
movzx ecx,byte [eax+3]
|
|
|
|
sub ebx,edx
|
|
|
|
sub ecx,edi
|
|
|
|
mov ebx,[Col2RGB8+0x10000+ebx*4]
|
|
|
|
mov ecx,[Col2RGB8+0x10000+ecx*4]
|
|
|
|
s4fg3: add ebx,[SPACEFILLER4+edx*4]
|
|
|
|
s4fg4: add ecx,[SPACEFILLER4+edi*4]
|
|
|
|
or ebx,0x1f07c1f
|
|
|
|
or ecx,0x1f07c1f
|
|
|
|
mov edx,ebx
|
|
|
|
shr ebx,15
|
|
|
|
mov edi,ecx
|
|
|
|
shr ecx,15
|
|
|
|
and edx,ebx
|
|
|
|
and ecx,edi
|
|
|
|
s4p: add eax,320 ; pitch
|
|
|
|
add esi,4
|
|
|
|
mov bl,[RGB32k+edx]
|
|
|
|
mov bh,[RGB32k+ecx]
|
|
|
|
s4p2: mov [eax-320+2],bl
|
|
|
|
s4p3: mov [eax-320+3],bh
|
|
|
|
dec ebp
|
|
|
|
jne s4loop
|
|
|
|
|
|
|
|
pop edi
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
s4nil: pop ebp
|
|
|
|
ret
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
2008-02-28 05:24:06 +00:00
|
|
|
GLOBAL rt_add4cols_asm
|
|
|
|
GLOBAL _rt_add4cols_asm
|
|
|
|
|
|
|
|
rt_add4cols_asm:
|
|
|
|
_rt_add4cols_asm:
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
push edi
|
|
|
|
mov edi,[esp+16]
|
|
|
|
sub edi,ecx
|
|
|
|
js near a4nil
|
|
|
|
mov eax,[ylookup+ecx*4]
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
push ebx
|
|
|
|
push esi
|
|
|
|
push ebp
|
|
|
|
inc edi
|
|
|
|
add eax,[esp+20]
|
|
|
|
lea esi,[dc_temp+ecx*4]
|
|
|
|
|
|
|
|
align 16
|
|
|
|
a4loop:
|
|
|
|
movzx ebx,byte [esi]
|
|
|
|
movzx edx,byte [esi+1]
|
|
|
|
movzx ecx,byte [eax]
|
|
|
|
movzx ebp,byte [eax+1]
|
|
|
|
a4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
|
|
a4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
a4bg1: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
|
|
a4bg2: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
|
|
a4fg1: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
|
|
a4fg2: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
|
|
or ecx,0x01f07c1f
|
|
|
|
or ebp,0x01f07c1f
|
|
|
|
mov ebx,ecx
|
|
|
|
shr ecx,15
|
|
|
|
mov edx,ebp
|
|
|
|
shr ebp,15
|
|
|
|
and ecx,ebx
|
|
|
|
and ebp,edx
|
|
|
|
movzx ebx,byte [esi+2]
|
|
|
|
movzx edx,byte [esi+3]
|
|
|
|
mov cl,[RGB32k+ecx]
|
|
|
|
mov ch,[RGB32k+ebp]
|
|
|
|
mov [eax],cl
|
|
|
|
mov [eax+1],ch
|
|
|
|
|
|
|
|
movzx ecx,byte [eax+2]
|
|
|
|
movzx ebp,byte [eax+3]
|
|
|
|
a4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
|
|
a4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
a4bg3: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
|
|
a4bg4: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
|
|
a4fg3: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
|
|
a4fg4: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
|
|
or ecx,0x01f07c1f
|
|
|
|
or ebp,0x01f07c1f
|
|
|
|
mov ebx,ecx
|
|
|
|
shr ecx,15
|
|
|
|
mov edx,ebp
|
|
|
|
shr ebp,15
|
|
|
|
and ebx,ecx
|
|
|
|
and edx,ebp
|
|
|
|
mov cl,[RGB32k+ebx]
|
|
|
|
mov ch,[RGB32k+edx]
|
|
|
|
mov [eax+2],cl
|
|
|
|
mov [eax+3],ch
|
|
|
|
|
|
|
|
add esi,4
|
|
|
|
a4p: add eax,320 ; pitch
|
|
|
|
sub edi,1
|
|
|
|
jne a4loop
|
|
|
|
pop ebp
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
a4nil: pop edi
|
|
|
|
ret
|
|
|
|
|
|
|
|
align 16
|
|
|
|
|
|
|
|
GLOBAL rt_addclamp4cols_asm
|
|
|
|
GLOBAL _rt_addclamp4cols_asm
|
|
|
|
|
|
|
|
rt_addclamp4cols_asm:
|
|
|
|
_rt_addclamp4cols_asm:
|
|
|
|
mov ecx,[esp+8]
|
|
|
|
push edi
|
|
|
|
mov edi,[esp+16]
|
|
|
|
sub edi,ecx
|
|
|
|
js near ac4nil
|
|
|
|
mov eax,[ylookup+ecx*4]
|
|
|
|
add eax,[dc_destorg]
|
|
|
|
push ebx
|
|
|
|
push esi
|
|
|
|
push ebp
|
|
|
|
inc edi
|
|
|
|
add eax,[esp+20]
|
|
|
|
lea esi,[dc_temp+ecx*4]
|
|
|
|
push edi
|
|
|
|
|
|
|
|
align 16
|
|
|
|
ac4loop:
|
|
|
|
movzx ebx,byte [esi]
|
|
|
|
movzx edx,byte [esi+1]
|
|
|
|
mov [esp],edi
|
|
|
|
ac4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
|
|
ac4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
movzx ecx,byte [eax]
|
|
|
|
movzx ebp,byte [eax+1]
|
2008-02-29 23:31:40 +00:00
|
|
|
ac4fg1: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
|
|
ac4fg2: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb
|
2008-02-28 05:24:06 +00:00
|
|
|
ac4bg1: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
|
|
ac4bg2: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
|
|
mov ecx,ebx
|
|
|
|
or ebx,0x01f07c1f
|
|
|
|
and ecx,0x40100400
|
|
|
|
and ebx,0x3fffffff
|
|
|
|
mov edi,ecx
|
|
|
|
shr ecx,5
|
|
|
|
mov ebp,edx
|
|
|
|
sub edi,ecx
|
|
|
|
or edx,0x01f07c1f
|
|
|
|
or ebx,edi
|
|
|
|
mov ecx,ebx
|
|
|
|
shr ebx,15
|
|
|
|
and ebp,0x40100400
|
|
|
|
and ebx,ecx
|
|
|
|
and edx,0x3fffffff
|
|
|
|
mov edi,ebp
|
|
|
|
shr ebp,5
|
|
|
|
mov cl,[RGB32k+ebx]
|
|
|
|
sub edi,ebp
|
|
|
|
mov [eax],cl
|
|
|
|
or edx,edi
|
|
|
|
mov ebp,edx
|
|
|
|
shr edx,15
|
|
|
|
movzx ebx,byte [esi+2]
|
|
|
|
and ebp,edx
|
|
|
|
movzx edx,byte [esi+3]
|
|
|
|
ac4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
|
|
mov cl,[RGB32k+ebp]
|
|
|
|
ac4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
|
|
mov [eax+1],cl
|
|
|
|
movzx ecx,byte [eax+2]
|
|
|
|
movzx ebp,byte [eax+3]
|
2008-02-29 23:31:40 +00:00
|
|
|
ac4fg3: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
|
|
ac4fg4: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb
|
2008-02-28 05:24:06 +00:00
|
|
|
ac4bg3: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
|
|
ac4bg4: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
|
|
mov ecx,ebx
|
|
|
|
or ebx,0x01f07c1f
|
|
|
|
and ecx,0x40100400
|
|
|
|
and ebx,0x3fffffff
|
|
|
|
mov edi,ecx
|
|
|
|
shr ecx,5
|
|
|
|
mov ebp,edx
|
|
|
|
sub edi,ecx
|
|
|
|
or edx,0x01f07c1f
|
|
|
|
or ebx,edi
|
|
|
|
mov ecx,ebx
|
|
|
|
shr ebx,15
|
|
|
|
and ebp,0x40100400
|
|
|
|
and ebx,ecx
|
|
|
|
and edx,0x3fffffff
|
|
|
|
mov edi,ebp
|
|
|
|
shr ebp,5
|
|
|
|
mov cl,[RGB32k+ebx]
|
|
|
|
sub edi,ebp
|
|
|
|
mov [eax+2],cl
|
|
|
|
or edx,edi
|
|
|
|
mov edi,[esp]
|
|
|
|
mov ebp,edx
|
|
|
|
shr edx,15
|
|
|
|
add esi,4
|
|
|
|
and edx,ebp
|
|
|
|
mov cl,[RGB32k+edx]
|
|
|
|
mov [eax+3],cl
|
|
|
|
|
|
|
|
ac4p: add eax,320 ; pitch
|
|
|
|
sub edi,1
|
|
|
|
jne ac4loop
|
|
|
|
pop edi
|
|
|
|
|
|
|
|
pop ebp
|
|
|
|
pop esi
|
|
|
|
pop ebx
|
|
|
|
ac4nil: pop edi
|
|
|
|
ret
|
|
|
|
|
2008-06-08 02:31:30 +00:00
|
|
|
rtext_end:
|
2010-08-13 03:14:05 +00:00
|
|
|
%ifdef M_TARGET_MACHO
|
2010-08-13 22:12:42 +00:00
|
|
|
GLOBAL _rtext_tmap_end
|
|
|
|
_rtext_tmap_end:
|
2010-08-13 03:14:05 +00:00
|
|
|
%endif
|
2008-02-28 05:24:06 +00:00
|
|
|
align 16
|
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
;************************
|
|
|
|
|
|
|
|
SECTION .text
|
|
|
|
|
2008-02-27 03:11:35 +00:00
|
|
|
GLOBAL R_SetupShadedCol
|
|
|
|
GLOBAL @R_SetupShadedCol@0
|
|
|
|
|
|
|
|
# Patch the values of dc_colormap and dc_color into the shaded column drawer.
|
|
|
|
|
|
|
|
R_SetupShadedCol:
|
|
|
|
@R_SetupShadedCol@0:
|
|
|
|
mov eax,[dc_colormap]
|
2008-02-28 22:15:24 +00:00
|
|
|
cmp [s4cm1+3],eax
|
|
|
|
je .cmdone
|
2008-02-27 03:11:35 +00:00
|
|
|
mov [s4cm1+3],eax
|
|
|
|
mov [s4cm2+3],eax
|
|
|
|
mov [s4cm3+3],eax
|
|
|
|
mov [s4cm4+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
.cmdone mov eax,[dc_color]
|
2008-02-27 03:11:35 +00:00
|
|
|
lea eax,[Col2RGB8+eax*4]
|
2008-02-28 22:15:24 +00:00
|
|
|
cmp [s4fg1+3],eax
|
|
|
|
je .cdone
|
2008-02-27 03:11:35 +00:00
|
|
|
mov [s4fg1+3],eax
|
|
|
|
mov [s4fg2+3],eax
|
|
|
|
mov [s4fg3+3],eax
|
|
|
|
mov [s4fg4+3],eax
|
2008-06-08 02:31:30 +00:00
|
|
|
selfmod s4cm1, s4fg4+7
|
2008-02-28 22:15:24 +00:00
|
|
|
.cdone ret
|
2008-02-27 03:11:35 +00:00
|
|
|
|
2008-02-28 05:24:06 +00:00
|
|
|
GLOBAL R_SetupAddCol
|
|
|
|
GLOBAL @R_SetupAddCol@0
|
|
|
|
|
|
|
|
# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the
|
|
|
|
# unclamped adding column drawer.
|
|
|
|
|
|
|
|
R_SetupAddCol:
|
|
|
|
@R_SetupAddCol@0:
|
|
|
|
mov eax,[dc_colormap]
|
2008-02-28 22:15:24 +00:00
|
|
|
cmp [a4cm1+3],eax
|
|
|
|
je .cmdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [a4cm1+3],eax
|
|
|
|
mov [a4cm2+3],eax
|
|
|
|
mov [a4cm3+3],eax
|
|
|
|
mov [a4cm4+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
.cmdone mov eax,[dc_srcblend]
|
|
|
|
cmp [a4fg1+3],eax
|
|
|
|
je .sbdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [a4fg1+3],eax
|
|
|
|
mov [a4fg2+3],eax
|
|
|
|
mov [a4fg3+3],eax
|
|
|
|
mov [a4fg4+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
.sbdone mov eax,[dc_destblend]
|
|
|
|
cmp [a4bg1+3],eax
|
|
|
|
je .dbdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [a4bg1+3],eax
|
|
|
|
mov [a4bg2+3],eax
|
|
|
|
mov [a4bg3+3],eax
|
|
|
|
mov [a4bg4+3],eax
|
2008-06-08 02:31:30 +00:00
|
|
|
selfmod a4cm1, a4bg4+7
|
2008-02-28 22:15:24 +00:00
|
|
|
.dbdone ret
|
2008-02-28 05:24:06 +00:00
|
|
|
|
|
|
|
GLOBAL R_SetupAddClampCol
|
|
|
|
GLOBAL @R_SetupAddClampCol@0
|
|
|
|
|
|
|
|
# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the
|
|
|
|
# add with clamping column drawer.
|
|
|
|
|
|
|
|
R_SetupAddClampCol:
|
|
|
|
@R_SetupAddClampCol@0:
|
|
|
|
mov eax,[dc_colormap]
|
2008-02-29 23:31:40 +00:00
|
|
|
cmp [ac4cm1+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
je .cmdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [ac4cm1+3],eax
|
|
|
|
mov [ac4cm2+3],eax
|
|
|
|
mov [ac4cm3+3],eax
|
|
|
|
mov [ac4cm4+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
.cmdone mov eax,[dc_srcblend]
|
|
|
|
cmp [ac4fg1+3],eax
|
|
|
|
je .sbdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [ac4fg1+3],eax
|
|
|
|
mov [ac4fg2+3],eax
|
|
|
|
mov [ac4fg3+3],eax
|
|
|
|
mov [ac4fg4+3],eax
|
2008-02-28 22:15:24 +00:00
|
|
|
.sbdone mov eax,[dc_destblend]
|
|
|
|
cmp [ac4bg1+3],eax
|
|
|
|
je .dbdone
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [ac4bg1+3],eax
|
|
|
|
mov [ac4bg2+3],eax
|
|
|
|
mov [ac4bg3+3],eax
|
|
|
|
mov [ac4bg4+3],eax
|
2008-06-08 02:31:30 +00:00
|
|
|
selfmod ac4cm1, ac4bg4+7
|
2008-02-28 22:15:24 +00:00
|
|
|
.dbdone ret
|
2008-02-28 05:24:06 +00:00
|
|
|
|
2006-02-24 04:48:15 +00:00
|
|
|
EXTERN setvlinebpl_
|
|
|
|
EXTERN setpitch3
|
|
|
|
|
|
|
|
GLOBAL @ASM_PatchPitch@0
|
|
|
|
GLOBAL _ASM_PatchPitch
|
|
|
|
GLOBAL ASM_PatchPitch
|
|
|
|
|
|
|
|
ASM_PatchPitch:
|
|
|
|
_ASM_PatchPitch:
|
|
|
|
@ASM_PatchPitch@0:
|
2008-02-27 03:11:35 +00:00
|
|
|
mov eax,[dc_pitch]
|
|
|
|
mov [rdcp1+2],eax
|
|
|
|
mov [rdcp2+2],eax
|
|
|
|
mov [rdcp3+2],eax
|
|
|
|
mov [s4p+1],eax
|
2008-02-28 05:24:06 +00:00
|
|
|
mov [a4p+1],eax
|
|
|
|
mov [ac4p+1],eax
|
2008-02-27 03:11:35 +00:00
|
|
|
mov ecx,eax
|
|
|
|
neg ecx
|
|
|
|
inc ecx
|
|
|
|
inc ecx
|
|
|
|
mov [s4p2+2],ecx
|
|
|
|
inc ecx
|
|
|
|
mov [s4p3+2],ecx
|
2008-06-08 02:31:30 +00:00
|
|
|
selfmod rtext_start, rtext_end
|
2008-02-27 03:11:35 +00:00
|
|
|
call setpitch3
|
|
|
|
jmp setvlinebpl_
|