mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-24 04:51:41 +00:00
37658cb4f7
SVN r3090 (trunk)
1893 lines
37 KiB
NASM
1893 lines
37 KiB
NASM
;*
|
|
;* tmap.nas
|
|
;* The texture-mapping inner loops in pure assembly language.
|
|
;*
|
|
;*---------------------------------------------------------------------------
|
|
;* Copyright 1998-2006 Randy Heit
|
|
;* All rights reserved.
|
|
;*
|
|
;* Redistribution and use in source and binary forms, with or without
|
|
;* modification, are permitted provided that the following conditions
|
|
;* are met:
|
|
;*
|
|
;* 1. Redistributions of source code must retain the above copyright
|
|
;* notice, this list of conditions and the following disclaimer.
|
|
;* 2. Redistributions in binary form must reproduce the above copyright
|
|
;* notice, this list of conditions and the following disclaimer in the
|
|
;* documentation and/or other materials provided with the distribution.
|
|
;* 3. The name of the author may not be used to endorse or promote products
|
|
;* derived from this software without specific prior written permission.
|
|
;*
|
|
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;*---------------------------------------------------------------------------
|
|
;*
|
|
|
|
BITS 32
|
|
|
|
%include "valgrind.inc"
|
|
|
|
; Section selection and shared constants.
|
|
|
|
SECTION .data
|
|
|
|
%define SPACEFILLER4 (0x44444444)
|
|
|
|
; If you change this in r_draw.c, be sure to change it here, too!
|
|
FUZZTABLE equ 50
|
|
|
|
%ifndef M_TARGET_LINUX
|
|
|
|
%define ylookup _ylookup
|
|
%define centery _centery
|
|
%define fuzzpos _fuzzpos
|
|
%define fuzzoffset _fuzzoffset
|
|
%define NormalLight _NormalLight
|
|
%define viewheight _viewheight
|
|
%define fuzzviewheight _fuzzviewheight
|
|
%define CPU _CPU
|
|
|
|
%define dc_pitch _dc_pitch
|
|
%define dc_colormap _dc_colormap
|
|
%define dc_color _dc_color
|
|
%define dc_iscale _dc_iscale
|
|
%define dc_texturefrac _dc_texturefrac
|
|
%define dc_srcblend _dc_srcblend
|
|
%define dc_destblend _dc_destblend
|
|
%define dc_source _dc_source
|
|
%define dc_yl _dc_yl
|
|
%define dc_yh _dc_yh
|
|
%define dc_x _dc_x
|
|
%define dc_count _dc_count
|
|
%define dc_dest _dc_dest
|
|
%define dc_destorg _dc_destorg
|
|
|
|
%define Col2RGB8 _Col2RGB8
|
|
%define RGB32k _RGB32k
|
|
|
|
%define dc_ctspan _dc_ctspan
|
|
%define dc_temp _dc_temp
|
|
|
|
%define ds_xstep _ds_xstep
|
|
%define ds_ystep _ds_ystep
|
|
%define ds_colormap _ds_colormap
|
|
%define ds_source _ds_source
|
|
%define ds_x1 _ds_x1
|
|
%define ds_x2 _ds_x2
|
|
%define ds_xfrac _ds_xfrac
|
|
%define ds_yfrac _ds_yfrac
|
|
%define ds_y _ds_y
|
|
|
|
%define ds_cursource _ds_cursource
|
|
%define ds_curcolormap _ds_curcolormap
|
|
|
|
%define R_SetSpanSource_ASM _R_SetSpanSource_ASM
|
|
%define R_SetSpanSize_ASM _R_SetSpanSize_ASM
|
|
%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM
|
|
%define R_SetupShadedCol _R_SetupShadedCol
|
|
%define R_SetupAddCol _R_SetupAddCol
|
|
%define R_SetupAddClampCol _R_SetupAddClampCol
|
|
|
|
%endif
|
|
|
|
EXTERN ylookup
|
|
EXTERN centery
|
|
EXTERN fuzzpos
|
|
EXTERN fuzzoffset
|
|
EXTERN NormalLight
|
|
EXTERN viewheight
|
|
EXTERN fuzzviewheight
|
|
EXTERN CPU
|
|
|
|
EXTERN dc_pitch
|
|
EXTERN dc_colormap
|
|
EXTERN dc_color
|
|
EXTERN dc_iscale
|
|
EXTERN dc_texturefrac
|
|
EXTERN dc_srcblend
|
|
EXTERN dc_destblend
|
|
EXTERN dc_source
|
|
EXTERN dc_yl
|
|
EXTERN dc_yh
|
|
EXTERN dc_x
|
|
EXTERN dc_count
|
|
EXTERN dc_dest
|
|
EXTERN dc_destorg
|
|
|
|
EXTERN dc_ctspan
|
|
EXTERN dc_temp
|
|
|
|
EXTERN Col2RGB8
|
|
EXTERN RGB32k
|
|
|
|
EXTERN ds_xstep
|
|
EXTERN ds_ystep
|
|
EXTERN ds_colormap
|
|
EXTERN ds_source
|
|
EXTERN ds_x1
|
|
EXTERN ds_x2
|
|
EXTERN ds_xfrac
|
|
EXTERN ds_yfrac
|
|
EXTERN ds_y
|
|
|
|
; Exported variables. R_SetSpanSource_ASM stores its argument in
; ds_cursource and R_SetSpanColormap_ASM stores its argument in
; ds_curcolormap after patching the span drawers.
            GLOBAL  ds_cursource
            GLOBAL  ds_curcolormap

ds_cursource:   dd  0
ds_curcolormap: dd  0

; File-local variables.
lastAddress:    dd  0
pixelcount:     dd  0
|
|
|
|
SECTION .text
|
|
|
|
|
|
;-----------------------------------------------------------------------
; R_SetSpanSource_ASM
;
; Writes a new texture base address into the displacement field of every
; texel-read instruction of R_DrawSpanP_ASM (spreada..spreadg) and
; R_DrawSpanMaskedP_ASM (mspreada..mspreadg), then stores the same
; address in ds_cursource.
;
; Entry points:
;   R_SetSpanSource_ASM      address is read from the stack ([esp+4])
;   @R_SetSpanSource_ASM@4   address is already in ecx
;-----------------------------------------------------------------------
            GLOBAL  @R_SetSpanSource_ASM@4
            GLOBAL  R_SetSpanSource_ASM

R_SetSpanSource_ASM:
            mov     ecx, [esp+4]            ; stack-argument entry falls through

@R_SetSpanSource_ASM@4:
            ; Each target is `mov bl,[reg+SPACEFILLER4]`; +2 skips the
            ; opcode and ModRM bytes and lands on the 32-bit displacement.

            ; -- R_DrawSpanP_ASM --
            mov     [spreada+2], ecx
            mov     [spreadb+2], ecx
            mov     [spreadc+2], ecx
            mov     [spreadd+2], ecx
            mov     [spreade+2], ecx
            mov     [spreadf+2], ecx
            mov     [spreadg+2], ecx

            ; -- R_DrawSpanMaskedP_ASM --
            mov     [mspreada+2], ecx
            mov     [mspreadb+2], ecx
            mov     [mspreadc+2], ecx
            mov     [mspreadd+2], ecx
            mov     [mspreade+2], ecx
            mov     [mspreadf+2], ecx
            mov     [mspreadg+2], ecx

            ; selfmod is a macro from valgrind.inc, handed the patched range
            ; (presumably to announce the rewritten code - confirm there).
            selfmod spreada, mspreadg+6

            mov     [ds_cursource], ecx
            ret
|
|
|
|
;-----------------------------------------------------------------------
; R_SetSpanColormap_ASM
;
; Writes a new colormap base address into the displacement field of every
; colormap-lookup instruction of R_DrawSpanP_ASM (spmapa..spmapg) and
; R_DrawSpanMaskedP_ASM (mspmapa..mspmapg), then stores the same address
; in ds_curcolormap.
;
; Entry points:
;   R_SetSpanColormap_ASM      address is read from the stack ([esp+4])
;   @R_SetSpanColormap_ASM@4   address is already in ecx
;-----------------------------------------------------------------------
            GLOBAL  @R_SetSpanColormap_ASM@4
            GLOBAL  R_SetSpanColormap_ASM

R_SetSpanColormap_ASM:
            mov     ecx, [esp+4]            ; stack-argument entry falls through

@R_SetSpanColormap_ASM@4:
            ; Each target is `mov bl,[ebx+SPACEFILLER4]`; +2 skips the
            ; opcode and ModRM bytes and lands on the 32-bit displacement.

            ; -- R_DrawSpanP_ASM --
            mov     [spmapa+2], ecx
            mov     [spmapb+2], ecx
            mov     [spmapc+2], ecx
            mov     [spmapd+2], ecx
            mov     [spmape+2], ecx
            mov     [spmapf+2], ecx
            mov     [spmapg+2], ecx

            ; -- R_DrawSpanMaskedP_ASM --
            mov     [mspmapa+2], ecx
            mov     [mspmapb+2], ecx
            mov     [mspmapc+2], ecx
            mov     [mspmapd+2], ecx
            mov     [mspmape+2], ecx
            mov     [mspmapf+2], ecx
            mov     [mspmapg+2], ecx

            ; selfmod is a macro from valgrind.inc, handed the patched range
            ; (presumably to announce the rewritten code - confirm there).
            selfmod spmapa, mspmapg+6

            mov     [ds_curcolormap], ecx
            ret
|
|
|
|
;-----------------------------------------------------------------------
; R_SetSpanSize_ASM
;
; Stack arguments: [esp+4] -> edx, [esp+8] -> ecx.
; (NOTE(review): presumably the two bit counts describing the span
; texture size - confirm against the caller.)
;
; Forwards both values to SetTiltedSpanSize, then patches the shift
; counts, rotate counts and AND masks inside R_DrawSpanP_ASM (ds*) and
; R_DrawSpanMaskedP_ASM (dms*):
;   dl                        -> dsy1/dsy2       (shl counts)
;   cl                        -> dsx1..dsx7      (rol counts)
;   (1 << (ecx+edx)) - 1      -> dsm1, dsm5..dsm7
;   that mask rotated right cl-> dsm2..dsm4
;   ~(rotated mask & 0xffff)  -> dsm8/dsm9
;   -dl                       -> dsy3/dsy4       (shr counts; the CPU uses
;                                only the low 5 bits, i.e. 32-dl)
;-----------------------------------------------------------------------
            GLOBAL  R_SetSpanSize_ASM

            EXTERN  SetTiltedSpanSize

R_SetSpanSize_ASM:
            mov     edx, [esp+4]
            mov     ecx, [esp+8]
            call    SetTiltedSpanSize       ; ecx/edx are used again below, so
                                            ; the callee must leave them intact

            ; --- shift / rotate counts, unmasked drawer ---
            mov     [dsy1+2], dl
            mov     [dsy2+2], dl

            mov     [dsx1+2], cl
            mov     [dsx2+2], cl
            mov     [dsx3+2], cl
            mov     [dsx4+2], cl
            mov     [dsx5+2], cl
            mov     [dsx6+2], cl
            mov     [dsx7+2], cl

            ; --- shift / rotate counts, masked drawer ---
            mov     [dmsy1+2], dl
            mov     [dmsy2+2], dl

            mov     [dmsx1+2], cl
            mov     [dmsx2+2], cl
            mov     [dmsx3+2], cl
            mov     [dmsx4+2], cl
            mov     [dmsx5+2], cl
            mov     [dmsx6+2], cl
            mov     [dmsx7+2], cl

            ; --- eax = (1 << (ecx + edx)) - 1 ---
            push    ecx                     ; keep the rotate count for later
            add     ecx, edx
            mov     eax, 1
            shl     eax, cl
            dec     eax
            ; `and ebp,imm32` keeps its immediate at +2, `and eax,imm32` at +1
            mov     [dsm1+2], eax
            mov     [dsm5+1], eax
            mov     [dsm6+1], eax
            mov     [dsm7+1], eax

            mov     [dmsm1+2], eax
            mov     [dmsm5+1], eax
            mov     [dmsm6+1], eax
            mov     [dmsm7+1], eax

            ; --- same mask rotated right by the original cl ---
            pop     ecx
            ror     eax, cl
            mov     [dsm2+2], eax
            mov     [dsm3+2], eax
            mov     [dsm4+2], eax

            mov     [dmsm2+2], eax
            mov     [dmsm3+2], eax
            mov     [dmsm4+2], eax

            ; --- complement of the low 16 bits of the rotated mask ---
            and     eax, 0xffff
            not     eax
            mov     [dsm8+2], eax
            mov     [dsm9+2], eax

            mov     [dmsm8+2], eax
            mov     [dmsm9+2], eax

            ; --- right-shift counts: -dl ---
            neg     dl
            mov     [dsy3+2], dl
            mov     [dsy4+2], dl

            mov     [dmsy3+2], dl
            mov     [dmsy4+2], dl

            ; selfmod is a macro from valgrind.inc, handed the patched range.
            selfmod dsy1, dmsm7+6

aret:       ret
|
|
|
|
%ifdef M_TARGET_MACHO
|
|
SECTION .text align=64
|
|
%else
|
|
SECTION .rtext progbits alloc exec write align=64
|
|
%endif
|
|
|
|
%ifdef M_TARGET_MACHO
|
|
GLOBAL _rtext_tmap_start
|
|
_rtext_tmap_start:
|
|
%endif
|
|
|
|
rtext_start:
|
|
|
|
;-----------------------------------------------------------------------
; R_DrawSpanP_ASM
;
; Draws the horizontal run of pixels ds_x1..ds_x2 (inclusive) on row ds_y.
; Destination = dc_destorg + ylookup[ds_y] + ds_x1, one byte per pixel.
; Every pixel is read from the texture (spread* instructions) and then
; remapped through the colormap (spmap* instructions).
;
; Labels followed by a colon inside the body are patch points: the
; SPACEFILLER4 displacements, shift/rotate counts and AND masks are
; rewritten at run time by R_SetSpanSource_ASM, R_SetSpanColormap_ASM and
; R_SetSpanSize_ASM. Do not reorder or re-encode those instructions.
;
; Register roles:
;   eax     scratch
;   ebx     zero (only bl is ever written)
;   ecx     yfrac at top end, xfrac int part at low end
;   edx     xfrac frac part at top end
;   edi     dest
;   ebp     scratch
;   esi     count
;   [esp]   xstep
;   [esp+4] ystep
;-----------------------------------------------------------------------
            GLOBAL  @R_DrawSpanP_ASM@0
            GLOBAL  _R_DrawSpanP_ASM
            GLOBAL  R_DrawSpanP_ASM

            align 16

@R_DrawSpanP_ASM@0:
_R_DrawSpanP_ASM:
R_DrawSpanP_ASM:
            mov     eax, [ds_x2]
            mov     ecx, [ds_x1]
            sub     eax, ecx
            jl      near rdspret            ; x2 < x1: empty span

            push    ebx
            push    edi
            push    ebp
            push    esi
            sub     esp, 8                  ; two dwords: xstep, ystep

            ; --- destination pointer and packed stepping values ---
            mov     edi, ecx
            add     edi, [dc_destorg]
            mov     ecx, [ds_y]
            add     edi, [ylookup+ecx*4]
            mov     edx, [ds_xstep]
dsy1:       shl     edx, 6
            mov     ebp, [ds_xstep]
dsy3:       shr     ebp, 26
            xor     ebx, ebx
            lea     esi, [eax+1]            ; pixel count = x2 - x1 + 1
            mov     [esp], edx
            mov     edx, [ds_ystep]
            mov     ecx, [ds_xfrac]
dsy4:       shr     ecx, 26
dsm8:       and     edx, strict dword 0xffffffc0
            or      ebp, edx
            mov     [esp+4], ebp
            mov     ebp, [ds_yfrac]
            mov     edx, [ds_xfrac]
dsy2:       shl     edx, 6
dsm9:       and     ebp, strict dword 0xffffffc0
            or      ecx, ebp
            shr     esi, 1                  ; CF = count bit 0
            jnc     dseven1

            ; --- one leading pixel when the count is odd ---
            mov     ebp, ecx
dsx1:       rol     ebp, 6
dsm1:       and     ebp, 0xfff
            add     edx, [esp]
            adc     ecx, [esp+4]
spreada:    mov     bl, [ebp+SPACEFILLER4]
spmapa:     mov     bl, [ebx+SPACEFILLER4]
            mov     [edi], bl
            inc     edi

dseven1:    shr     esi, 1                  ; CF = count bit 1
            jnc     dsrest

            ; --- two pixels when bit 1 of the count is set ---
            mov     ebp, ecx
            add     edx, [esp]
            adc     ecx, [esp+4]
dsm2:       and     ebp, 0xfc00003f
dsx2:       rol     ebp, 6
            mov     eax, ecx
            add     edx, [esp]
            adc     ecx, [esp+4]
spreadb:    mov     bl, [ebp+SPACEFILLER4]  ; read texel1
dsx3:       rol     eax, 6
dsm6:       and     eax, 0xfff
spmapb:     mov     bl, [ebx+SPACEFILLER4]  ; map texel1
            mov     [edi], bl               ; store texel1
            add     edi, 2
spreadc:    mov     bl, [eax+SPACEFILLER4]  ; read texel2
spmapc:     mov     bl, [ebx+SPACEFILLER4]  ; map texel2
            mov     [edi-1], bl             ; store texel2

            ; --- remaining pixels, four per iteration (esi = count / 4) ---
dsrest:     test    esi, esi
            jz      near dsdone

            align 16

dsloop:     mov     ebp, ecx
spstep1d:   add     edx, [esp]
spstep2d:   adc     ecx, [esp+4]
dsm3:       and     ebp, 0xfc00003f
dsx4:       rol     ebp, 6
            mov     eax, ecx
spstep1e:   add     edx, [esp]
spstep2e:   adc     ecx, [esp+4]
spreadd:    mov     bl, [ebp+SPACEFILLER4]  ; read texel1
dsx5:       rol     eax, 6
dsm5:       and     eax, 0xfff
spmapd:     mov     bl, [ebx+SPACEFILLER4]  ; map texel1
            mov     [edi], bl               ; store texel1
            mov     ebp, ecx
spreade:    mov     bl, [eax+SPACEFILLER4]  ; read texel2
spstep1f:   add     edx, [esp]
spstep2f:   adc     ecx, [esp+4]
dsm4:       and     ebp, 0xfc00003f
dsx6:       rol     ebp, 6
spmape:     mov     bl, [ebx+SPACEFILLER4]  ; map texel2
            mov     eax, ecx
            mov     [edi+1], bl             ; store texel2
spreadf:    mov     bl, [ebp+SPACEFILLER4]  ; read texel3
spmapf:     mov     bl, [ebx+SPACEFILLER4]  ; map texel3
            add     edi, 4
dsx7:       rol     eax, 6
dsm7:       and     eax, 0xfff
            mov     [edi-2], bl             ; store texel3
spreadg:    mov     bl, [eax+SPACEFILLER4]  ; read texel4
spstep1g:   add     edx, [esp]
spstep2g:   adc     ecx, [esp+4]
spmapg:     mov     bl, [ebx+SPACEFILLER4]  ; map texel4
            dec     esi                     ; flags survive the store below
            mov     [edi-1], bl             ; store texel4
            jnz     near dsloop

dsdone:     add     esp, 8
            pop     esi
            pop     ebp
            pop     edi
            pop     ebx

rdspret:    ret                             ; also the early exit of R_DrawSpanMaskedP_ASM
|
|
|
|
;-----------------------------------------------------------------------
; R_DrawSpanMaskedP_ASM
;
; Same span walk as R_DrawSpanP_ASM, except that a texel whose value is 0
; is skipped: nothing is mapped or stored for that pixel.
;
; Destination = dc_destorg + ylookup[ds_y] + ds_x1, pixels ds_x1..ds_x2.
; Labels followed by a colon inside the body are patch points rewritten
; at run time by R_SetSpanSource_ASM, R_SetSpanColormap_ASM and
; R_SetSpanSize_ASM. Do not reorder or re-encode those instructions.
;
; Register roles:
;   eax     scratch
;   ebx     zero (only bl is ever written)
;   ecx     yfrac at top end, xfrac int part at low end
;   edx     xfrac frac part at top end
;   edi     dest
;   ebp     scratch
;   esi     count
;   [esp]   xstep
;   [esp+4] ystep
;-----------------------------------------------------------------------
            GLOBAL  @R_DrawSpanMaskedP_ASM@0
            GLOBAL  _R_DrawSpanMaskedP_ASM
            GLOBAL  R_DrawSpanMaskedP_ASM

            align 16

@R_DrawSpanMaskedP_ASM@0:
_R_DrawSpanMaskedP_ASM:
R_DrawSpanMaskedP_ASM:
            mov     eax, [ds_x2]
            mov     ecx, [ds_x1]
            sub     eax, ecx
            jl      rdspret                 ; x2 < x1: leave via R_DrawSpanP_ASM's ret

            push    ebx
            push    edi
            push    ebp
            push    esi
            sub     esp, 8                  ; two dwords: xstep, ystep

            ; --- destination pointer and packed stepping values ---
            mov     edi, ecx
            add     edi, [dc_destorg]
            mov     ecx, [ds_y]
            add     edi, [ylookup+ecx*4]
            mov     edx, [ds_xstep]
dmsy1:      shl     edx, 6
            mov     ebp, [ds_xstep]
dmsy3:      shr     ebp, 26
            xor     ebx, ebx
            lea     esi, [eax+1]            ; pixel count = x2 - x1 + 1
            mov     [esp], edx
            mov     edx, [ds_ystep]
            mov     ecx, [ds_xfrac]
dmsy4:      shr     ecx, 26
dmsm8:      and     edx, strict dword 0xffffffc0
            or      ebp, edx
            mov     [esp+4], ebp
            mov     ebp, [ds_yfrac]
            mov     edx, [ds_xfrac]
dmsy2:      shl     edx, 6
dmsm9:      and     ebp, strict dword 0xffffffc0
            or      ecx, ebp
            shr     esi, 1                  ; CF = count bit 0
            jnc     dmseven1

            ; --- one leading pixel when the count is odd ---
            mov     ebp, ecx
dmsx1:      rol     ebp, 6
dmsm1:      and     ebp, 0xfff
            add     edx, [esp]
            adc     ecx, [esp+4]
mspreada:   mov     bl, [ebp+SPACEFILLER4]
            cmp     bl, 0
            je      mspskipa                ; texel 0: leave the pixel alone
mspmapa:    mov     bl, [ebx+SPACEFILLER4]
            mov     [edi], bl
mspskipa:   inc     edi

dmseven1:   shr     esi, 1                  ; CF = count bit 1
            jnc     dmsrest

            ; --- two pixels when bit 1 of the count is set ---
            mov     ebp, ecx
            add     edx, [esp]
            adc     ecx, [esp+4]
dmsm2:      and     ebp, 0xfc00003f
dmsx2:      rol     ebp, 6
            mov     eax, ecx
            add     edx, [esp]
            adc     ecx, [esp+4]
mspreadb:   mov     bl, [ebp+SPACEFILLER4]  ; read texel1
dmsx3:      rol     eax, 6
dmsm6:      and     eax, 0xfff
            cmp     bl, 0
            je      mspskipb
mspmapb:    mov     bl, [ebx+SPACEFILLER4]  ; map texel1
            mov     [edi], bl               ; store texel1
mspskipb:   add     edi, 2
mspreadc:   mov     bl, [eax+SPACEFILLER4]  ; read texel2
            cmp     bl, 0
            je      dmsrest
mspmapc:    mov     bl, [ebx+SPACEFILLER4]  ; map texel2
            mov     [edi-1], bl             ; store texel2

            ; --- remaining pixels, four per iteration (esi = count / 4) ---
dmsrest:    test    esi, esi
            jz      near dmsdone

            align 16

dmsloop:    mov     ebp, ecx
mspstep1d:  add     edx, [esp]
mspstep2d:  adc     ecx, [esp+4]
dmsm3:      and     ebp, 0xfc00003f
dmsx4:      rol     ebp, 6
            mov     eax, ecx
mspstep1e:  add     edx, [esp]
mspstep2e:  adc     ecx, [esp+4]
mspreadd:   mov     bl, [ebp+SPACEFILLER4]  ; read texel1
dmsx5:      rol     eax, 6
dmsm5:      and     eax, 0xfff
            cmp     bl, 0
            mov     ebp, ecx                ; mov leaves the cmp flags intact
            je      mspreade
mspmapd:    mov     bl, [ebx+SPACEFILLER4]  ; map texel1
            mov     [edi], bl               ; store texel1
mspreade:   mov     bl, [eax+SPACEFILLER4]  ; read texel2
mspstep1f:  add     edx, [esp]
mspstep2f:  adc     ecx, [esp+4]
dmsm4:      and     ebp, 0xfc00003f
dmsx6:      rol     ebp, 6
            cmp     bl, 0
            mov     eax, ecx                ; mov leaves the cmp flags intact
            je      mspreadf
mspmape:    mov     bl, [ebx+SPACEFILLER4]  ; map texel2
            mov     [edi+1], bl             ; store texel2
mspreadf:   mov     bl, [ebp+SPACEFILLER4]  ; read texel3
            add     edi, 4
dmsx7:      rol     eax, 6
dmsm7:      and     eax, 0xfff
            cmp     bl, 0
            je      mspreadg
mspmapf:    mov     bl, [ebx+SPACEFILLER4]  ; map texel3
            mov     [edi-2], bl             ; store texel3
mspreadg:   mov     bl, [eax+SPACEFILLER4]  ; read texel4
mspstep1g:  add     edx, [esp]
mspstep2g:  adc     ecx, [esp+4]
            cmp     bl, 0
            je      mspskipg
mspmapg:    mov     bl, [ebx+SPACEFILLER4]  ; map texel4
            mov     [edi-1], bl             ; store texel4
mspskipg:   dec     esi
            jnz     near dmsloop

dmsdone:    add     esp, 8
            pop     esi
            pop     ebp
            pop     edi
            pop     ebx

            ret
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
;*
;* R_DrawColumnP
;*
;* Draws dc_count pixels of a texture column starting at dc_dest.
;* Texels come from dc_source indexed by the integer part (bits 16-31)
;* of a fixed-point position that starts at dc_texturefrac and advances
;* by dc_iscale per pixel; each texel is remapped through dc_colormap.
;*
;* rdcp1/rdcp2/rdcp3 carry SPACEFILLER4 immediates that are patched
;* outside this routine; rdcp1 subtracts the value once up front and
;* rdcp2/rdcp3 add it back before every store (presumably the screen
;* pitch - the patching code is not in this part of the file).
;*
;*----------------------------------------------------------------------

            GLOBAL  @R_DrawColumnP_ASM@0
            GLOBAL  _R_DrawColumnP_ASM
            GLOBAL  R_DrawColumnP_ASM

            align 16

R_DrawColumnP_ASM:
_R_DrawColumnP_ASM:
@R_DrawColumnP_ASM@0:

            mov     ecx, [dc_count]
            test    ecx, ecx
            jle     near rdcpret            ; count <= 0: nothing to do, so leave

            push    ebp                     ; save registers
            push    ebx
            push    edi
            push    esi

            mov     edi, [dc_dest]
            mov     ebp, ecx                ; ebp = pixel counter
            mov     ebx, [dc_texturefrac]   ; ebx = frac
rdcp1:      sub     edi, SPACEFILLER4       ; pre-bias; the loops add it back first
            mov     ecx, ebx
            shr     ecx, 16                 ; ecx = integer part of frac
            mov     esi, [dc_source]
            mov     edx, [dc_iscale]
            mov     eax, [dc_colormap]

            ; Byte at CPU+66 above 5 selects the movzx loop (see the note on
            ; partial register stalls below).
            cmp     BYTE [CPU+66], byte 5
            jg      rdcploop2

            align 16

; The registers should now look like this:
;
;       [31  ..  16][15 .. 8][7 .. 0]
; eax   [colormap                   ]
; ebx   [yi        ][yf             ]
; ecx   [scratch                    ]
; edx   [dyi       ][dyf            ]
; esi   [source texture column      ]
; edi   [destination screen pointer ]
; ebp   [counter                    ]
;
; Note the partial register stalls on anything better than a Pentium.
; That's why there are two versions of this loop.

rdcploop:
            mov     cl, [esi+ecx]           ; fetch texel
            xor     ch, ch
            add     ebx, edx                ; advance frac
rdcp2:      add     edi, SPACEFILLER4       ; advance destination pointer
            mov     cl, [eax+ecx]           ; colormap texel
            mov     [edi], cl               ; store texel
            mov     ecx, ebx
            shr     ecx, 16
            dec     ebp
            jnz     rdcploop

            pop     esi
            pop     edi
            pop     ebx
            pop     ebp
rdcpret:
            ret

            align 16

rdcploop2:
            movzx   ecx, byte [esi+ecx]     ; fetch texel
            add     ebx, edx                ; advance frac
            mov     cl, [eax+ecx]           ; colormap texel
rdcp3:      add     edi, SPACEFILLER4       ; advance destination pointer
            mov     [edi], cl               ; store texel
            mov     ecx, ebx
            shr     ecx, 16
            dec     ebp
            jnz     rdcploop2

            pop     esi
            pop     edi
            pop     ebx
            pop     ebp
            ret
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
;*
;* R_DrawFuzzColumnP
;*
;* Draws the "fuzz" column at x = dc_x between dc_yl and dc_yh.
;* dc_yl is raised to 1 when it is <= 0 and dc_yh is lowered to
;* fuzzviewheight when it is larger. For each pixel the routine reads
;* the screen byte at dest + fuzzoffset[fuzzpos], remaps it through the
;* table at [NormalLight] + 256*6 and stores it at dest, then moves
;* dest down by dc_pitch. fuzzpos wraps to 0 at FUZZTABLE and is written
;* back on exit.
;*
;* Each loop fetches fuzzoffset[ecx] one entry ahead, so the table is
;* read at index FUZZTABLE; it must have FUZZTABLE+1 entries.
;*
;* Register roles inside the loops:
;*   eax  colormap base       ebx  zero except bl (pixel being mapped)
;*   ecx  fuzzpos             edx  current fuzzoffset entry
;*   esi  pixels left         edi  dest            ebp  pitch
;*
;* Fix: ebx used to carry dc_x and was only cleared on the fuzzpos != 0
;* path. Entering at .fuzz0 with fuzzpos == 0 left dc_x's upper bits in
;* ebx, so `mov bl,[eax+ebx]` indexed the wrong table whenever
;* dc_x >= 256. dc_x is now added to edi straight from edx and ebx is
;* zeroed before the branch.
;*
;*----------------------------------------------------------------------

            GLOBAL  @R_DrawFuzzColumnP_ASM@0
            GLOBAL  _R_DrawFuzzColumnP_ASM
            GLOBAL  R_DrawFuzzColumnP_ASM

            align 16

R_DrawFuzzColumnP_ASM:
_R_DrawFuzzColumnP_ASM:
@R_DrawFuzzColumnP_ASM@0:

; Adjust borders. Low...
            mov     eax, [dc_yl]
            push    ebx
            push    esi
            push    edi
            push    ebp

            cmp     eax, 0
            jg      .ylok

            mov     eax, 1
            nop

; ...and high.
.ylok:      mov     edx, [fuzzviewheight]
            mov     esi, [dc_yh]
            cmp     esi, edx
            jle     .yhok

            mov     esi, edx
            nop

.yhok:      mov     edx, [dc_x]
            sub     esi, eax                ; esi = count - 1
            js      near .dfcdone           ; zero length (or less)

            mov     edi, [ylookup+eax*4]
            xor     ebx, ebx                ; upper bits must be 0 on every path
            add     edi, [dc_destorg]
            mov     eax, [NormalLight]
            mov     ecx, [fuzzpos]
            add     edi, edx                ; edx still holds dc_x here
            add     eax, 256*6
            inc     esi                     ; esi = count
            mov     ebp, [dc_pitch]
            mov     edx, FUZZTABLE
            test    ecx, ecx
            je      .fuzz0

;
; esi = count
; edi = dest
; ecx = fuzzpos
; eax = colormap 6
;

; first loop: end with fuzzpos or count 0, whichever happens first

            sub     edx, ecx                ; edx = # of entries left in fuzzoffset
            mov     ebx, esi
            cmp     esi, edx
            jle     .enuf
            mov     esi, edx
.enuf:      sub     ebx, esi                ; ebx = pixels left after this loop
            mov     edx, [fuzzoffset+ecx*4]
            push    ebx
            xor     ebx, ebx                ; ebx was a temporary: clear it again

.loop1:     inc     ecx
            mov     bl, [edi+edx]
            dec     esi                     ; flags survive until the jnz
            mov     bl, [eax+ebx]
            mov     [edi], bl
            lea     edi, [edi+ebp]
            mov     edx, [fuzzoffset+ecx*4]
            jnz     .loop1

; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those

            pop     esi
            cmp     ecx, FUZZTABLE
            jl      .savefuzzpos            ; ran out of pixels before the table end
            xor     ecx, ecx
            nop
.fuzz0:     cmp     esi, FUZZTABLE
            jl      .chunked

.oloop:     lea     edx, [esi-FUZZTABLE]
            mov     esi, FUZZTABLE
            push    edx                     ; pixels left after this chunk
            mov     edx, [fuzzoffset+ecx*4]

.iloop:     inc     ecx
            mov     bl, [edi+edx]
            dec     esi
            mov     bl, [eax+ebx]
            mov     [edi], bl
            lea     edi, [edi+ebp]
            mov     edx, [fuzzoffset+ecx*4]
            jnz     .iloop

            pop     esi
            xor     ecx, ecx
            cmp     esi, FUZZTABLE
            jge     .oloop

; third loop: Do whatever is left

.chunked:
            test    esi, esi
            jle     .savefuzzpos
            mov     edx, [fuzzoffset+ecx*4]
            nop

.loop3:     inc     ecx
            mov     bl, [edi+edx]
            dec     esi
            mov     bl, [eax+ebx]
            mov     [edi], bl
            lea     edi, [edi+ebp]
            mov     edx, [fuzzoffset+ecx*4]
            jnz     .loop3

.savefuzzpos:
            mov     [fuzzpos], ecx
.dfcdone:
            pop     ebp
            pop     edi
            pop     esi
            pop     ebx
            ret
|
|
|
|
|
|
;*----------------------------------------------------------------------
;*
;* R_DrawColumnHorizP_ASM
;*
;* Samples the texture column dc_source for rows dc_yl..dc_yh and writes
;* the raw texels into the temporary buffer at
;*     dc_temp + (dc_x & 3) + dc_yl*4
;* with a stride of 4 bytes per row. It also appends the pair
;* (dc_yl, dc_yh) to the list pointed to by dc_ctspan[dc_x & 3] and
;* advances that pointer by 8 bytes.
;*
;* The texture position starts at dc_texturefrac (16.16) and advances by
;* dc_iscale per row.
;*
;* Register usage in the drawing code:
;*   eax   dest
;*   edi   source
;*   ecx   frac (16.16)
;*   esi   fracstep (16.16)
;*   ebx   add1 / counter
;*   ebp   add2
;*   dl    texel1
;*   dh    texel2
;*   [esp] count
;*
;*----------------------------------------------------------------------

            GLOBAL  @R_DrawColumnHorizP_ASM@0
            GLOBAL  _R_DrawColumnHorizP_ASM
            GLOBAL  R_DrawColumnHorizP_ASM

            align 16

@R_DrawColumnHorizP_ASM@0:
_R_DrawColumnHorizP_ASM:
R_DrawColumnHorizP_ASM:

            ; eax = dc_yh - dc_yl
            mov     eax, [dc_yh]
            mov     ecx, [dc_yl]
            sub     eax, ecx
            mov     edx, [dc_x]             ; mov keeps the flags from the sub

            jl      near .leave             ; yh < yl: nothing to do, so leave

            push    ebp                     ; save registers
            push    ebx
            push    edi
            push    esi

            inc     eax                     ; eax = number of pixels
            and     edx, 3
            push    eax                     ; [esp] = count
            mov     eax, [dc_temp]
            mov     esi, [dc_ctspan+edx*4]
            add     eax, edx
            lea     eax, [eax+ecx*4]        ; eax = top of column in buffer
            mov     ebp, [dc_yh]
            mov     [esi], ecx              ; span list entry: yl ...
            mov     [esi+4], ebp            ; ... and yh
            add     esi, 8
            mov     edi, [dc_source]
            mov     [dc_ctspan+edx*4], esi
            mov     esi, [dc_iscale]
            mov     ecx, [dc_texturefrac]   ; ecx = frac
            mov     dl, [edi]               ; load cache
            mov     ebx, [esp]
            and     ebx, 0xfffffff8
            jnz     .mthan8

            ; --- fewer than 8 pixels to draw ---

            mov     ebx, [esp]
.lthan8:    shr     ebx, 1                  ; ebx = pairs, CF = odd pixel
            jnc     .even

            ; do one pixel before loop (little opportunity for pairing)

            mov     ebp, ecx                ; copy frac to ebp
            add     ecx, esi                ; increment frac
            shr     ebp, 16                 ; shift frac over to low end
            add     eax, 4
            mov     dl, [edi+ebp]
            mov     [eax-4], dl

.even:      test    ebx, ebx
            jz      near .done

.loop2:     mov     [esp], ebx              ; save counter
            mov     ebx, ecx                ; copy frac for texel1 to ebx
            shr     ebx, 16                 ; shift frac for texel1 to low end
            add     ecx, esi                ; increment frac
            mov     ebp, ecx                ; copy frac for texel2 to ebp
            shr     ebp, 16                 ; shift frac for texel2 to low end
            add     ecx, esi                ; increment frac
            mov     dl, [edi+ebx]           ; read texel1
            mov     ebx, [esp]              ; fetch counter
            mov     dh, [edi+ebp]           ; read texel2
            mov     [eax], dl               ; write texel1
            mov     [eax+4], dh             ; write texel2
            add     eax, 8                  ; increment dest
            dec     ebx                     ; decrement counter
            jnz     .loop2                  ; loop until it hits 0

            jmp     .done

            ; --- 8 or more pixels to draw: position eax as close to a
            ;     32 byte boundary as possible, then do whatever is left ---

.mthan8:    test    eax, 4
            jz      .try2

            mov     ebp, ecx                ; frac: in ebp
            add     ecx, esi                ; step
            shr     ebp, 16                 ; frac: shift
            add     eax, 4                  ; increment dest
            mov     ebx, [esp]              ; fetch counter
            mov     dl, [edi+ebp]           ; tex: read
            dec     ebx                     ; decrement counter
            mov     [eax-4], dl             ; tex: write
            mov     [esp], ebx              ; store counter

.try2:      test    eax, 8
            jz      .try4

            mov     ebx, ecx                ; frac1: in ebx
            add     ecx, esi                ; step
            shr     ebx, 16                 ; frac1: shift
            mov     ebp, ecx                ; frac2: in ebp
            shr     ebp, 16                 ; frac2: shift
            add     ecx, esi                ; step
            mov     dl, [edi+ebx]           ; tex1: read
            mov     ebx, [esp]              ; fetch counter
            mov     dh, [edi+ebp]           ; tex2: read
            mov     [eax], dl               ; tex1: write
            mov     [eax+4], dh             ; tex2: write
            sub     ebx, 2                  ; decrement counter
            add     eax, 8                  ; increment dest
            mov     [esp], ebx              ; store counter

.try4:      test    eax, 16
            jz      .try8

            mov     ebx, ecx                ; frac1: in ebx
            add     ecx, esi                ; step
            shr     ebx, 16                 ; frac1: shift
            mov     ebp, ecx                ; frac2: in ebp
            shr     ebp, 16                 ; frac2: shift
            add     ecx, esi                ; step
            mov     dl, [edi+ebx]           ; tex1: read
            mov     ebx, ecx                ; frac3: in ebx
            shr     ebx, 16                 ; frac3: shift
            mov     dh, [edi+ebp]           ; tex2: read
            add     ecx, esi                ; step
            mov     [eax], dl               ; tex1: write
            mov     [eax+4], dh             ; tex2: write
            mov     ebp, ecx                ; frac4: in ebp
            shr     ebp, 16                 ; frac4: shift
            add     ecx, esi                ; step
            mov     dl, [edi+ebx]           ; tex3: read
            mov     ebx, [esp]              ; fetch counter
            mov     dh, [edi+ebp]           ; tex4: read
            sub     ebx, 4                  ; decrement counter
            mov     [esp], ebx              ; store counter
            mov     [eax+8], dl             ; tex3: write
            mov     [eax+12], dh            ; tex4: write
            add     eax, 16                 ; increment dest

.try8:      mov     ebx, [esp]              ; make counter count groups of 8
            sub     esp, 4                  ; extra slot for the group counter
            shr     ebx, 3                  ; ZF from here is tested at .tail8
            jmp     .tail8

            align 16

.loop8:     mov     [esp], ebx              ; save counter
            mov     ebx, ecx                ; frac1: in ebx
            shr     ebx, 16                 ; frac1: shift
            add     ecx, esi                ; step
            mov     ebp, ecx                ; frac2: in ebp
            shr     ebp, 16                 ; frac2: shift
            add     ecx, esi                ; step
            mov     dl, [edi+ebx]           ; tex1: read
            mov     ebx, ecx                ; frac3: in ebx
            mov     dh, [edi+ebp]           ; tex2: read
            shr     ebx, 16                 ; frac3: shift
            add     ecx, esi                ; step
            mov     [eax], dl               ; tex1: write
            mov     [eax+4], dh             ; tex2: write
            mov     ebp, ecx                ; frac4: in ebp
            shr     ebp, 16                 ; frac4: shift
            add     ecx, esi                ; step
            mov     dl, [edi+ebx]           ; tex3: read
            mov     ebx, ecx                ; frac5: in ebx
            mov     dh, [edi+ebp]           ; tex4: read
            shr     ebx, 16                 ; frac5: shift
            mov     [eax+8], dl             ; tex3: write
            mov     [eax+12], dh            ; tex4: write
            add     ecx, esi                ; step
            mov     ebp, ecx                ; frac6: in ebp
            shr     ebp, 16                 ; frac6: shift
            mov     dl, [edi+ebx]           ; tex5: read
            add     ecx, esi                ; step
            mov     ebx, ecx                ; frac7: in ebx
            mov     [eax+16], dl            ; tex5: write
            shr     ebx, 16                 ; frac7: shift
            mov     dh, [edi+ebp]           ; tex6: read
            add     ecx, esi                ; step
            mov     ebp, ecx                ; frac8: in ebp
            mov     [eax+20], dh            ; tex6: write
            shr     ebp, 16                 ; frac8: shift
            add     eax, 32                 ; increment dest pointer
            mov     dl, [edi+ebx]           ; tex7: read
            mov     ebx, [esp]              ; fetch counter
            mov     [eax-8], dl             ; tex7: write
            mov     dh, [edi+ebp]           ; tex8: read
            add     ecx, esi                ; step
            mov     [eax-4], dh             ; tex8: write
            mov     dl, [eax]               ; load cache
            dec     ebx                     ; decrement counter
.tail8:     jnz     near .loop8             ; loop if more to do

            pop     ebp                     ; drop the group-counter slot
            mov     ebx, [esp]
            and     ebx, 7                  ; leftovers go through the short path
            jnz     near .lthan8

.done:      pop     eax                     ; drop the count slot
            pop     esi
            pop     edi
            pop     ebx
            pop     ebp
.leave:     ret
|
|
|
|
|
|
;*----------------------------------------------------------------------
;*
;* rt_copy1col_asm
;*
;* Copies one column of bytes, rows yl..yh, from the temporary buffer
;* (dc_temp + hx + yl*4, stride 4) to the screen
;* (dc_destorg + ylookup[yl] + sx, stride dc_pitch).
;*
;* Register-argument entry (@rt_copy1col_asm@16):
;*   ecx = hx
;*   edx = sx
;*   [esp+4] = yl
;*   [esp+8] = yh
;*   Returns with `ret 8`, removing yl/yh from the stack.
;*
;* Stack-argument entry (rt_copy1col_asm / _rt_copy1col_asm):
;*   hx, sx, yl, yh on the stack; a small shim re-pushes yl/yh, loads
;*   ecx/edx and falls into the register-argument entry.
;*
;*----------------------------------------------------------------------

            GLOBAL  @rt_copy1col_asm@16
            GLOBAL  _rt_copy1col_asm
            GLOBAL  rt_copy1col_asm

            align 16

rt_copy1col_asm:
_rt_copy1col_asm:
            pop     eax                     ; eax = return address
            mov     edx, [esp+4*3]          ; yh
            mov     ecx, [esp+4*2]          ; yl
            push    edx
            push    ecx
            mov     ecx, [esp+4*2]          ; hx
            mov     edx, [esp+4*3]          ; sx
            push    eax                     ; return address back on top

@rt_copy1col_asm@16:
            mov     eax, [esp+4]            ; eax = yl
            push    ebx
            mov     ebx, [esp+12]           ; ebx = yh
            push    esi
            sub     ebx, eax
            push    edi                     ; push keeps the flags from the sub
            js      .done

            lea     esi, [eax*4]            ; esi = yl*4
            inc     ebx                     ; ebx = count
            mov     eax, edx
            add     ecx, esi
            mov     edi, [ylookup+esi]
            add     ecx, [dc_temp]          ; ecx = source
            mov     esi, [dc_pitch]         ; esi = pitch
            add     eax, edi                ; eax = dest
            add     eax, [dc_destorg]

            shr     ebx, 1                  ; ebx = pairs, CF = odd row
            jnc     .even

            mov     dl, [ecx]
            add     ecx, 4
            mov     [eax], dl
            add     eax, esi

.even:      and     ebx, ebx
            jz      .done

.loop:      mov     dl, [ecx]
            mov     dh, [ecx+4]
            mov     [eax], dl
            mov     [eax+esi], dh
            add     ecx, 8
            lea     eax, [eax+esi*2]
            dec     ebx
            jnz     .loop

.done:      pop     edi
            pop     esi
            pop     ebx
            ret     8
|
|
|
|
;*----------------------------------------------------------------------
;*
;* rt_copy4cols_asm
;*
;* Copies four adjacent columns at once (one dword per row), rows
;* yl..yh, from the temporary buffer (dc_temp + yl*4, stride 4) to the
;* screen (dc_destorg + ylookup[yl] + sx, stride dc_pitch).
;*
;* Register-argument entry (@rt_copy4cols_asm@12):
;*   ecx = sx
;*   edx = yl
;*   [esp+4] = yh
;*   Returns with `ret 4`, removing yh from the stack.
;*
;* Stack-argument entry (rt_copy4cols_asm / _rt_copy4cols_asm):
;*   sx, yl, yh on the stack; a small shim re-pushes yh, loads ecx/edx
;*   and falls into the register-argument entry.
;*
;*----------------------------------------------------------------------

            GLOBAL  @rt_copy4cols_asm@12
            GLOBAL  _rt_copy4cols_asm
            GLOBAL  rt_copy4cols_asm

            align 16

rt_copy4cols_asm:
_rt_copy4cols_asm:
            pop     eax                     ; eax = return address
            mov     ecx, [esp+8]            ; yh
            mov     edx, [esp+4]            ; yl
            push    ecx
            mov     ecx, [esp+4]            ; sx
            push    eax                     ; return address back on top

@rt_copy4cols_asm@12:
            push    ebx
            mov     ebx, [esp+8]            ; ebx = yh
            push    esi
            sub     ebx, edx
            push    edi                     ; push keeps the flags from the sub
            js      .done

            inc     ebx                     ; ebx = count
            mov     eax, ecx
            mov     esi, [ylookup+edx*4]
            mov     ecx, [dc_temp]
            add     eax, esi                ; eax = dest
            add     eax, [dc_destorg]
            lea     ecx, [ecx+edx*4]        ; ecx = source
            mov     edx, [dc_pitch]         ; edx = pitch

            shr     ebx, 1                  ; ebx = pairs, CF = odd row
            jnc     .even

            mov     esi, [ecx]
            add     ecx, 4
            mov     [eax], esi
            add     eax, edx

.even:      and     ebx, ebx
            jz      .done

.loop:      mov     esi, [ecx]
            mov     edi, [ecx+4]
            mov     [eax], esi
            mov     [eax+edx], edi
            add     ecx, 8
            lea     eax, [eax+edx*2]
            dec     ebx
            jnz     .loop

.done:      pop     edi
            pop     esi
            pop     ebx
            ret     4
|
|
|
|
;*----------------------------------------------------------------------
;*
;* rt_map1col_asm
;*
;* Like rt_copy1col_asm, but every byte is translated through the table
;* at dc_colormap on its way from the temporary buffer
;* (dc_temp + hx + yl*4, stride 4) to the screen
;* (dc_destorg + ylookup[yl] + sx, stride dc_pitch).
;*
;* Register-argument entry (@rt_map1col_asm@16):
;*   ecx = hx
;*   edx = sx
;*   [esp+4] = yl
;*   [esp+8] = yh
;*   Returns with `ret 8`, removing yl/yh from the stack.
;*
;* Stack-argument entry (rt_map1col_asm / _rt_map1col_asm):
;*   hx, sx, yl, yh on the stack; a small shim re-pushes yl/yh, loads
;*   ecx/edx and falls into the register-argument entry.
;*
;*----------------------------------------------------------------------

            GLOBAL  @rt_map1col_asm@16
            GLOBAL  _rt_map1col_asm
            GLOBAL  rt_map1col_asm

            align 16

rt_map1col_asm:
_rt_map1col_asm:
            pop     eax                     ; eax = return address
            mov     edx, [esp+4*3]          ; yh
            mov     ecx, [esp+4*2]          ; yl
            push    edx
            push    ecx
            mov     ecx, [esp+4*2]          ; hx
            mov     edx, [esp+4*3]          ; sx
            push    eax                     ; return address back on top

@rt_map1col_asm@16:
            mov     eax, [esp+4]            ; eax = yl
            push    ebx
            mov     ebx, [esp+12]           ; ebx = yh
            push    ebp
            push    esi
            sub     ebx, eax
            push    edi                     ; push keeps the flags from the sub
            js      .done

            lea     edi, [eax*4]            ; edi = yl*4
            mov     esi, [dc_colormap]      ; esi = colormap
            inc     ebx                     ; ebx = count
            mov     eax, edx
            lea     ebp, [ecx+edi]          ; ebp = source
            add     ebp, [dc_temp]
            mov     ecx, [ylookup+edi]
            mov     edi, [dc_pitch]         ; edi = pitch
            add     eax, ecx                ; eax = dest
            xor     ecx, ecx                ; ecx/edx become zero-extended
            xor     edx, edx                ; byte indices into the colormap
            add     eax, [dc_destorg]

            shr     ebx, 1                  ; ebx = pairs, CF = odd row
            jnc     .even

            mov     dl, [ebp]
            add     ebp, 4
            mov     dl, [esi+edx]
            mov     [eax], dl
            add     eax, edi

.even:      and     ebx, ebx
            jz      .done

.loop:      mov     dl, [ebp]
            mov     cl, [ebp+4]
            add     ebp, 8
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax], dl
            mov     [eax+edi], cl
            dec     ebx                     ; lea below keeps these flags
            lea     eax, [eax+edi*2]
            jnz     .loop

.done:      pop     edi
            pop     esi
            pop     ebp
            pop     ebx
            ret     8
|
|
|
|
;*----------------------------------------------------------------------
;*
;* rt_map4cols_asm
;*
;* Translates four adjacent columns at once through dc_colormap, rows
;* yl..yh, from the temporary buffer (dc_temp + yl*4, 4 bytes per row)
;* to the screen (dc_destorg + ylookup[yl] + sx, stride dc_pitch).
;*
;* rt_map4cols_asm1 is for PPro and above
;* rt_map4cols_asm2 is for Pentium and below
;*
;* Register-argument entry (@rt_map4cols_asm1@12):
;*   ecx = sx
;*   edx = yl
;*   [esp+4] = yh
;*   Returns with `ret 4`, removing yh from the stack.
;*
;* Stack-argument entry (rt_map4cols_asm1 / _rt_map4cols_asm1):
;*   sx, yl, yh on the stack; a small shim re-pushes yh, loads ecx/edx
;*   and falls into the register-argument entry.
;*
;*----------------------------------------------------------------------

            GLOBAL  @rt_map4cols_asm1@12
            GLOBAL  _rt_map4cols_asm1
            GLOBAL  rt_map4cols_asm1

            align 16

rt_map4cols_asm1:
_rt_map4cols_asm1:
            pop     eax                     ; eax = return address
            mov     ecx, [esp+8]            ; yh
            mov     edx, [esp+4]            ; yl
            push    ecx
            mov     ecx, [esp+4]            ; sx
            push    eax                     ; return address back on top

@rt_map4cols_asm1@12:
            push    ebx
            mov     ebx, [esp+8]            ; ebx = yh
            push    ebp
            push    esi
            sub     ebx, edx
            push    edi                     ; push keeps the flags from the sub
            js      near .done

            mov     esi, [dc_colormap]      ; esi = colormap
            shl     edx, 2                  ; edx = yl*4
            mov     eax, ecx
            inc     ebx                     ; ebx = count
            mov     edi, [ylookup+edx]
            mov     ebp, [dc_temp]
            add     ebp, edx                ; ebp = source
            add     eax, edi                ; eax = dest
            mov     edi, [dc_pitch]         ; edi = pitch
            add     eax, [dc_destorg]
            xor     ecx, ecx                ; ecx/edx become zero-extended
            xor     edx, edx                ; byte indices into the colormap

            shr     ebx, 1                  ; ebx = row pairs, CF = odd row
            jnc     .even

            ; single leading row
            mov     dl, [ebp]
            mov     cl, [ebp+1]
            add     ebp, 4
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax], dl
            mov     [eax+1], cl
            mov     dl, [ebp-2]
            mov     cl, [ebp-1]
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax+2], dl
            mov     [eax+3], cl
            add     eax, edi

.even:      and     ebx, ebx
            jz      .done

.loop:
            ; first row of the pair
            mov     dl, [ebp]
            mov     cl, [ebp+1]
            add     ebp, 8
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax], dl
            mov     [eax+1], cl
            mov     dl, [ebp-6]
            mov     cl, [ebp-5]
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax+2], dl
            mov     [eax+3], cl
            ; second row of the pair
            mov     dl, [ebp-4]
            mov     cl, [ebp-3]
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax+edi], dl
            mov     [eax+edi+1], cl
            mov     dl, [ebp-2]
            mov     cl, [ebp-1]
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax+edi+2], dl
            mov     [eax+edi+3], cl
            lea     eax, [eax+edi*2]
            dec     ebx

            jnz     .loop

.done:      pop     edi
            pop     esi
            pop     ebp
            pop     ebx
            ret     4
|
|
|
|
;-----------------------------------------------------------------------
; rt_map4cols_asm2
;
; Same job and same arguments as rt_map4cols_asm1: translate four
; adjacent columns through dc_colormap, rows yl..yh, from dc_temp + yl*4
; to dc_destorg + ylookup[yl] + sx. The main loop assembles the four
; mapped bytes of a row in ecx and stores them as a single dword.
;
; Register-argument entry (@rt_map4cols_asm2@12):
;   ecx = sx, edx = yl, [esp+4] = yh; returns with `ret 4`.
; Stack-argument entry (rt_map4cols_asm2 / _rt_map4cols_asm2):
;   sx, yl, yh on the stack; the shim re-pushes yh and falls through.
;-----------------------------------------------------------------------
            GLOBAL  @rt_map4cols_asm2@12
            GLOBAL  _rt_map4cols_asm2
            GLOBAL  rt_map4cols_asm2

            align 16

rt_map4cols_asm2:
_rt_map4cols_asm2:
            pop     eax                     ; eax = return address
            mov     ecx, [esp+8]            ; yh
            mov     edx, [esp+4]            ; yl
            push    ecx
            mov     ecx, [esp+4]            ; sx
            push    eax                     ; return address back on top

@rt_map4cols_asm2@12:
            push    ebx
            mov     ebx, [esp+8]            ; ebx = yh
            push    ebp
            push    esi
            sub     ebx, edx
            push    edi                     ; push keeps the flags from the sub
            js      near .done

            mov     esi, [dc_colormap]      ; esi = colormap
            shl     edx, 2                  ; edx = yl*4
            mov     eax, ecx
            inc     ebx                     ; ebx = count
            mov     edi, [ylookup+edx]
            mov     ebp, [dc_temp]
            add     ebp, edx                ; ebp = source
            add     eax, edi                ; eax = dest
            mov     edi, [dc_pitch]         ; edi = pitch
            add     eax, [dc_destorg]
            xor     ecx, ecx
            xor     edx, edx                ; edx = zero-extended colormap index

            shr     ebx, 1                  ; ebx = row pairs, CF = odd row
            jnc     .even

            ; single leading row, stored byte by byte
            mov     dl, [ebp]
            mov     cl, [ebp+1]
            add     ebp, 4
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax], dl
            mov     [eax+1], cl
            mov     dl, [ebp-2]
            mov     cl, [ebp-1]
            mov     dl, [esi+edx]
            mov     cl, [esi+ecx]
            mov     [eax+2], dl
            mov     [eax+3], cl
            add     eax, edi

.even:      and     ebx, ebx
            jz      .done

.loop:
            ; first row: bytes 3 and 2 go to the high word, 1 and 0 to the low
            mov     dl, [ebp+3]
            mov     ch, [esi+edx]
            mov     dl, [ebp+2]
            mov     cl, [esi+edx]
            shl     ecx, 16
            mov     dl, [ebp+1]
            mov     ch, [esi+edx]
            mov     dl, [ebp]
            mov     cl, [esi+edx]
            mov     [eax], ecx
            add     eax, edi

            ; second row, same pattern
            mov     dl, [ebp+7]
            mov     ch, [esi+edx]
            mov     dl, [ebp+6]
            mov     cl, [esi+edx]
            shl     ecx, 16
            mov     dl, [ebp+5]
            mov     ch, [esi+edx]
            mov     dl, [ebp+4]
            mov     cl, [esi+edx]
            mov     [eax], ecx
            add     eax, edi
            add     ebp, 8
            dec     ebx

            jnz     .loop

.done:      pop     edi
            pop     esi
            pop     ebp
            pop     ebx
            ret     4
|
|
|
|
align 16
|
|
|
|
GLOBAL rt_shaded4cols_asm
|
|
GLOBAL _rt_shaded4cols_asm
|
|
|
|
;-----------------------------------------------------------------------
; rt_shaded4cols_asm (arg1, arg2, arg3)
;
; Stack arguments; returns with a plain "ret", so nothing is removed
; from the stack by this routine.
;
;   arg1  byte offset added to the destination address
;   arg2  first row; indexes ylookup (dword entries) and dc_temp
;         (4 bytes per row)
;   arg3  last row, inclusive; nothing is written when arg3 < arg2
;
; For each of the four bytes of every row:
;   v    = colormap[source byte]                           (s4cmN)
;   sum  = dword [Col2RGB8 + 0x10000 + (dest byte - v*256)*4]
;        + dword [fg base + v*256*4]                       (s4fgN)
;   sum |= 0x1f07c1f
;   dest byte = RGB32k[sum & (sum >> 15)]
;
; SELF-MODIFIED CODE: the SPACEFILLER4 operands (s4cm1-4, s4fg1-4) are
; overwritten by R_SetupShadedCol, and the literal 320 in s4p/s4p2/s4p3
; is overwritten by ASM_PatchPitch. Those routines write at fixed byte
; offsets from the labels, so the labelled instructions must keep their
; exact form.
;
; ebp, ebx, esi and edi are saved and restored; eax, ecx and edx are
; overwritten.
;-----------------------------------------------------------------------
rt_shaded4cols_asm:
|
|
_rt_shaded4cols_asm:
|
|
mov ecx,[esp+8]
|
|
push ebp
|
|
mov ebp,[esp+16]
|
|
sub ebp,ecx
|
|
; Only ebp has been pushed so far; s4nil pops just that one register.
js near s4nil
|
|
mov eax,[ylookup+ecx*4]
|
|
add eax,[dc_destorg] ; eax = destination
|
|
push ebx
|
|
push esi
|
|
mov esi,[dc_temp]
|
|
inc ebp ; ebp = count
|
|
; Three registers pushed at this point, so [esp+16] is arg1.
add eax,[esp+16]
|
|
push edi
|
|
lea esi,[esi+ecx*4] ; esi = source
|
|
|
|
align 16
|
|
|
|
; ---- bytes 0 and 1 of the row (edx/ebx and ecx/edi work in parallel) ----
s4loop: movzx edx,byte [esi]
|
|
movzx ecx,byte [esi+1]
|
|
s4cm1: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
s4cm2: movzx edi,byte [SPACEFILLER4+ecx] ; colormap
|
|
shl edx,8
|
|
movzx ebx,byte [eax]
|
|
shl edi,8
|
|
movzx ecx,byte [eax+1]
|
|
sub ebx,edx
|
|
sub ecx,edi
|
|
mov ebx,[Col2RGB8+0x10000+ebx*4]
|
|
mov ecx,[Col2RGB8+0x10000+ecx*4]
|
|
s4fg1: add ebx,[SPACEFILLER4+edx*4]
|
|
s4fg2: add ecx,[SPACEFILLER4+edi*4]
|
|
or ebx,0x1f07c1f
|
|
or ecx,0x1f07c1f
|
|
mov edx,ebx
|
|
shr ebx,15
|
|
mov edi,ecx
|
|
shr ecx,15
|
|
and edx,ebx
|
|
and ecx,edi
|
|
mov bl,[RGB32k+edx]
|
|
; The loads for bytes 2 and 3 are started before bytes 0 and 1 are stored.
movzx edx,byte [esi+2]
|
|
mov bh,[RGB32k+ecx]
|
|
movzx ecx,byte [esi+3]
|
|
mov [eax],bl
|
|
mov [eax+1],bh
|
|
|
|
; ---- bytes 2 and 3 of the row ----
s4cm3: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
s4cm4: movzx edi,byte [SPACEFILLER4+ecx] ; colormap
|
|
shl edx,8
|
|
movzx ebx,byte [eax+2]
|
|
shl edi,8
|
|
movzx ecx,byte [eax+3]
|
|
sub ebx,edx
|
|
sub ecx,edi
|
|
mov ebx,[Col2RGB8+0x10000+ebx*4]
|
|
mov ecx,[Col2RGB8+0x10000+ecx*4]
|
|
s4fg3: add ebx,[SPACEFILLER4+edx*4]
|
|
s4fg4: add ecx,[SPACEFILLER4+edi*4]
|
|
or ebx,0x1f07c1f
|
|
or ecx,0x1f07c1f
|
|
mov edx,ebx
|
|
shr ebx,15
|
|
mov edi,ecx
|
|
shr ecx,15
|
|
and edx,ebx
|
|
and ecx,edi
|
|
; eax is advanced to the next row before the last two stores, which is
; why s4p2/s4p3 address the current row as [eax - pitch + 2/3].
s4p: add eax,320 ; pitch
|
|
add esi,4
|
|
mov bl,[RGB32k+edx]
|
|
mov bh,[RGB32k+ecx]
|
|
s4p2: mov [eax-320+2],bl
|
|
s4p3: mov [eax-320+3],bh
|
|
dec ebp
|
|
jne s4loop
|
|
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
s4nil: pop ebp
|
|
ret
|
|
|
|
align 16
|
|
|
|
GLOBAL rt_add4cols_asm
|
|
GLOBAL _rt_add4cols_asm
|
|
|
|
;-----------------------------------------------------------------------
; rt_add4cols_asm (arg1, arg2, arg3)
;
; Stack arguments; returns with a plain "ret", so nothing is removed
; from the stack by this routine.
;
;   arg1  byte offset added to the destination address
;   arg2  first row; indexes ylookup (dword entries) and dc_temp
;         (4 bytes per row)
;   arg3  last row, inclusive; nothing is written when arg3 < arg2
;
; For each of the four bytes of every row:
;   f    = colormap[source byte]                 (a4cmN)
;   sum  = dword [bg base + dest byte*4]         (a4bgN)
;        + dword [fg base + f*4]                 (a4fgN)
;   sum |= 0x01f07c1f
;   dest byte = RGB32k[sum & (sum >> 15)]
;
; SELF-MODIFIED CODE: the SPACEFILLER4 operands are overwritten by
; R_SetupAddCol (colormap <- dc_colormap, fg <- dc_srcblend,
; bg <- dc_destblend) and the literal 320 at a4p is overwritten by
; ASM_PatchPitch. Those routines write at fixed byte offsets from the
; labels, so the labelled instructions must keep their exact form.
;
; edi, ebx, esi and ebp are saved and restored; eax, ecx and edx are
; overwritten.
;-----------------------------------------------------------------------
rt_add4cols_asm:
|
|
_rt_add4cols_asm:
|
|
mov ecx,[esp+8]
|
|
push edi
|
|
mov edi,[esp+16]
|
|
sub edi,ecx
|
|
; Only edi has been pushed so far; a4nil pops just that one register.
js near a4nil
|
|
mov eax,[ylookup+ecx*4]
|
|
add eax,[dc_destorg]
|
|
push ebx
|
|
push esi
|
|
mov esi,[dc_temp]
|
|
push ebp
|
|
inc edi ; edi = row count
|
|
; Four registers pushed at this point, so [esp+20] is arg1.
add eax,[esp+20]
|
|
lea esi,[esi+ecx*4] ; esi = source
|
|
|
|
align 16
|
|
a4loop:
|
|
; ---- bytes 0 and 1 of the row (ebx/ecx and edx/ebp work in parallel) ----
movzx ebx,byte [esi]
|
|
movzx edx,byte [esi+1]
|
|
movzx ecx,byte [eax]
|
|
movzx ebp,byte [eax+1]
|
|
a4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
a4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
a4bg1: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
a4bg2: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
a4fg1: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
a4fg2: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
or ecx,0x01f07c1f
|
|
or ebp,0x01f07c1f
|
|
mov ebx,ecx
|
|
shr ecx,15
|
|
mov edx,ebp
|
|
shr ebp,15
|
|
and ecx,ebx
|
|
and ebp,edx
|
|
; The source loads for bytes 2 and 3 are started before bytes 0 and 1 are
; stored.
movzx ebx,byte [esi+2]
|
|
movzx edx,byte [esi+3]
|
|
mov cl,[RGB32k+ecx]
|
|
mov ch,[RGB32k+ebp]
|
|
mov [eax],cl
|
|
mov [eax+1],ch
|
|
|
|
; ---- bytes 2 and 3 of the row ----
movzx ecx,byte [eax+2]
|
|
movzx ebp,byte [eax+3]
|
|
a4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
a4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
a4bg3: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
a4bg4: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
a4fg3: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
a4fg4: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
or ecx,0x01f07c1f
|
|
or ebp,0x01f07c1f
|
|
mov ebx,ecx
|
|
shr ecx,15
|
|
mov edx,ebp
|
|
shr ebp,15
|
|
and ebx,ecx
|
|
and edx,ebp
|
|
mov cl,[RGB32k+ebx]
|
|
mov ch,[RGB32k+edx]
|
|
mov [eax+2],cl
|
|
mov [eax+3],ch
|
|
|
|
; Advance to the next row.
add esi,4
|
|
a4p: add eax,320 ; pitch
|
|
sub edi,1
|
|
jne a4loop
|
|
pop ebp
|
|
pop esi
|
|
pop ebx
|
|
a4nil: pop edi
|
|
ret
|
|
|
|
align 16
|
|
|
|
GLOBAL rt_addclamp4cols_asm
|
|
GLOBAL _rt_addclamp4cols_asm
|
|
|
|
;-----------------------------------------------------------------------
; rt_addclamp4cols_asm (arg1, arg2, arg3)
;
; Stack arguments; returns with a plain "ret", so nothing is removed
; from the stack by this routine.
;
;   arg1  byte offset added to the destination address
;   arg2  first row; indexes ylookup (dword entries) and dc_temp
;         (4 bytes per row)
;   arg3  last row, inclusive; nothing is written when arg3 < arg2
;
; For each of the four bytes of every row:
;   f    = colormap[source byte]                 (ac4cmN)
;   sum  = dword [fg base + f*4]                 (ac4fgN)
;        + dword [bg base + dest byte*4]         (ac4bgN)
;   ov   = sum & 0x40100400
;   sum  = ((sum | 0x01f07c1f) & 0x3fffffff) | (ov - (ov >> 5))
;   dest byte = RGB32k[sum & (sum >> 15)]
;
; ov - (ov >> 5) turns each set bit of ov (bit 10, 20 or 30) into the
; five bits directly below it (5-9, 15-19, 25-29), forcing that field
; to all ones.
;
; SELF-MODIFIED CODE: the SPACEFILLER4 operands are overwritten by
; R_SetupAddClampCol (colormap <- dc_colormap, fg <- dc_srcblend,
; bg <- dc_destblend) and the literal 320 at ac4p is overwritten by
; ASM_PatchPitch. Those routines write at fixed byte offsets from the
; labels, so the labelled instructions must keep their exact form.
;
; edi, ebx, esi and ebp are saved and restored; eax, ecx and edx are
; overwritten.
;-----------------------------------------------------------------------
rt_addclamp4cols_asm:
|
|
_rt_addclamp4cols_asm:
|
|
mov ecx,[esp+8]
|
|
push edi
|
|
mov edi,[esp+16]
|
|
sub edi,ecx
|
|
; Only edi has been pushed so far; ac4nil pops just that one register.
js near ac4nil
|
|
mov eax,[ylookup+ecx*4]
|
|
add eax,[dc_destorg]
|
|
push ebx
|
|
push esi
|
|
mov esi,[dc_temp]
|
|
push ebp
|
|
inc edi ; edi = row count
|
|
; Four registers pushed at this point, so [esp+20] is arg1.
add eax,[esp+20]
|
|
lea esi,[esi+ecx*4] ; esi = source
|
|
; Reserve a stack slot for the row count: edi is needed as a scratch
; register inside the loop.
push edi
|
|
|
|
align 16
|
|
ac4loop:
|
|
; ---- bytes 0 and 1 of the row ----
movzx ebx,byte [esi]
|
|
movzx edx,byte [esi+1]
|
|
mov [esp],edi ; spill the row count
|
|
ac4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
ac4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
movzx ecx,byte [eax]
|
|
movzx ebp,byte [eax+1]
|
|
ac4fg1: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
ac4fg2: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
ac4bg1: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
ac4bg2: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
; Byte 0 is processed in ebx/ecx/edi; the steps for byte 1 (edx/ebp/edi)
; are interleaved with it.
mov ecx,ebx
|
|
or ebx,0x01f07c1f
|
|
and ecx,0x40100400
|
|
and ebx,0x3fffffff
|
|
mov edi,ecx
|
|
shr ecx,5
|
|
mov ebp,edx
|
|
sub edi,ecx
|
|
or edx,0x01f07c1f
|
|
or ebx,edi
|
|
mov ecx,ebx
|
|
shr ebx,15
|
|
and ebp,0x40100400
|
|
and ebx,ecx
|
|
and edx,0x3fffffff
|
|
mov edi,ebp
|
|
shr ebp,5
|
|
mov cl,[RGB32k+ebx]
|
|
sub edi,ebp
|
|
mov [eax],cl
|
|
or edx,edi
|
|
mov ebp,edx
|
|
shr edx,15
|
|
; ---- bytes 2 and 3 start loading while byte 1 is being finished ----
movzx ebx,byte [esi+2]
|
|
and ebp,edx
|
|
movzx edx,byte [esi+3]
|
|
ac4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap
|
|
mov cl,[RGB32k+ebp]
|
|
ac4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap
|
|
mov [eax+1],cl
|
|
movzx ecx,byte [eax+2]
|
|
movzx ebp,byte [eax+3]
|
|
ac4fg3: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb
|
|
ac4fg4: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb
|
|
ac4bg3: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb
|
|
ac4bg4: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb
|
|
mov ecx,ebx
|
|
or ebx,0x01f07c1f
|
|
and ecx,0x40100400
|
|
and ebx,0x3fffffff
|
|
mov edi,ecx
|
|
shr ecx,5
|
|
mov ebp,edx
|
|
sub edi,ecx
|
|
or edx,0x01f07c1f
|
|
or ebx,edi
|
|
mov ecx,ebx
|
|
shr ebx,15
|
|
and ebp,0x40100400
|
|
and ebx,ecx
|
|
and edx,0x3fffffff
|
|
mov edi,ebp
|
|
shr ebp,5
|
|
mov cl,[RGB32k+ebx]
|
|
sub edi,ebp
|
|
mov [eax+2],cl
|
|
or edx,edi
|
|
mov edi,[esp] ; reload the row count
|
|
mov ebp,edx
|
|
shr edx,15
|
|
add esi,4
|
|
and edx,ebp
|
|
mov cl,[RGB32k+edx]
|
|
mov [eax+3],cl
|
|
|
|
ac4p: add eax,320 ; pitch
|
|
sub edi,1
|
|
jne ac4loop
|
|
; Drop the row-count slot (the value popped here is overwritten by the
; final pop edi below).
pop edi
|
|
|
|
pop ebp
|
|
pop esi
|
|
pop ebx
|
|
ac4nil: pop edi
|
|
ret
|
|
|
|
rtext_end:
|
|
%ifdef M_TARGET_MACHO
|
|
GLOBAL _rtext_tmap_end
|
|
_rtext_tmap_end:
|
|
%endif
|
|
align 16
|
|
|
|
;************************
|
|
|
|
SECTION .text
|
|
|
|
GLOBAL R_SetupShadedCol
|
|
GLOBAL @R_SetupShadedCol@0
|
|
|
|
; Patch the values of dc_colormap and dc_color into the shaded column drawer.
|
|
|
|
;-----------------------------------------------------------------------
; R_SetupShadedCol / @R_SetupShadedCol@0 -- no arguments.
;
; Rewrites the 32-bit operands embedded in rt_shaded4cols_asm:
;   s4cm1-4 + 3  <-  [dc_colormap]
;   s4fg1-4 + 3  <-  Col2RGB8 + [dc_color]*4
; The "+3" skips the three bytes that precede the 32-bit displacement in
; each patched instruction.
;
; Each group is skipped when its first site already holds the new value;
; the four sites of a group are always written together.
;
; Overwrites eax and the flags.
;
; NOTE(review): "selfmod" is reached only on the path where the s4fg
; group was rewritten. If only the colormap changed, the routine returns
; through .cdone without invoking it -- confirm whether that matters for
; whatever selfmod expands to (its definition is not in this part of the
; file).
;-----------------------------------------------------------------------
R_SetupShadedCol:
|
|
@R_SetupShadedCol@0:
|
|
mov eax,[dc_colormap]
|
|
cmp [s4cm1+3],eax
|
|
je .cmdone
|
|
mov [s4cm1+3],eax
|
|
mov [s4cm2+3],eax
|
|
mov [s4cm3+3],eax
|
|
mov [s4cm4+3],eax
|
|
.cmdone mov eax,[dc_color]
|
|
lea eax,[Col2RGB8+eax*4]
|
|
cmp [s4fg1+3],eax
|
|
je .cdone
|
|
mov [s4fg1+3],eax
|
|
mov [s4fg2+3],eax
|
|
mov [s4fg3+3],eax
|
|
mov [s4fg4+3],eax
|
|
; Range runs from the first patched instruction to the end of the last
; one (s4fg4 is 7 bytes long).
selfmod s4cm1, s4fg4+7
|
|
.cdone ret
|
|
|
|
GLOBAL R_SetupAddCol
|
|
GLOBAL @R_SetupAddCol@0
|
|
|
|
; Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the
|
|
; unclamped adding column drawer.
|
|
|
|
;-----------------------------------------------------------------------
; R_SetupAddCol / @R_SetupAddCol@0 -- no arguments.
;
; Rewrites the 32-bit operands embedded in rt_add4cols_asm:
;   a4cm1-4 + 3  <-  [dc_colormap]
;   a4fg1-4 + 3  <-  [dc_srcblend]
;   a4bg1-4 + 3  <-  [dc_destblend]
; The "+3" skips the three bytes that precede the 32-bit displacement in
; each patched instruction.
;
; Each group is skipped when its first site already holds the new value;
; the four sites of a group are always written together.
;
; Overwrites eax and the flags.
;
; NOTE(review): "selfmod" is reached only on the path where the a4bg
; group was rewritten. If only the colormap or the source blend table
; changed, the routine returns through .dbdone without invoking it --
; confirm whether that matters for whatever selfmod expands to (its
; definition is not in this part of the file).
;-----------------------------------------------------------------------
R_SetupAddCol:
|
|
@R_SetupAddCol@0:
|
|
mov eax,[dc_colormap]
|
|
cmp [a4cm1+3],eax
|
|
je .cmdone
|
|
mov [a4cm1+3],eax
|
|
mov [a4cm2+3],eax
|
|
mov [a4cm3+3],eax
|
|
mov [a4cm4+3],eax
|
|
.cmdone mov eax,[dc_srcblend]
|
|
cmp [a4fg1+3],eax
|
|
je .sbdone
|
|
mov [a4fg1+3],eax
|
|
mov [a4fg2+3],eax
|
|
mov [a4fg3+3],eax
|
|
mov [a4fg4+3],eax
|
|
.sbdone mov eax,[dc_destblend]
|
|
cmp [a4bg1+3],eax
|
|
je .dbdone
|
|
mov [a4bg1+3],eax
|
|
mov [a4bg2+3],eax
|
|
mov [a4bg3+3],eax
|
|
mov [a4bg4+3],eax
|
|
; In rt_add4cols_asm the a4fg3/a4fg4 instructions come AFTER a4bg4, so
; the range has to end at a4fg4 (7 bytes long) to cover every patched
; instruction. Ending it at a4bg4+7 left a4fg3 and a4fg4 outside.
selfmod a4cm1, a4fg4+7
|
|
.dbdone ret
|
|
|
|
GLOBAL R_SetupAddClampCol
|
|
GLOBAL @R_SetupAddClampCol@0
|
|
|
|
; Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the
|
|
; add with clamping column drawer.
|
|
|
|
;-----------------------------------------------------------------------
; R_SetupAddClampCol / @R_SetupAddClampCol@0 -- no arguments.
;
; Rewrites the 32-bit operands embedded in rt_addclamp4cols_asm:
;   ac4cm1-4 + 3  <-  [dc_colormap]
;   ac4fg1-4 + 3  <-  [dc_srcblend]
;   ac4bg1-4 + 3  <-  [dc_destblend]
; The "+3" skips the three bytes that precede the 32-bit displacement in
; each patched instruction.
;
; Each group is skipped when its first site already holds the new value;
; the four sites of a group are always written together.
;
; Overwrites eax and the flags.
;
; NOTE(review): "selfmod" is reached only on the path where the ac4bg
; group was rewritten. If only the colormap or the source blend table
; changed, the routine returns through .dbdone without invoking it --
; confirm whether that matters for whatever selfmod expands to (its
; definition is not in this part of the file).
;-----------------------------------------------------------------------
R_SetupAddClampCol:
|
|
@R_SetupAddClampCol@0:
|
|
mov eax,[dc_colormap]
|
|
cmp [ac4cm1+3],eax
|
|
je .cmdone
|
|
mov [ac4cm1+3],eax
|
|
mov [ac4cm2+3],eax
|
|
mov [ac4cm3+3],eax
|
|
mov [ac4cm4+3],eax
|
|
.cmdone mov eax,[dc_srcblend]
|
|
cmp [ac4fg1+3],eax
|
|
je .sbdone
|
|
mov [ac4fg1+3],eax
|
|
mov [ac4fg2+3],eax
|
|
mov [ac4fg3+3],eax
|
|
mov [ac4fg4+3],eax
|
|
.sbdone mov eax,[dc_destblend]
|
|
cmp [ac4bg1+3],eax
|
|
je .dbdone
|
|
mov [ac4bg1+3],eax
|
|
mov [ac4bg2+3],eax
|
|
mov [ac4bg3+3],eax
|
|
mov [ac4bg4+3],eax
|
|
; ac4bg4 is the last patched instruction in rt_addclamp4cols_asm and is
; 7 bytes long, so this range covers every patched site.
selfmod ac4cm1, ac4bg4+7
|
|
.dbdone ret
|
|
|
|
EXTERN setvlinebpl_
|
|
EXTERN setpitch3
|
|
|
|
GLOBAL @ASM_PatchPitch@0
|
|
GLOBAL _ASM_PatchPitch
|
|
GLOBAL ASM_PatchPitch
|
|
|
|
;-----------------------------------------------------------------------
; ASM_PatchPitch / _ASM_PatchPitch / @ASM_PatchPitch@0 -- no arguments.
;
; Writes the current value of dc_pitch into the instruction stream:
;   rdcp1-3 + 2          <-  pitch   (labels defined outside this part
;                                     of the file)
;   s4p, a4p, ac4p + 1   <-  pitch   (immediate of "add eax,320")
;   s4p2 + 2             <-  -pitch + 2   (displacement of [eax-320+2])
;   s4p3 + 2             <-  -pitch + 3   (displacement of [eax-320+3])
; It then invokes selfmod over rtext_start..rtext_end, calls setpitch3
; and finishes with a tail jump to setvlinebpl_, whose "ret" returns to
; this routine's caller.
;
; Overwrites eax and ecx, plus whatever setpitch3 and setvlinebpl_
; overwrite.
;
; NOTE(review): setpitch3 and setvlinebpl_ are external; whether they
; expect any register to still hold the pitch is not visible here.
;-----------------------------------------------------------------------
ASM_PatchPitch:
|
|
_ASM_PatchPitch:
|
|
@ASM_PatchPitch@0:
|
|
mov eax,[dc_pitch]
|
|
mov [rdcp1+2],eax
|
|
mov [rdcp2+2],eax
|
|
mov [rdcp3+2],eax
|
|
mov [s4p+1],eax
|
|
mov [a4p+1],eax
|
|
mov [ac4p+1],eax
|
|
; ecx = -pitch + 2, then -pitch + 3: the displacements used by the two
; stores in rt_shaded4cols_asm that run after eax has already been
; advanced by one pitch.
mov ecx,eax
|
|
neg ecx
|
|
inc ecx
|
|
inc ecx
|
|
mov [s4p2+2],ecx
|
|
inc ecx
|
|
mov [s4p3+2],ecx
|
|
selfmod rtext_start, rtext_end
|
|
call setpitch3
|
|
jmp setvlinebpl_
|