mirror of
https://github.com/ZDoom/gzdoom.git
synced 2025-04-01 07:30:41 +00:00
- Updated project files for nasm 2.0, which is now named nasm.exe for the Windows version, rather than nasmw.exe. Also fixed the annoying new warnings it generated. SVN r593 (trunk)
1500 lines
29 KiB
Text
1500 lines
29 KiB
Text
;*
|
|
;* tmap.nas
|
|
;* The texture-mapping inner loops in pure assembly language.
|
|
;*
|
|
;*---------------------------------------------------------------------------
|
|
;* Copyright 1998-2006 Randy Heit
|
|
;* All rights reserved.
|
|
;*
|
|
;* Redistribution and use in source and binary forms, with or without
|
|
;* modification, are permitted provided that the following conditions
|
|
;* are met:
|
|
;*
|
|
;* 1. Redistributions of source code must retain the above copyright
|
|
;* notice, this list of conditions and the following disclaimer.
|
|
;* 2. Redistributions in binary form must reproduce the above copyright
|
|
;* notice, this list of conditions and the following disclaimer in the
|
|
;* documentation and/or other materials provided with the distribution.
|
|
;* 3. The name of the author may not be used to endorse or promote products
|
|
;* derived from this software without specific prior written permission.
|
|
;*
|
|
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
;*---------------------------------------------------------------------------
|
|
;*
|
|
|
|
BITS 32
|
|
|
|
; Segment/section definition macros.
|
|
|
|
SECTION .data
|
|
|
|
%define SPACEFILLER4 (0x44444444)
|
|
|
|
; If you change this in r_draw.c, be sure to change it here, too!
|
|
FUZZTABLE equ 50
|
|
|
|
%ifdef M_TARGET_LINUX
|
|
|
|
EXTERN ylookup
|
|
EXTERN centery
|
|
EXTERN fuzzpos
|
|
EXTERN fuzzoffset
|
|
EXTERN NormalLight
|
|
EXTERN realviewheight
|
|
EXTERN fuzzviewheight
|
|
EXTERN CPU
|
|
|
|
EXTERN dc_pitch
|
|
EXTERN dc_colormap
|
|
EXTERN dc_iscale
|
|
EXTERN dc_texturefrac
|
|
EXTERN dc_source
|
|
EXTERN dc_yl
|
|
EXTERN dc_yh
|
|
EXTERN dc_x
|
|
EXTERN dc_count
|
|
EXTERN dc_dest
|
|
EXTERN dc_destorg
|
|
|
|
EXTERN dc_ctspan
|
|
EXTERN dc_temp
|
|
|
|
EXTERN ds_xstep
|
|
EXTERN ds_ystep
|
|
EXTERN ds_colormap
|
|
EXTERN ds_source
|
|
EXTERN ds_x1
|
|
EXTERN ds_x2
|
|
EXTERN ds_xfrac
|
|
EXTERN ds_yfrac
|
|
EXTERN ds_y
|
|
|
|
GLOBAL ds_cursource
|
|
GLOBAL ds_curcolormap
|
|
|
|
%else
|
|
|
|
EXTERN _ylookup
|
|
EXTERN _centery
|
|
EXTERN _fuzzpos
|
|
EXTERN _fuzzoffset
|
|
EXTERN _NormalLight
|
|
EXTERN _realviewheight
|
|
EXTERN _fuzzviewheight
|
|
EXTERN _CPU
|
|
|
|
EXTERN _dc_pitch
|
|
EXTERN _dc_colormap
|
|
EXTERN _dc_iscale
|
|
EXTERN _dc_texturefrac
|
|
EXTERN _dc_source
|
|
EXTERN _dc_yl
|
|
EXTERN _dc_yh
|
|
EXTERN _dc_x
|
|
EXTERN _dc_count
|
|
EXTERN _dc_dest
|
|
EXTERN _dc_destorg
|
|
|
|
EXTERN _dc_ctspan
|
|
EXTERN _dc_temp
|
|
|
|
EXTERN _ds_xstep
|
|
EXTERN _ds_ystep
|
|
EXTERN _ds_colormap
|
|
EXTERN _ds_source
|
|
EXTERN _ds_x1
|
|
EXTERN _ds_x2
|
|
EXTERN _ds_xfrac
|
|
EXTERN _ds_yfrac
|
|
EXTERN _ds_y
|
|
|
|
GLOBAL _ds_cursource
|
|
GLOBAL _ds_curcolormap
|
|
|
|
%define ylookup _ylookup
|
|
%define centery _centery
|
|
%define fuzzpos _fuzzpos
|
|
%define fuzzoffset _fuzzoffset
|
|
%define NormalLight _NormalLight
|
|
%define realviewheight _realviewheight
|
|
%define fuzzviewheight _fuzzviewheight
|
|
%define CPU _CPU
|
|
|
|
%define dc_pitch _dc_pitch
|
|
%define dc_colormap _dc_colormap
|
|
%define dc_iscale _dc_iscale
|
|
%define dc_texturefrac _dc_texturefrac
|
|
%define dc_source _dc_source
|
|
%define dc_yl _dc_yl
|
|
%define dc_yh _dc_yh
|
|
%define dc_x _dc_x
|
|
%define dc_count _dc_count
|
|
%define dc_dest _dc_dest
|
|
%define dc_destorg _dc_destorg
|
|
|
|
%define dc_ctspan _dc_ctspan
|
|
%define dc_temp _dc_temp
|
|
|
|
%define ds_xstep _ds_xstep
|
|
%define ds_ystep _ds_ystep
|
|
%define ds_colormap _ds_colormap
|
|
%define ds_source _ds_source
|
|
%define ds_x1 _ds_x1
|
|
%define ds_x2 _ds_x2
|
|
%define ds_xfrac _ds_xfrac
|
|
%define ds_yfrac _ds_yfrac
|
|
%define ds_y _ds_y
|
|
|
|
%define R_SetSpanSource_ASM _R_SetSpanSource_ASM
|
|
%define R_SetSpanSize_ASM _R_SetSpanSize_ASM
|
|
%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM
|
|
|
|
%endif
|
|
|
|
_ds_cursource:
|
|
ds_cursource:
|
|
DD 0
|
|
|
|
_ds_curcolormap:
|
|
ds_curcolormap:
|
|
DD 0
|
|
|
|
|
|
; Local stuff:
|
|
lastAddress DD 0
|
|
pixelcount DD 0
|
|
|
|
SECTION .text
|
|
|
|
|
|
GLOBAL @R_SetSpanSource_ASM@4
|
|
GLOBAL R_SetSpanSource_ASM
|
|
|
|
R_SetSpanSource_ASM:
|
|
mov ecx,[esp+4]
|
|
|
|
@R_SetSpanSource_ASM@4:
|
|
mov [spreada+2],ecx
|
|
mov [spreadb+2],ecx
|
|
mov [spreadc+2],ecx
|
|
mov [spreadd+2],ecx
|
|
mov [spreade+2],ecx
|
|
mov [spreadf+2],ecx
|
|
mov [spreadg+2],ecx
|
|
|
|
mov [mspreada+2],ecx
|
|
mov [mspreadb+2],ecx
|
|
mov [mspreadc+2],ecx
|
|
mov [mspreadd+2],ecx
|
|
mov [mspreade+2],ecx
|
|
mov [mspreadf+2],ecx
|
|
mov [mspreadg+2],ecx
|
|
|
|
mov [ds_cursource],ecx
|
|
ret
|
|
|
|
GLOBAL @R_SetSpanColormap_ASM@4
|
|
GLOBAL R_SetSpanColormap_ASM
|
|
|
|
R_SetSpanColormap_ASM:
|
|
mov ecx,[esp+4]
|
|
|
|
@R_SetSpanColormap_ASM@4:
|
|
mov [spmapa+2],ecx
|
|
mov [spmapb+2],ecx
|
|
mov [spmapc+2],ecx
|
|
mov [spmapd+2],ecx
|
|
mov [spmape+2],ecx
|
|
mov [spmapf+2],ecx
|
|
mov [spmapg+2],ecx
|
|
|
|
mov [mspmapa+2],ecx
|
|
mov [mspmapb+2],ecx
|
|
mov [mspmapc+2],ecx
|
|
mov [mspmapd+2],ecx
|
|
mov [mspmape+2],ecx
|
|
mov [mspmapf+2],ecx
|
|
mov [mspmapg+2],ecx
|
|
|
|
mov [ds_curcolormap],ecx
|
|
ret
|
|
|
|
GLOBAL R_SetSpanSize_ASM
|
|
|
|
EXTERN SetTiltedSpanSize
|
|
|
|
R_SetSpanSize_ASM:
|
|
mov edx,[esp+4]
|
|
mov ecx,[esp+8]
|
|
call SetTiltedSpanSize
|
|
|
|
mov [dsy1+2],dl
|
|
mov [dsy2+2],dl
|
|
|
|
mov [dsx1+2],cl
|
|
mov [dsx2+2],cl
|
|
mov [dsx3+2],cl
|
|
mov [dsx4+2],cl
|
|
mov [dsx5+2],cl
|
|
mov [dsx6+2],cl
|
|
mov [dsx7+2],cl
|
|
|
|
mov [dmsy1+2],dl
|
|
mov [dmsy2+2],dl
|
|
|
|
mov [dmsx1+2],cl
|
|
mov [dmsx2+2],cl
|
|
mov [dmsx3+2],cl
|
|
mov [dmsx4+2],cl
|
|
mov [dmsx5+2],cl
|
|
mov [dmsx6+2],cl
|
|
mov [dmsx7+2],cl
|
|
|
|
push ecx
|
|
add ecx,edx
|
|
mov eax,1
|
|
shl eax,cl
|
|
dec eax
|
|
mov [dsm1+2],eax
|
|
mov [dsm5+1],eax
|
|
mov [dsm6+1],eax
|
|
mov [dsm7+1],eax
|
|
|
|
mov [dmsm1+2],eax
|
|
mov [dmsm5+1],eax
|
|
mov [dmsm6+1],eax
|
|
mov [dmsm7+1],eax
|
|
pop ecx
|
|
ror eax,cl
|
|
mov [dsm2+2],eax
|
|
mov [dsm3+2],eax
|
|
mov [dsm4+2],eax
|
|
|
|
mov [dmsm2+2],eax
|
|
mov [dmsm3+2],eax
|
|
mov [dmsm4+2],eax
|
|
and eax,0xffff
|
|
not eax
|
|
mov [dsm8+2],eax
|
|
mov [dsm9+2],eax
|
|
|
|
mov [dmsm8+2],eax
|
|
mov [dmsm9+2],eax
|
|
|
|
neg dl
|
|
mov [dsy3+2],dl
|
|
mov [dsy4+2],dl
|
|
|
|
mov [dmsy3+2],dl
|
|
mov [dmsy4+2],dl
|
|
|
|
aret: ret
|
|
|
|
SECTION .rtext progbits alloc exec write align=64
|
|
|
|
GLOBAL @R_DrawSpanP_ASM@0
|
|
GLOBAL _R_DrawSpanP_ASM
|
|
GLOBAL R_DrawSpanP_ASM
|
|
|
|
; eax: scratch
|
|
; ebx: zero
|
|
; ecx: yfrac at top end, xfrac int part at low end
|
|
; edx: xfrac frac part at top end
|
|
; edi: dest
|
|
; ebp: scratch
|
|
; esi: count
|
|
|
|
align 16
|
|
|
|
@R_DrawSpanP_ASM@0:
|
|
_R_DrawSpanP_ASM:
|
|
R_DrawSpanP_ASM:
|
|
mov eax,[ds_x2]
|
|
mov ecx,[ds_x1]
|
|
sub eax,ecx
|
|
jl near rdspret ; count < 0: nothing to do, so leave
|
|
|
|
push ebx
|
|
push edi
|
|
push ebp
|
|
push esi
|
|
|
|
mov edi,ecx
|
|
add edi,[dc_destorg]
|
|
mov ecx,[ds_y]
|
|
add edi,[ylookup+ecx*4]
|
|
mov edx,[ds_xstep]
|
|
dsy1: shl edx,6
|
|
mov ebp,[ds_xstep]
|
|
dsy3: shr ebp,26
|
|
xor ebx,ebx
|
|
lea esi,[eax+1]
|
|
mov [ds_xstep],edx
|
|
mov edx,[ds_ystep]
|
|
mov ecx,[ds_xfrac]
|
|
dsy4: shr ecx,26
|
|
dsm8: and edx,0xffffffc0
|
|
or ebp,edx
|
|
mov [ds_ystep],ebp
|
|
mov ebp,[ds_yfrac]
|
|
mov edx,[ds_xfrac]
|
|
dsy2: shl edx,6
|
|
dsm9: and ebp,0xffffffc0
|
|
or ecx,ebp
|
|
shr esi,1
|
|
jnc dseven1
|
|
|
|
; do odd pixel
|
|
|
|
mov ebp,ecx
|
|
dsx1: rol ebp,6
|
|
dsm1: and ebp,0xfff
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
spreada mov bl,[ebp+SPACEFILLER4]
|
|
spmapa mov bl,[ebx+SPACEFILLER4]
|
|
mov [edi],bl
|
|
inc edi
|
|
|
|
dseven1 shr esi,1
|
|
jnc dsrest
|
|
|
|
; do two more pixels
|
|
mov ebp,ecx
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
dsm2: and ebp,0xfc00003f
|
|
dsx2: rol ebp,6
|
|
mov eax,ecx
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
spreadb mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
dsx3: rol eax,6
|
|
dsm6: and eax,0xfff
|
|
spmapb mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
mov [edi],bl ;store texel1
|
|
add edi,2
|
|
spreadc mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
spmapc mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
mov [edi-1],bl ;store texel2
|
|
|
|
; do the rest
|
|
|
|
dsrest test esi,esi
|
|
jz near dsdone
|
|
|
|
align 16
|
|
|
|
dsloop mov ebp,ecx
|
|
spstep1d add edx,[ds_xstep]
|
|
spstep2d adc ecx,[ds_ystep]
|
|
dsm3: and ebp,0xfc00003f
|
|
dsx4: rol ebp,6
|
|
mov eax,ecx
|
|
spstep1e add edx,[ds_xstep]
|
|
spstep2e adc ecx,[ds_ystep]
|
|
spreadd mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
dsx5: rol eax,6
|
|
dsm5: and eax,0xfff
|
|
spmapd mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
mov [edi],bl ;store texel1
|
|
mov ebp,ecx
|
|
spreade mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
spstep1f add edx,[ds_xstep]
|
|
spstep2f adc ecx,[ds_ystep]
|
|
dsm4: and ebp,0xfc00003f
|
|
dsx6: rol ebp,6
|
|
spmape mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
mov eax,ecx
|
|
mov [edi+1],bl ;store texel2
|
|
spreadf mov bl,[ebp+SPACEFILLER4] ;read texel3
|
|
spmapf mov bl,[ebx+SPACEFILLER4] ;map texel3
|
|
add edi,4
|
|
dsx7: rol eax,6
|
|
dsm7: and eax,0xfff
|
|
mov [edi-2],bl ;store texel3
|
|
spreadg mov bl,[eax+SPACEFILLER4] ;read texel4
|
|
spstep1g add edx,[ds_xstep]
|
|
spstep2g adc ecx,[ds_ystep]
|
|
spmapg mov bl,[ebx+SPACEFILLER4] ;map texel4
|
|
dec esi
|
|
mov [edi-1],bl ;store texel4
|
|
jnz near dsloop
|
|
|
|
dsdone pop esi
|
|
pop ebp
|
|
pop edi
|
|
pop ebx
|
|
|
|
rdspret ret
|
|
|
|
; This is the same as the previous routine, except it doesn't draw pixels
|
|
; where the texture's color value is 0.
|
|
|
|
GLOBAL @R_DrawSpanMaskedP_ASM@0
|
|
GLOBAL _R_DrawSpanMaskedP_ASM
|
|
GLOBAL R_DrawSpanMaskedP_ASM
|
|
|
|
; eax: scratch
|
|
; ebx: zero
|
|
; ecx: yfrac at top end, xfrac int part at low end
|
|
; edx: xfrac frac part at top end
|
|
; edi: dest
|
|
; ebp: scratch
|
|
; esi: count
|
|
|
|
align 16
|
|
|
|
@R_DrawSpanMaskedP_ASM@0:
|
|
_R_DrawSpanMaskedP_ASM:
|
|
R_DrawSpanMaskedP_ASM:
|
|
mov eax,[ds_x2]
|
|
mov ecx,[ds_x1]
|
|
sub eax,ecx
|
|
jl rdspret ; count < 0: nothing to do, so leave
|
|
|
|
push ebx
|
|
push edi
|
|
push ebp
|
|
push esi
|
|
|
|
mov edi,ecx
|
|
add edi,[dc_destorg]
|
|
mov ecx,[ds_y]
|
|
add edi,[ylookup+ecx*4]
|
|
mov edx,[ds_xstep]
|
|
dmsy1: shl edx,6
|
|
mov ebp,[ds_xstep]
|
|
dmsy3: shr ebp,26
|
|
xor ebx,ebx
|
|
lea esi,[eax+1]
|
|
mov [ds_xstep],edx
|
|
mov edx,[ds_ystep]
|
|
mov ecx,[ds_xfrac]
|
|
dmsy4: shr ecx,26
|
|
dmsm8: and edx,0xffffffc0
|
|
or ebp,edx
|
|
mov [ds_ystep],ebp
|
|
mov ebp,[ds_yfrac]
|
|
mov edx,[ds_xfrac]
|
|
dmsy2: shl edx,6
|
|
dmsm9: and ebp,0xffffffc0
|
|
or ecx,ebp
|
|
shr esi,1
|
|
jnc dmseven1
|
|
|
|
; do odd pixel
|
|
|
|
mov ebp,ecx
|
|
dmsx1: rol ebp,6
|
|
dmsm1: and ebp,0xfff
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
mspreada mov bl,[ebp+SPACEFILLER4]
|
|
cmp bl,0
|
|
je mspskipa
|
|
mspmapa mov bl,[ebx+SPACEFILLER4]
|
|
mov [edi],bl
|
|
mspskipa: inc edi
|
|
|
|
dmseven1 shr esi,1
|
|
jnc dmsrest
|
|
|
|
; do two more pixels
|
|
mov ebp,ecx
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
dmsm2: and ebp,0xfc00003f
|
|
dmsx2: rol ebp,6
|
|
mov eax,ecx
|
|
add edx,[ds_xstep]
|
|
adc ecx,[ds_ystep]
|
|
mspreadb mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
dmsx3: rol eax,6
|
|
dmsm6: and eax,0xfff
|
|
cmp bl,0
|
|
je mspskipb
|
|
mspmapb mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
mov [edi],bl ;store texel1
|
|
mspskipb add edi,2
|
|
mspreadc mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
cmp bl,0
|
|
je dmsrest
|
|
mspmapc mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
mov [edi-1],bl ;store texel2
|
|
|
|
; do the rest
|
|
|
|
dmsrest test esi,esi
|
|
jz near dmsdone
|
|
|
|
align 16
|
|
|
|
dmsloop mov ebp,ecx
|
|
mspstep1d add edx,[ds_xstep]
|
|
mspstep2d adc ecx,[ds_ystep]
|
|
dmsm3: and ebp,0xfc00003f
|
|
dmsx4: rol ebp,6
|
|
mov eax,ecx
|
|
mspstep1e add edx,[ds_xstep]
|
|
mspstep2e adc ecx,[ds_ystep]
|
|
mspreadd mov bl,[ebp+SPACEFILLER4] ;read texel1
|
|
dmsx5: rol eax,6
|
|
dmsm5: and eax,0xfff
|
|
cmp bl,0
|
|
mov ebp,ecx
|
|
je mspreade
|
|
mspmapd mov bl,[ebx+SPACEFILLER4] ;map texel1
|
|
mov [edi],bl ;store texel1
|
|
mspreade mov bl,[eax+SPACEFILLER4] ;read texel2
|
|
mspstep1f add edx,[ds_xstep]
|
|
mspstep2f adc ecx,[ds_ystep]
|
|
dmsm4: and ebp,0xfc00003f
|
|
dmsx6: rol ebp,6
|
|
cmp bl,0
|
|
mov eax,ecx
|
|
je mspreadf
|
|
mspmape mov bl,[ebx+SPACEFILLER4] ;map texel2
|
|
mov [edi+1],bl ;store texel2
|
|
mspreadf mov bl,[ebp+SPACEFILLER4] ;read texel3
|
|
add edi,4
|
|
dmsx7: rol eax,6
|
|
dmsm7: and eax,0xfff
|
|
cmp bl,0
|
|
je mspreadg
|
|
mspmapf mov bl,[ebx+SPACEFILLER4] ;map texel3
|
|
mov [edi-2],bl ;store texel3
|
|
mspreadg mov bl,[eax+SPACEFILLER4] ;read texel4
|
|
mspstep1g add edx,[ds_xstep]
|
|
mspstep2g adc ecx,[ds_ystep]
|
|
cmp bl,0
|
|
je mspskipg
|
|
mspmapg mov bl,[ebx+SPACEFILLER4] ;map texel4
|
|
mov [edi-1],bl ;store texel4
|
|
mspskipg dec esi
|
|
jnz near dmsloop
|
|
|
|
dmsdone pop esi
|
|
pop ebp
|
|
pop edi
|
|
pop ebx
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* R_DrawColumnP
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @R_DrawColumnP_ASM@0
|
|
GLOBAL _R_DrawColumnP_ASM
|
|
GLOBAL R_DrawColumnP_ASM
|
|
|
|
align 16
|
|
|
|
R_DrawColumnP_ASM:
|
|
_R_DrawColumnP_ASM:
|
|
@R_DrawColumnP_ASM@0:
|
|
|
|
; count = dc_yh - dc_yl;
|
|
|
|
mov ecx,[dc_count]
|
|
test ecx,ecx
|
|
jle near rdcpret ; count <= 0: nothing to do, so leave
|
|
|
|
push ebp ; save registers
|
|
push ebx
|
|
push edi
|
|
push esi
|
|
|
|
; dest = ylookup[dc_yl] + dc_x + dc_destorg;
|
|
|
|
mov edi,[dc_dest]
|
|
mov ebp,ecx
|
|
mov ebx,[dc_texturefrac] ; ebx = frac
|
|
rdcp1: sub edi,SPACEFILLER4
|
|
mov ecx,ebx
|
|
shr ecx,16
|
|
mov esi,[dc_source]
|
|
mov edx,[dc_iscale]
|
|
mov eax,[dc_colormap]
|
|
|
|
cmp BYTE [CPU+66],byte 5
|
|
jg rdcploop2
|
|
|
|
; need 12 bytes of filler to make it aligned
|
|
db 0x8D,0x80,0,0,0,0 ; lea eax,[eax+00000000]
|
|
db 0x8D,0xBF,0,0,0,0 ; lea edi,[edi+00000000]
|
|
|
|
align 16
|
|
|
|
; The registers should now look like this:
|
|
;
|
|
; [31 .. 16][15 .. 8][7 .. 0]
|
|
; eax [colormap ]
|
|
; ebx [yi ][yf ]
|
|
; ecx [scratch ]
|
|
; edx [dyi ][dyf ]
|
|
; esi [source texture column ]
|
|
; edi [destination screen pointer ]
|
|
; ebp [counter ]
|
|
;
|
|
|
|
|
|
; Note the partial register stalls on anything better than a Pentium
|
|
; That's why there are two versions of this loop.
|
|
|
|
rdcploop:
|
|
mov cl,[esi+ecx] ; Fetch texel
|
|
xor ch,ch
|
|
add ebx,edx ; increment frac
|
|
rdcp2: add edi,SPACEFILLER4 ; increment destination pointer
|
|
mov cl,[eax+ecx] ; colormap texel
|
|
mov [edi],cl ; Store texel
|
|
mov ecx,ebx
|
|
shr ecx,16
|
|
dec ebp
|
|
jnz rdcploop ; loop
|
|
|
|
pop esi
|
|
pop edi
|
|
pop ebx
|
|
pop ebp
|
|
rdcpret:
|
|
ret
|
|
|
|
align 16
|
|
|
|
rdcploop2:
|
|
movzx ecx,byte [esi+ecx] ; Fetch texel
|
|
add ebx,edx ; increment frac
|
|
mov cl,[eax+ecx] ; colormap texel
|
|
rdcp3: add edi,SPACEFILLER4 ; increment destination pointer
|
|
mov [edi],cl ; Store texel
|
|
mov ecx,ebx
|
|
shr ecx,16
|
|
dec ebp
|
|
jnz rdcploop2 ; loop
|
|
|
|
pop esi
|
|
pop edi
|
|
pop ebx
|
|
pop ebp
|
|
ret
|
|
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* R_DrawFuzzColumnP
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @R_DrawFuzzColumnP_ASM@0
|
|
GLOBAL _R_DrawFuzzColumnP_ASM
|
|
GLOBAL R_DrawFuzzColumnP_ASM
|
|
|
|
align 16
|
|
|
|
R_DrawFuzzColumnP_ASM:
|
|
_R_DrawFuzzColumnP_ASM:
|
|
@R_DrawFuzzColumnP_ASM@0:
|
|
|
|
; Adjust borders. Low...
|
|
mov eax,[dc_yl]
|
|
push ebx
|
|
push esi
|
|
push edi
|
|
push ebp
|
|
|
|
cmp eax,0
|
|
jg .ylok
|
|
|
|
mov eax,1
|
|
nop
|
|
|
|
; ...and high.
|
|
.ylok mov edx,[fuzzviewheight]
|
|
mov esi,[dc_yh]
|
|
cmp esi,edx
|
|
jle .yhok
|
|
|
|
mov esi,edx
|
|
nop
|
|
|
|
.yhok mov edx,[dc_x]
|
|
sub esi,eax ; esi = count
|
|
js near .dfcdone ; Zero length (or less)
|
|
|
|
mov edi,[ylookup+eax*4]
|
|
mov ebx,edx
|
|
add edi,[dc_destorg]
|
|
mov eax,[NormalLight]
|
|
mov ecx,[fuzzpos]
|
|
add edi,ebx
|
|
add eax,256*6
|
|
inc esi
|
|
mov ebp,[dc_pitch]
|
|
mov edx,FUZZTABLE
|
|
test ecx,ecx
|
|
je .fuzz0
|
|
|
|
;
|
|
; esi = count
|
|
; edi = dest
|
|
; ecx = fuzzpos
|
|
; eax = colormap 6
|
|
;
|
|
|
|
; first loop: end with fuzzpos or count 0, whichever happens first
|
|
|
|
sub edx,ecx ; edx = # of entries left in fuzzoffset
|
|
mov ebx,esi
|
|
cmp esi,edx
|
|
jle .enuf
|
|
mov esi,edx
|
|
.enuf sub ebx,esi
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
push ebx
|
|
xor ebx,ebx
|
|
|
|
.loop1 inc ecx
|
|
mov bl,[edi+edx]
|
|
dec esi
|
|
mov bl,[eax+ebx]
|
|
mov [edi],bl
|
|
lea edi,[edi+ebp]
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
jnz .loop1
|
|
|
|
; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those
|
|
|
|
pop esi
|
|
cmp ecx,FUZZTABLE
|
|
jl .savefuzzpos
|
|
xor ecx,ecx
|
|
nop
|
|
.fuzz0 cmp esi,FUZZTABLE
|
|
jl .chunked
|
|
|
|
.oloop lea edx,[esi-FUZZTABLE]
|
|
mov esi,FUZZTABLE
|
|
push edx
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
|
|
.iloop inc ecx
|
|
mov bl,[edi+edx]
|
|
dec esi
|
|
mov bl,[eax+ebx]
|
|
mov [edi],bl
|
|
lea edi,[edi+ebp]
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
jnz .iloop
|
|
|
|
pop esi
|
|
xor ecx,ecx
|
|
cmp esi,FUZZTABLE
|
|
jge .oloop
|
|
|
|
; third loop: Do whatever is left
|
|
|
|
.chunked:
|
|
test esi,esi
|
|
jle .savefuzzpos
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
nop
|
|
|
|
.loop3 inc ecx
|
|
mov bl,[edi+edx]
|
|
dec esi
|
|
mov bl,[eax+ebx]
|
|
mov [edi],bl
|
|
lea edi,[edi+ebp]
|
|
mov edx,[fuzzoffset+ecx*4]
|
|
jnz .loop3
|
|
|
|
.savefuzzpos:
|
|
mov [fuzzpos],ecx
|
|
.dfcdone:
|
|
pop ebp
|
|
pop edi
|
|
pop esi
|
|
pop ebx
|
|
ret
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* R_DrawColumnHorizP_ASM
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @R_DrawColumnHorizP_ASM@0
|
|
GLOBAL _R_DrawColumnHorizP_ASM
|
|
GLOBAL R_DrawColumnHorizP_ASM
|
|
|
|
align 16
|
|
|
|
@R_DrawColumnHorizP_ASM@0:
|
|
R_DrawColumnHorizP_ASM:
|
|
_R_DrawColumnHorizP_ASM:
|
|
|
|
; count = dc_yh - dc_yl;
|
|
|
|
mov eax,[dc_yh]
|
|
mov ecx,[dc_yl]
|
|
sub eax,ecx
|
|
mov edx,[dc_x]
|
|
|
|
jl near .leave ; count < 0: nothing to do, so leave
|
|
|
|
push ebp ; save registers
|
|
push ebx
|
|
push edi
|
|
push esi
|
|
|
|
inc eax ; make 0 count mean 0 pixels
|
|
and edx,3
|
|
push eax
|
|
mov esi,[dc_ctspan+edx*4]
|
|
lea eax,[dc_temp+ecx*4+edx] ; eax = top of column in buffer
|
|
mov ebp,[dc_yh]
|
|
mov [esi],ecx
|
|
mov [esi+4],ebp
|
|
add esi,8
|
|
mov edi,[dc_source]
|
|
mov [dc_ctspan+edx*4],esi
|
|
mov esi,[dc_iscale]
|
|
mov ecx,[dc_texturefrac] ; ecx = frac
|
|
mov dl,[edi] ; load cache
|
|
mov ebx,[esp]
|
|
and ebx,0xfffffff8
|
|
jnz .mthan8
|
|
|
|
; Register usage in the following code is:
|
|
;
|
|
; eax: dest
|
|
; edi: source
|
|
; ecx: frac (16.16)
|
|
; esi: fracstep (16.16)
|
|
; ebx: add1
|
|
; ebp: add2
|
|
; dl: texel1
|
|
; dh: texel2
|
|
;[esp] count
|
|
|
|
; there are fewer than 8 pixels to draw
|
|
|
|
mov ebx,[esp]
|
|
.lthan8 shr ebx,1
|
|
jnc .even
|
|
|
|
; do one pixel before loop (little opportunity for pairing)
|
|
|
|
mov ebp,ecx ; copy frac to ebx
|
|
add ecx,esi ; increment frac
|
|
shr ebp,16 ; shift frac over to low end
|
|
add eax,4
|
|
mov dl,[edi+ebp]
|
|
mov [eax-4],dl
|
|
|
|
.even test ebx,ebx
|
|
jz near .done
|
|
|
|
.loop2 mov [esp],ebx ; save counter
|
|
mov ebx,ecx ; copy frac for texel1 to ebx
|
|
shr ebx,16 ; shift frac for texel1 to low end
|
|
add ecx,esi ; increment frac
|
|
mov ebp,ecx ; copy frac for texel2 to ebp
|
|
shr ebp,16 ; shift frac for texel2 to low end
|
|
add ecx,esi ; increment frac
|
|
mov dl,[edi+ebx] ; read texel1
|
|
mov ebx,[esp] ; fetch counter
|
|
mov dh,[edi+ebp] ; read texel2
|
|
mov [eax],dl ; write texel1
|
|
mov [eax+4],dh ; write texel2
|
|
add eax,8 ; increment dest
|
|
dec ebx ; decrement counter
|
|
jnz .loop2 ; loop until it hits 0
|
|
|
|
jmp .done
|
|
|
|
; there are more than 8 pixels to draw. position eax as close to a 32 byte
|
|
; boundary as possible, then do whatever is left.
|
|
|
|
.mthan8 test eax,4
|
|
jz .try2
|
|
|
|
mov ebp,ecx ; frac: in ebp
|
|
add ecx,esi ; step
|
|
shr ebp,16 ; frac: shift
|
|
add eax,4 ; increment dest
|
|
mov ebx,[esp] ; fetch counter
|
|
mov dl,[edi+ebp] ; tex: read
|
|
dec ebx ; decrement counter
|
|
mov [eax-4],dl ; tex: write
|
|
mov [esp],ebx ; store counter
|
|
|
|
.try2 test eax,8
|
|
jz .try4
|
|
|
|
mov ebx,ecx ; frac1: in ebx
|
|
add ecx,esi ; step
|
|
shr ebx,16 ; frac1: shift
|
|
mov ebp,ecx ; frac2: in ebp
|
|
shr ebp,16 ; frac2: shift
|
|
add ecx,esi ; step
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
mov ebx,[esp] ; fetch counter
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
mov [eax],dl ; tex1: write
|
|
mov [eax+4],dh ; tex2: write
|
|
sub ebx,2 ; decrement counter
|
|
add eax,8 ; increment dest
|
|
mov [esp],ebx ; store counter
|
|
|
|
.try4 test eax,16
|
|
jz .try8
|
|
|
|
mov ebx,ecx ; frac1: in ebx
|
|
add ecx,esi ; step
|
|
shr ebx,16 ; frac1: shift
|
|
mov ebp,ecx ; frac2: in ebp
|
|
shr ebp,16 ; frac2: shift
|
|
add ecx,esi ; step
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
mov ebx,ecx ; frac3: in ebx
|
|
shr ebx,16 ; frac3: shift
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
add ecx,esi ; step
|
|
mov [eax],dl ; tex1: write
|
|
mov [eax+4],dh ; tex2: write
|
|
mov ebp,ecx ; frac4: in ebp
|
|
shr ebp,16 ; frac4: shift
|
|
add ecx,esi ; step
|
|
mov dl,[edi+ebx] ; tex3: read
|
|
mov ebx,[esp] ; fetch counter
|
|
mov dh,[edi+ebp] ; tex4: read
|
|
sub ebx,4 ; decrement counter
|
|
mov [esp],ebx ; store counter
|
|
mov [eax+8],dl ; tex3: write
|
|
mov [eax+12],dh ; tex4: write
|
|
add eax,16 ; increment dest
|
|
|
|
.try8 mov ebx,[esp] ; make counter count groups of 8
|
|
sub esp,4
|
|
shr ebx,3
|
|
jmp .tail8
|
|
|
|
align 16
|
|
|
|
.loop8 mov [esp],ebx ; save counter
|
|
mov ebx,ecx ; frac1: in ebx
|
|
shr ebx,16 ; frac1: shift
|
|
add ecx,esi ; step
|
|
mov ebp,ecx ; frac2: in ebp
|
|
shr ebp,16 ; frac2: shift
|
|
add ecx,esi ; step
|
|
mov dl,[edi+ebx] ; tex1: read
|
|
mov ebx,ecx ; frac3: in ebx
|
|
mov dh,[edi+ebp] ; tex2: read
|
|
shr ebx,16 ; frac3: shift
|
|
add ecx,esi ; step
|
|
mov [eax],dl ; tex1: write
|
|
mov [eax+4],dh ; tex2: write
|
|
mov ebp,ecx ; frac4: in ebp
|
|
shr ebp,16 ; frac4: shift
|
|
add ecx,esi ; step
|
|
mov dl,[edi+ebx] ; tex3: read
|
|
mov ebx,ecx ; frac5: in ebx
|
|
mov dh,[edi+ebp] ; tex4: read
|
|
shr ebx,16 ; frac5: shift
|
|
mov [eax+8],dl ; tex3: write
|
|
mov [eax+12],dh ; tex4: write
|
|
add ecx,esi ; step
|
|
mov ebp,ecx ; frac6: in ebp
|
|
shr ebp,16 ; frac6: shift
|
|
mov dl,[edi+ebx] ; tex5: read
|
|
add ecx,esi ; step
|
|
mov ebx,ecx ; frac7: in ebx
|
|
mov [eax+16],dl ; tex5: write
|
|
shr ebx,16 ; frac7: shift
|
|
mov dh,[edi+ebp] ; tex6: read
|
|
add ecx,esi ; step
|
|
mov ebp,ecx ; frac8: in ebp
|
|
mov [eax+20],dh ; tex6: write
|
|
shr ebp,16 ; frac8: shift
|
|
add eax,32 ; increment dest pointer
|
|
mov dl,[edi+ebx] ; tex7: read
|
|
mov ebx,[esp] ; fetch counter
|
|
mov [eax-8],dl ; tex7: write
|
|
mov dh,[edi+ebp] ; tex8: read
|
|
add ecx,esi ; step
|
|
mov [eax-4],dh ; tex8: write
|
|
mov dl,[eax] ; load cache
|
|
dec ebx ; decrement counter
|
|
.tail8 jnz near .loop8 ; loop if more to do
|
|
|
|
pop ebp
|
|
mov ebx,[esp]
|
|
and ebx,7
|
|
jnz near .lthan8
|
|
|
|
.done pop eax
|
|
pop esi
|
|
pop edi
|
|
pop ebx
|
|
pop ebp
|
|
.leave ret
|
|
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* rt_copy1col_asm
|
|
;*
|
|
;* ecx = hx
|
|
;* edx = sx
|
|
;* [esp+4] = yl
|
|
;* [esp+8] = yh
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @rt_copy1col_asm@16
|
|
GLOBAL _rt_copy1col_asm
|
|
GLOBAL rt_copy1col_asm
|
|
|
|
align 16
|
|
|
|
rt_copy1col_asm:
|
|
_rt_copy1col_asm:
|
|
pop eax
|
|
mov edx,[esp+4*3]
|
|
mov ecx,[esp+4*2]
|
|
push edx
|
|
push ecx
|
|
mov ecx,[esp+4*2]
|
|
mov edx,[esp+4*3]
|
|
push eax
|
|
|
|
@rt_copy1col_asm@16:
|
|
mov eax, [esp+4]
|
|
push ebx
|
|
mov ebx, [esp+12]
|
|
push esi
|
|
sub ebx, eax
|
|
push edi
|
|
js .done
|
|
|
|
lea esi,[eax*4]
|
|
inc ebx ; ebx = count
|
|
mov eax,edx
|
|
lea ecx,[dc_temp+ecx+esi] ; ecx = source
|
|
mov edi,[ylookup+esi]
|
|
mov esi,[dc_pitch] ; esi = pitch
|
|
add eax,edi ; eax = dest
|
|
add eax,[dc_destorg]
|
|
|
|
shr ebx,1
|
|
jnc .even
|
|
|
|
mov dl,[ecx]
|
|
add ecx,4
|
|
mov [eax],dl
|
|
add eax,esi
|
|
|
|
.even and ebx,ebx
|
|
jz .done
|
|
|
|
.loop mov dl,[ecx]
|
|
mov dh,[ecx+4]
|
|
mov [eax],dl
|
|
mov [eax+esi],dh
|
|
add ecx,8
|
|
lea eax,[eax+esi*2]
|
|
dec ebx
|
|
jnz .loop
|
|
|
|
.done pop edi
|
|
pop esi
|
|
pop ebx
|
|
ret 8
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* rt_copy4cols_asm
|
|
;*
|
|
;* ecx = sx
|
|
;* edx = yl
|
|
;* [esp+4] = yh
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @rt_copy4cols_asm@12
|
|
GLOBAL _rt_copy4cols_asm
|
|
GLOBAL rt_copy4cols_asm
|
|
|
|
align 16
|
|
|
|
rt_copy4cols_asm:
|
|
_rt_copy4cols_asm:
|
|
pop eax
|
|
mov ecx,[esp+8]
|
|
mov edx,[esp+4]
|
|
push ecx
|
|
mov ecx,[esp+4]
|
|
push eax
|
|
|
|
@rt_copy4cols_asm@12:
|
|
push ebx
|
|
mov ebx,[esp+8]
|
|
push esi
|
|
sub ebx,edx
|
|
push edi
|
|
js .done
|
|
|
|
inc ebx ; ebx = count
|
|
mov eax,ecx
|
|
mov esi,[ylookup+edx*4]
|
|
lea ecx,[dc_temp+edx*4] ; ecx = source
|
|
mov edx,[dc_pitch] ; edx = pitch
|
|
add eax,esi ; eax = dest
|
|
add eax,[dc_destorg]
|
|
|
|
shr ebx,1
|
|
jnc .even
|
|
|
|
mov esi,[ecx]
|
|
add ecx,4
|
|
mov [eax],esi
|
|
add eax,edx
|
|
|
|
.even and ebx,ebx
|
|
jz .done
|
|
|
|
.loop mov esi,[ecx]
|
|
mov edi,[ecx+4]
|
|
mov [eax],esi
|
|
mov [eax+edx],edi
|
|
add ecx,8
|
|
lea eax,[eax+edx*2]
|
|
dec ebx
|
|
jnz .loop
|
|
|
|
.done pop edi
|
|
pop esi
|
|
pop ebx
|
|
ret 4
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* rt_map1col_asm
|
|
;*
|
|
;* ecx = hx
|
|
;* edx = sx
|
|
;* [esp+4] = yl
|
|
;* [esp+8] = yh
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @rt_map1col_asm@16
|
|
GLOBAL _rt_map1col_asm
|
|
GLOBAL rt_map1col_asm
|
|
|
|
align 16
|
|
|
|
rt_map1col_asm:
|
|
_rt_map1col_asm:
|
|
pop eax
|
|
mov edx,[esp+4*3]
|
|
mov ecx,[esp+4*2]
|
|
push edx
|
|
push ecx
|
|
mov ecx,[esp+4*2]
|
|
mov edx,[esp+4*3]
|
|
push eax
|
|
|
|
@rt_map1col_asm@16:
|
|
mov eax,[esp+4]
|
|
push ebx
|
|
mov ebx,[esp+12]
|
|
push ebp
|
|
push esi
|
|
sub ebx, eax
|
|
push edi
|
|
js .done
|
|
|
|
lea edi,[eax*4]
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
inc ebx ; ebx = count
|
|
mov eax,edx
|
|
lea ebp,[dc_temp+ecx+edi] ; ebp = source
|
|
mov ecx,[ylookup+edi]
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
add eax,ecx ; eax = dest
|
|
xor ecx,ecx
|
|
xor edx,edx
|
|
add eax,[dc_destorg]
|
|
|
|
shr ebx,1
|
|
jnc .even
|
|
|
|
mov dl,[ebp]
|
|
add ebp,4
|
|
mov dl,[esi+edx]
|
|
mov [eax],dl
|
|
add eax,edi
|
|
|
|
.even and ebx,ebx
|
|
jz .done
|
|
|
|
.loop mov dl,[ebp]
|
|
mov cl,[ebp+4]
|
|
add ebp,8
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax],dl
|
|
mov [eax+edi],cl
|
|
dec ebx
|
|
lea eax,[eax+edi*2]
|
|
jnz .loop
|
|
|
|
.done pop edi
|
|
pop esi
|
|
pop ebp
|
|
pop ebx
|
|
ret 8
|
|
|
|
;*----------------------------------------------------------------------
|
|
;*
|
|
;* rt_map4cols_asm
|
|
;*
|
|
;* rt_map4cols_asm1 is for PPro and above
|
|
;* rt_map4cols_asm2 is for Pentium and below
|
|
;*
|
|
;* ecx = sx
|
|
;* edx = yl
|
|
;* [esp+4] = yh
|
|
;*
|
|
;*----------------------------------------------------------------------
|
|
|
|
GLOBAL @rt_map4cols_asm1@12
|
|
GLOBAL _rt_map4cols_asm1
|
|
GLOBAL rt_map4cols_asm1
|
|
|
|
align 16
|
|
|
|
rt_map4cols_asm1:
|
|
_rt_map4cols_asm1:
|
|
pop eax
|
|
mov ecx,[esp+8]
|
|
mov edx,[esp+4]
|
|
push ecx
|
|
mov ecx,[esp+4]
|
|
push eax
|
|
|
|
@rt_map4cols_asm1@12:
|
|
push ebx
|
|
mov ebx,[esp+8]
|
|
push ebp
|
|
push esi
|
|
sub ebx,edx
|
|
push edi
|
|
js near .done
|
|
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
shl edx,2
|
|
mov eax,ecx
|
|
inc ebx ; ebx = count
|
|
mov edi,[ylookup+edx]
|
|
lea ebp,[dc_temp+edx] ; ebp = source
|
|
add eax,edi ; eax = dest
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
add eax,[dc_destorg]
|
|
xor ecx,ecx
|
|
xor edx,edx
|
|
|
|
shr ebx,1
|
|
jnc .even
|
|
|
|
mov dl,[ebp]
|
|
mov cl,[ebp+1]
|
|
add ebp,4
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax],dl
|
|
mov [eax+1],cl
|
|
mov dl,[ebp-2]
|
|
mov cl,[ebp-1]
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax+2],dl
|
|
mov [eax+3],cl
|
|
add eax,edi
|
|
|
|
.even and ebx,ebx
|
|
jz .done
|
|
|
|
.loop:
|
|
mov dl,[ebp]
|
|
mov cl,[ebp+1]
|
|
add ebp,8
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax],dl
|
|
mov [eax+1],cl
|
|
mov dl,[ebp-6]
|
|
mov cl,[ebp-5]
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax+2],dl
|
|
mov [eax+3],cl
|
|
mov dl,[ebp-4]
|
|
mov cl,[ebp-3]
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax+edi],dl
|
|
mov [eax+edi+1],cl
|
|
mov dl,[ebp-2]
|
|
mov cl,[ebp-1]
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax+edi+2],dl
|
|
mov [eax+edi+3],cl
|
|
lea eax,[eax+edi*2]
|
|
dec ebx
|
|
|
|
jnz .loop
|
|
|
|
.done pop edi
|
|
pop esi
|
|
pop ebp
|
|
pop ebx
|
|
ret 4
|
|
|
|
GLOBAL @rt_map4cols_asm2@12
|
|
GLOBAL _rt_map4cols_asm2
|
|
GLOBAL rt_map4cols_asm2
|
|
|
|
align 16
|
|
|
|
rt_map4cols_asm2:
|
|
_rt_map4cols_asm2:
|
|
pop eax
|
|
mov ecx,[esp+8]
|
|
mov edx,[esp+4]
|
|
push ecx
|
|
mov ecx,[esp+4]
|
|
push eax
|
|
|
|
@rt_map4cols_asm2@12:
|
|
push ebx
|
|
mov ebx,[esp+8]
|
|
push ebp
|
|
push esi
|
|
sub ebx,edx
|
|
push edi
|
|
js near .done
|
|
|
|
mov esi,[dc_colormap] ; esi = colormap
|
|
shl edx,2
|
|
mov eax,ecx
|
|
inc ebx ; ebx = count
|
|
mov edi,[ylookup+edx]
|
|
lea ebp,[dc_temp+edx] ; ebp = source
|
|
add eax,edi ; eax = dest
|
|
mov edi,[dc_pitch] ; edi = pitch
|
|
add eax,[dc_destorg]
|
|
xor ecx,ecx
|
|
xor edx,edx
|
|
|
|
shr ebx,1
|
|
jnc .even
|
|
|
|
mov dl,[ebp]
|
|
mov cl,[ebp+1]
|
|
add ebp,4
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax],dl
|
|
mov [eax+1],cl
|
|
mov dl,[ebp-2]
|
|
mov cl,[ebp-1]
|
|
mov dl,[esi+edx]
|
|
mov cl,[esi+ecx]
|
|
mov [eax+2],dl
|
|
mov [eax+3],cl
|
|
add eax,edi
|
|
|
|
.even and ebx,ebx
|
|
jz .done
|
|
|
|
.loop:
|
|
mov dl,[ebp+3]
|
|
mov ch,[esi+edx]
|
|
mov dl,[ebp+2]
|
|
mov cl,[esi+edx]
|
|
shl ecx,16
|
|
mov dl,[ebp+1]
|
|
mov ch,[esi+edx]
|
|
mov dl,[ebp]
|
|
mov cl,[esi+edx]
|
|
mov [eax],ecx
|
|
add eax,edi
|
|
|
|
mov dl,[ebp+7]
|
|
mov ch,[esi+edx]
|
|
mov dl,[ebp+6]
|
|
mov cl,[esi+edx]
|
|
shl ecx,16
|
|
mov dl,[ebp+5]
|
|
mov ch,[esi+edx]
|
|
mov dl,[ebp+4]
|
|
mov cl,[esi+edx]
|
|
mov [eax],ecx
|
|
add eax,edi
|
|
add ebp,8
|
|
dec ebx
|
|
|
|
jnz .loop
|
|
|
|
.done pop edi
|
|
pop esi
|
|
pop ebp
|
|
pop ebx
|
|
ret 4
|
|
|
|
;************************
|
|
|
|
SECTION .text
|
|
|
|
EXTERN setvlinebpl_
|
|
EXTERN setpitch3
|
|
|
|
GLOBAL @ASM_PatchPitch@0
|
|
GLOBAL _ASM_PatchPitch
|
|
GLOBAL ASM_PatchPitch
|
|
|
|
ASM_PatchPitch:
|
|
_ASM_PatchPitch:
|
|
@ASM_PatchPitch@0:
|
|
mov eax,[dc_pitch]
|
|
mov [rdcp1+2],eax
|
|
mov [rdcp2+2],eax
|
|
mov [rdcp3+2],eax
|
|
call setpitch3
|
|
jmp setvlinebpl_
|
|
|
|
|