; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
; Ken Silverman's official web site: "http://www.advsys.net/ken"
; See the included license file "BUILDLIC.TXT" for license info.
; This file has been modified from Ken Silverman's original release
%include "valgrind.inc"
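; NOTE: "valgrind.inc" is presumed to supply the `selfmod` macro used below,
; which marks a range of patched (self-modifying) code so that tools such as
; Valgrind can discard any cached translations of it. Whatever the include
; actually expands to, each `selfmod first, last` call below delimits a range
; of instructions that the setup routines have just rewritten.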

SECTION .data

%ifndef M_TARGET_LINUX
%define ylookup _ylookup
%define vince _vince
%define vplce _vplce
%define palookupoffse _palookupoffse
%define bufplce _bufplce
%define dc_iscale _dc_iscale
%define dc_colormap _dc_colormap
%define dc_count _dc_count
%define dc_dest _dc_dest
%define dc_source _dc_source
%define dc_texturefrac _dc_texturefrac

%define setupvlineasm _setupvlineasm
%define prevlineasm1 _prevlineasm1
%define vlineasm1 _vlineasm1
%define vlineasm4 _vlineasm4

%define setupmvlineasm _setupmvlineasm
%define mvlineasm1 _mvlineasm1
%define mvlineasm4 _mvlineasm4
%endif
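
; On non-Linux (non-ELF) targets the C compiler decorates external symbols
; with a leading underscore, so the %defines above map each plain name used
; in this file onto its underscored form. Under M_TARGET_LINUX the names are
; used as-is.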

EXTERN ylookup ; near

EXTERN vplce ; near
EXTERN vince ; near
EXTERN palookupoffse ; near
EXTERN bufplce ; near

EXTERN dc_iscale
EXTERN dc_colormap
EXTERN dc_count
EXTERN dc_dest
EXTERN dc_source
EXTERN dc_texturefrac

mvlineasm4_counter:
dd 0

SECTION .text
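
; setvlinebpl_ receives the destination pitch (bytes per output row) in eax
; and stores it into the immediate operands of the six fixchain* add/sub
; instructions below, then flags the patched range with selfmod. The trailing
; underscore suggests a register-argument (Watcom-style) calling convention;
; the C-side declaration is not shown in this file.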
ALIGN 16
GLOBAL setvlinebpl_
setvlinebpl_:
mov [fixchain1a+2], eax
mov [fixchain1b+2], eax
mov [fixchain2a+2], eax
mov [fixchain1m+2], eax
mov [fixchain2ma+2], eax
mov [fixchain2mb+2], eax
selfmod fixchain1a, fixchain2mb+6
ret

; pass it log2(texheight)
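; setupvlineasm takes one stack argument (cdecl, read from [esp+4]) and
; patches it into the self-modified instructions of the unmasked drawers
; below: the raw byte is written into the shr/shl sites, the value minus 16
; into machvsh8, the negated value into the remaining shift sites, and
; (1 << negated value) - 1 into the two mask sites, matching the inline
; ;32-shy / ;16-shy / ;shy / ;(1<<shy)-1 annotations. selfmod is then applied
; over the patched range.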

ALIGN 16
GLOBAL setupvlineasm
setupvlineasm:
mov ecx, [esp+4]

;First 2 lines for VLINEASM1, rest for VLINEASM4
mov byte [premach3a+2], cl
mov byte [mach3a+2], cl

mov byte [machvsh1+2], cl ;32-shy
mov byte [machvsh3+2], cl ;32-shy
mov byte [machvsh5+2], cl ;32-shy
mov byte [machvsh6+2], cl ;32-shy
mov ch, cl
sub ch, 16
mov byte [machvsh8+2], ch ;16-shy
neg cl
mov byte [machvsh7+2], cl ;shy
mov byte [machvsh9+2], cl ;shy
mov byte [machvsh10+2], cl ;shy
mov byte [machvsh11+2], cl ;shy
mov byte [machvsh12+2], cl ;shy
mov eax, 1
shl eax, cl
dec eax
mov dword [machvsh2+2], eax ;(1<<shy)-1
mov dword [machvsh4+2], eax ;(1<<shy)-1
selfmod premach3a, machvsh8+6
ret

SECTION .rtext progbits alloc exec write align=64
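
; The drawers below modify their own instruction stream, so they live in a
; dedicated section that is marked both writable and executable (progbits
; alloc exec write) instead of the normal read-only .text section.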

;eax = xscale
;ebx = palookupoffse
;ecx = # pixels to draw-1
;edx = texturefrac
;esi = texturecolumn
;edi = buffer pointer
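
; prevlineasm1: entry point for the degenerate one-pixel column. If dc_count
; is greater than 1 it tail-jumps to vlineasm1; otherwise it fetches a single
; texel, runs it through dc_colormap, writes it to dc_dest, and returns the
; advanced texture coordinate in eax.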

ALIGN 16
GLOBAL prevlineasm1
prevlineasm1:
mov ecx, [dc_count]
cmp ecx, 1
ja vlineasm1

mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
add eax, edx
mov ecx, [dc_source]
premach3a: shr edx, 32
push ebx
push edi
mov edi, [dc_colormap]
xor ebx, ebx
mov bl, byte [ecx+edx]
mov ecx, [dc_dest]
mov bl, byte [edi+ebx]
pop edi
mov byte [ecx], bl
pop ebx
ret
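
; vlineasm1 draws one unmasked vertical column. Roughly equivalent C for the
; inner loop (names follow the variables referenced above; the shift count is
; the value patched in by setupvlineasm, the pitch by setvlinebpl_):
;
;   do
;   {
;       *dest = colormap[source[frac >> shift]];
;       dest += pitch;      // patched at fixchain1a / fixchain1b
;       frac += iscale;
;   } while (--count);
;
; The final texture coordinate is returned in eax.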

GLOBAL vlineasm1
ALIGN 16
vlineasm1:
push ebx
push edi
push esi
push ebp
mov ecx, [dc_count]
mov ebp, [dc_colormap]
mov edi, [dc_dest]
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
mov esi, [dc_source]
fixchain1a: sub edi, 320
nop
nop
nop
beginvline:
mov ebx, edx
mach3a: shr ebx, 32
fixchain1b: add edi, 320
mov bl, byte [esi+ebx]
add edx, eax
dec ecx
mov bl, byte [ebp+ebx]
mov byte [edi], bl
jnz short beginvline

pop ebp
pop esi
pop edi
pop ebx
mov eax, edx
ret

;eax: -------temp1-------
;ebx: -------temp2-------
;ecx: dat dat dat dat
;edx: ylo2 ylo4
;esi: yhi1 yhi2
;edi: ---videoplc/cnt----
;ebp: yhi3 yhi4
;esp:
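
; vlineasm4 draws four columns at once: the per-column source pointers
; (bufplce), colormap pointers (palookupoffse), steps (vince) and start
; coordinates (vplce) are patched directly into the unrolled loop that
; follows, and each iteration assembles four palette-mapped pixels in ecx
; and stores them with a single 32-bit write per output row.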

ALIGN 16
GLOBAL vlineasm4
vlineasm4:
mov ecx, [dc_count]
push ebp
push ebx
push esi
push edi
mov edi, [dc_dest]

mov eax, dword [ylookup+ecx*4-4]
add eax, edi
mov dword [machvline4end+2], eax
sub edi, eax

mov eax, dword [bufplce+0]
mov ebx, dword [bufplce+4]
mov ecx, dword [bufplce+8]
mov edx, dword [bufplce+12]
mov dword [machvbuf1+2], ecx
mov dword [machvbuf2+2], edx
mov dword [machvbuf3+2], eax
mov dword [machvbuf4+2], ebx

mov eax, dword [palookupoffse+0]
mov ebx, dword [palookupoffse+4]
mov ecx, dword [palookupoffse+8]
mov edx, dword [palookupoffse+12]
mov dword [machvpal1+2], ecx
mov dword [machvpal2+2], edx
mov dword [machvpal3+2], eax
mov dword [machvpal4+2], ebx

;     +--------------------+--------------------+
;edx: |v3lo                |v1lo                |
;     +--------------------+--------------------+
;esi: |v2hi  v2lo          |                v3hi|
;     +--------------------+--------------------+
;ebp: |v0hi  v0lo          |                v1hi|
;     +--------------------+--------------------+

mov ebp, dword [vince+0]
mov ebx, dword [vince+4]
mov esi, dword [vince+8]
mov eax, dword [vince+12]
and esi, 0fffffe00h
and ebp, 0fffffe00h
machvsh9: rol eax, 88h ;sh
machvsh10: rol ebx, 88h ;sh
mov edx, eax
mov ecx, ebx
shr ecx, 16
and edx, 0ffff0000h
add edx, ecx
and eax, 000001ffh
and ebx, 000001ffh
add esi, eax
add ebp, ebx
;
mov eax, edx
and eax, 0ffff0000h
mov dword [machvinc1+2], eax
mov dword [machvinc2+2], esi
mov byte [machvinc3+2], dl
mov byte [machvinc4+2], dh
mov dword [machvinc5+2], ebp

mov ebp, dword [vplce+0]
mov ebx, dword [vplce+4]
mov esi, dword [vplce+8]
mov eax, dword [vplce+12]
and esi, 0fffffe00h
and ebp, 0fffffe00h
machvsh11: rol eax, 88h ;sh
machvsh12: rol ebx, 88h ;sh
mov edx, eax
mov ecx, ebx
shr ecx, 16
and edx, 0ffff0000h
add edx, ecx
and eax, 000001ffh
and ebx, 000001ffh
add esi, eax
add ebp, ebx

mov ecx, esi
selfmod beginvlineasm4, machvline4end+6
jmp short beginvlineasm4
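
; The 88h / 88888888h constants below are placeholder bytes: the setup code
; above overwrites them with the real shift counts, masks, fractional steps,
; source pointers, colormap pointers and destination offsets before the loop
; is entered (hence the selfmod call over this range).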

ALIGN 16
beginvlineasm4:
machvsh1: shr ecx, 88h ;32-sh
mov ebx, esi
machvsh2: and ebx, 00000088h ;(1<<sh)-1
machvinc1: add edx, 88880000h
machvinc2: adc esi, 88888088h
machvbuf1: mov cl, byte [ecx+88888888h]
machvbuf2: mov bl, byte [ebx+88888888h]
mov eax, ebp
machvsh3: shr eax, 88h ;32-sh
machvpal1: mov cl, byte [ecx+88888888h]
machvpal2: mov ch, byte [ebx+88888888h]
mov ebx, ebp
shl ecx, 16
machvsh4: and ebx, 00000088h ;(1<<sh)-1
machvinc3: add dl, 88h
machvbuf3: mov al, byte [eax+88888888h]
machvinc4: adc dh, 88h
machvbuf4: mov bl, byte [ebx+88888888h]
machvinc5: adc ebp, 88888088h
machvpal3: mov cl, byte [eax+88888888h]
machvpal4: mov ch, byte [ebx+88888888h]
machvline4end: mov dword [edi+88888888h], ecx
fixchain2a: add edi, 88888888h
mov ecx, esi
jle short beginvlineasm4

;     +--------------------+--------------------+
;edx: |v3lo                |v1lo                |
;     +--------------------+--------------------+
;esi: |v2hi  v2lo          |                v3hi|
;     +--------------------+--------------------+
;ebp: |v0hi  v0lo          |                v1hi|
;     +--------------------+--------------------+

mov dword [vplce+8], esi
mov dword [vplce+0], ebp
;vplc2 = (esi<<(32-sh))+(edx>>sh)
;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh))
machvsh5: shl esi, 88h ;32-sh
mov eax, edx
machvsh6: shl ebp, 88h ;32-sh
and edx, 0000ffffh
machvsh7: shr eax, 88h ;sh
add esi, eax
machvsh8: shl edx, 88h ;16-sh
add ebp, edx
mov dword [vplce+12], esi
mov dword [vplce+4], ebp

pop edi
pop esi
pop ebx
pop ebp
ret

;*************************************************************************
;************************* Masked Vertical Lines *************************
;*************************************************************************

; pass it log2(texheight)
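; setupmvlineasm mirrors setupvlineasm for the masked drawers: the single
; stack argument is patched into the shift instructions of mvlineasm1 and
; mvlineasm4 (maskmach3a and machmv13..machmv16) before selfmod is applied.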

ALIGN 16
GLOBAL setupmvlineasm
setupmvlineasm:
mov ecx, dword [esp+4]
mov byte [maskmach3a+2], cl
mov byte [machmv13+2], cl
mov byte [machmv14+2], cl
mov byte [machmv15+2], cl
mov byte [machmv16+2], cl
selfmod maskmach3a, machmv13+6
ret
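
; mvlineasm1 is the masked single-column drawer: a texel value of 0 is
; treated as transparent and skipped, everything else is colormapped and
; written exactly as in vlineasm1.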

ALIGN 16
GLOBAL mvlineasm1 ;Masked vline
mvlineasm1:
push ebx
push edi
push esi
push ebp
mov ecx, [dc_count]
mov ebp, [dc_colormap]
mov edi, [dc_dest]
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
mov esi, [dc_source]
beginmvline:
mov ebx, edx
maskmach3a: shr ebx, 32
movzx ebx, byte [esi+ebx]
cmp ebx, 0
je short skipmask1
maskmach3c: mov bl, byte [ebp+ebx]
mov [edi], bl
skipmask1: add edx, eax
fixchain1m: add edi, 320
dec ecx
jnz short beginmvline

pop ebp
pop esi
pop edi
pop ebx
mov eax, edx
ret
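
; mvlineasm4 is the masked four-column drawer. Like vlineasm4 it patches the
; per-column source pointers, colormap pointers and steps into its unrolled
; loop, but instead of one unconditional 32-bit store it builds a
; transparency mask for the four texels and dispatches to one of the sixteen
; mvcase* stubs at the bottom of the file, which write only the opaque bytes.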

ALIGN 16
GLOBAL mvlineasm4
mvlineasm4:
push ebx
push esi
push edi
push ebp

mov ecx, [dc_count]
mov edi, [dc_dest]

mov eax, [bufplce+0]
mov ebx, [bufplce+4]
mov [machmv1+3], eax
mov [machmv4+3], ebx
mov eax, [bufplce+8]
mov ebx, [bufplce+12]
mov [machmv7+3], eax
mov [machmv10+3], ebx

mov eax, [palookupoffse]
mov ebx, [palookupoffse+4]
mov [machmv2+2], eax
mov [machmv5+2], ebx
mov eax, [palookupoffse+8]
mov ebx, [palookupoffse+12]
mov [machmv8+2], eax
mov [machmv11+2], ebx

mov eax, [vince] ;vince
mov ebx, [vince+4]
xor bl, bl
mov [machmv3+2], eax
mov [machmv6+2], ebx
mov eax, [vince+8]
mov ebx, [vince+12]
mov [machmv9+2], eax
mov [machmv12+2], ebx

inc ecx
push ecx
mov ecx, [vplce+0]
mov edx, [vplce+4]
mov esi, [vplce+8]
mov ebp, [vplce+12]
fixchain2ma: sub edi, 320

selfmod beginmvlineasm4, machmv2+6
jmp short beginmvlineasm4

ALIGN 16
beginmvlineasm4:
dec dword [esp]
jz near endmvlineasm4

mov eax, ebp
mov ebx, esi
machmv16: shr eax, 32
machmv12: add ebp, 0x88888888 ;vince[3]
machmv15: shr ebx, 32
machmv9: add esi, 0x88888888 ;vince[2]
machmv10: movzx eax, byte [eax+0x88888888] ;bufplce[3]
machmv7: movzx ebx, byte [ebx+0x88888888] ;bufplce[2]
cmp eax, 1
adc dl, dl
cmp ebx, 1
adc dl, dl
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]

mov eax, edx
machmv6: add edx, 0x88888888 ;vince[1]
machmv14: shr eax, 32
shl ebx, 16
machmv4: movzx eax, byte [eax+0x88888888] ;bufplce[1]
cmp eax, 1
adc dl, dl
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]

mov eax, ecx
machmv3: add ecx, 0x88888888 ;vince[0]
machmv13: shr eax, 32
machmv1: movzx eax, byte [eax+0x88888888] ;bufplce[0]
cmp eax, 1
adc dl, dl
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]

xor eax, eax
shl dl, 4
fixchain2mb: add edi, 320
mov al, dl
add eax, mvcase15
jmp eax ;16 byte cases
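
; Each "cmp reg, 1" / "adc dl, dl" pair above shifts a 1 into dl when the
; corresponding texel is 0 (transparent). After four texels dl holds a 4-bit
; transparency mask; "shl dl, 4" scales it by 16 because the mvcase* stubs
; below are 16-byte aligned and laid out from mvcase15 down to mvcase0, so
; the computed jump lands on the stub that stores exactly the opaque bytes
; of the four pixels assembled in ebx.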

ALIGN 16
endmvlineasm4:
mov [vplce], ecx
mov [vplce+4], edx
mov [vplce+8], esi
mov [vplce+12], ebp
pop ecx
pop ebp
pop edi
pop esi
pop ebx
ret
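
; Store stubs for the masked four-pixel write: mvcaseN writes byte k of ebx
; to [edi+k] for each bit k set in N, then jumps back to the loop. The
; numeric comment below apparently lists the byte size of each stub, from
; mvcase0 through mvcase15; every one fits within its 16-byte slot.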

;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7
ALIGN 16
mvcase15: mov [edi], ebx
jmp beginmvlineasm4

ALIGN 16
mvcase14: mov [edi+1], bh
shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4

ALIGN 16
mvcase13: mov [edi], bl
shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4

ALIGN 16
mvcase12: shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4

ALIGN 16
mvcase11: mov [edi], bx
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4

ALIGN 16
mvcase10: mov [edi+1], bh
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4

ALIGN 16
mvcase9: mov [edi], bl
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4

ALIGN 16
mvcase8: shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4

ALIGN 16
mvcase7: mov [edi], bx
shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4

ALIGN 16
mvcase6: shr ebx, 8
mov [edi+1], bx
jmp beginmvlineasm4

ALIGN 16
mvcase5: mov [edi], bl
shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4

ALIGN 16
mvcase4: shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4

ALIGN 16
mvcase3: mov [edi], bx
jmp beginmvlineasm4

ALIGN 16
mvcase2: mov [edi+1], bh
jmp beginmvlineasm4

ALIGN 16
mvcase1: mov [edi], bl
jmp beginmvlineasm4

ALIGN 16
mvcase0: jmp beginmvlineasm4

align 16