mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2025-01-18 13:11:37 +00:00
- Fixed mvlineasm1 and mvlineasm4 so that they can be used with textures
taller than 256 pixels. There was a very slight performance hit for this, but I was able to tweak mvlineasm4 to make it approximately as fast as before. Interestingly, maskwallscan manages to be nearly as fast as wallscan despite having to check every pixel for transparency. I'm tempted to dump all the old masked rendering code and use (trans)maskwallscan for everything for the sake of simplicity: Only two functions to maintain for each render style, and much less complicated supporting code. Currently, I need five different functions for each rendering style: One traditional column-at-a-time style like Doom did it originally, two for rt_draw4cols, and two for transmaskwallscan. (Right now, I have cheated, and just done the ones that can be used by walls for transmaskwallscan, so the actual number of different functions isn't quite so high.) For small textures, such as font characters and far-away sprites, I'm sure maskwallscan is faster than the current code. For large textures, it's probably still competitive even if it isn't faster. But considering how similarly wallscan and maskwallscan perform, the difference is probably pretty minimal, and maskwallscan still might come out ahead due to its simpler overhead. SVN r105 (trunk)
This commit is contained in:
parent
ee12c25f47
commit
0069ca4072
2 changed files with 39 additions and 19 deletions
|
@ -1,3 +1,24 @@
|
|||
May 10, 2006
|
||||
- Fixed mvlineasm1 and mvlineasm4 so that they can be used with textures
|
||||
taller than 256 pixels. There was a very slight performance hit for this,
|
||||
but I was able to tweak mvlineasm4 to make it approximately as fast as
|
||||
before. Interestingly, maskwallscan manages to be nearly as fast as
|
||||
wallscan despite having to check every pixel for transparency. I'm
|
||||
tempted to dump all the old masked rendering code and use
|
||||
(trans)maskwallscan for everything for the sake of simplicity: Only
|
||||
two functions to maintain for each render style, and much less
|
||||
complicated supporting code. Currently, I need five different functions
|
||||
for each rendering style: One traditional column-at-a-time style like
|
||||
Doom did it originally, two for rt_draw4cols, and two for transmaskwallscan.
|
||||
(Right now, I have cheated, and just done the ones that can be used
|
||||
by walls for transmaskwallscan, so the actual number of different functions
|
||||
isn't quite so high.) For small textures, such as font characters and
|
||||
far-away sprites, I'm sure maskwallscan is faster than the current code.
|
||||
For large textures, it's probably still competitive even if it isn't faster.
|
||||
But considering how similarly wallscan and maskwallscan perform, the
|
||||
difference is probably pretty minimal, and maskwallscan still might come
|
||||
out ahead due to its simpler overhead.
|
||||
|
||||
May 10, 2006 (Changes by Graf Zahl)
|
||||
- Fixed: PClass::CreateNew didn't check whether the class had valid
|
||||
defaults and tried to copy data from a NULL pointer.
|
||||
|
|
37
src/a.nas
37
src/a.nas
|
@ -341,8 +341,8 @@ mvlineasm1:
|
|||
beginmvline:
|
||||
mov ebx, edx
|
||||
maskmach3a: shr ebx, 32
|
||||
mov bl, byte [esi+ebx]
|
||||
cmp bl, 0
|
||||
movzx ebx, byte [esi+ebx]
|
||||
cmp ebx, 0
|
||||
je short skipmask1
|
||||
maskmach3c: mov bl, byte [ebp+ebx]
|
||||
mov [edi], bl
|
||||
|
@ -371,12 +371,12 @@ mvlineasm4:
|
|||
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov [machmv1+2], eax
|
||||
mov [machmv4+2], ebx
|
||||
mov [machmv1+3], eax
|
||||
mov [machmv4+3], ebx
|
||||
mov eax, [bufplce+8]
|
||||
mov ebx, [bufplce+12]
|
||||
mov [machmv7+2], eax
|
||||
mov [machmv10+2], ebx
|
||||
mov [machmv7+3], eax
|
||||
mov [machmv10+3], ebx
|
||||
|
||||
mov eax, [palookupoffse]
|
||||
mov ebx, [palookupoffse+4]
|
||||
|
@ -389,7 +389,6 @@ mvlineasm4:
|
|||
|
||||
mov eax, [vince] ;vince
|
||||
mov ebx, [vince+4]
|
||||
xor al, al
|
||||
xor bl, bl
|
||||
mov [machmv3+2], eax
|
||||
mov [machmv6+2], ebx
|
||||
|
@ -415,37 +414,37 @@ beginmvlineasm4:
|
|||
mov eax, ebp
|
||||
mov ebx, esi
|
||||
machmv16: shr eax, 32
|
||||
machmv15: shr ebx, 32
|
||||
machmv12: add ebp, 0x88888888 ;vince[3]
|
||||
machmv15: shr ebx, 32
|
||||
machmv9: add esi, 0x88888888 ;vince[2]
|
||||
machmv10: mov al, [eax+0x88888888] ;bufplce[3]
|
||||
machmv7: mov bl, [ebx+0x88888888] ;bufplce[2]
|
||||
cmp al, 1
|
||||
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
|
||||
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
cmp bl, 1
|
||||
cmp ebx, 1
|
||||
adc dl, dl
|
||||
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
||||
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
||||
|
||||
mov eax, edx
|
||||
machmv6: add edx, 0x88888888 ;vince[1]
|
||||
machmv14: shr eax, 32
|
||||
shl ebx, 16
|
||||
machmv4: mov al, [eax+0x88888888] ;bufplce[1]
|
||||
cmp al, 1
|
||||
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv6: add edx, 0x88888888 ;vince[1]
|
||||
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
||||
|
||||
mov eax, ecx
|
||||
machmv13: shr eax, 32
|
||||
machmv3: add ecx, 0x88888888 ;vince[0]
|
||||
machmv1: mov al, [eax+0x88888888] ;bufplce[0]
|
||||
cmp al, 1
|
||||
machmv13: shr eax, 32
|
||||
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
||||
|
||||
shl dl, 4
|
||||
xor eax, eax
|
||||
shl dl, 4
|
||||
fixchain2mb: add edi, 320
|
||||
mov al, dl
|
||||
add eax, mvcase15
|
||||
|
|
Loading…
Reference in a new issue