mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2025-01-31 02:30:33 +00:00
- Fixed mvlineasm1 and mvlineasm4 so that they can be used with textures
taller than 256 pixels. There was a very slight performance hit for this, but I was able to tweak mvlineasm4 to make it approximately as fast as before. Interestingly, maskwallscan manages to be nearly as fast as wallscan despite having to check every pixel for transparency. I'm tempted to dump all the old masked rendering code and use (trans)maskwallscan for everything for the sake of simplicity: Only two functions to maintain for each render style, and much less complicated supporting code. Currently, I need five different functions for each rendering style: One traditional column-at-a-time style like Doom did it originally, two for rt_draw4cols, and two for transmaskwallscan. (Right now, I have cheated, and just done the ones that can be used by walls for transmaskwallscan, so the actual number of different functions isn't quite so high.) For small textures, such as font characters and far-away sprites, I'm sure maskwallscan is faster than the current code. For large textures, it's probably still competitive even if it isn't faster. But considering how similarly wallscan and maskwallscan perform, the difference is probably pretty minimal, and maskwallscan still might come out ahead due to its simpler overhead. SVN r105 (trunk)
This commit is contained in:
parent
ee12c25f47
commit
0069ca4072
2 changed files with 39 additions and 19 deletions
|
@ -1,3 +1,24 @@
|
||||||
|
May 10, 2006
|
||||||
|
- Fixed mvlineasm1 and mvlineasm4 so that they can be used with textures
|
||||||
|
taller than 256 pixels. There was a very slight performance hit for this,
|
||||||
|
but I was able to tweak mvlineasm4 to make it approximately as fast as
|
||||||
|
before. Interestingly, maskwallscan manages to be nearly as fast as
|
||||||
|
wallscan despite having to check every pixel for transparency. I'm
|
||||||
|
tempted to dump all the old masked rendering code and use
|
||||||
|
(trans)maskwallscan for everything for the sake of simplicity: Only
|
||||||
|
two functions to maintain for each render style, and much less
|
||||||
|
complicated supporting code. Currently, I need five different functions
|
||||||
|
for each rendering style: One traditional column-at-a-time style like
|
||||||
|
Doom did it originally, two for rt_draw4cols, and two for transmaskwallscan.
|
||||||
|
(Right now, I have cheated, and just done the ones that can be used
|
||||||
|
by walls for transmaskwallscan, so the actual number of different functions
|
||||||
|
isn't quite so high.) For small textures, such as font characters and
|
||||||
|
far-away sprites, I'm sure maskwallscan is faster than the current code.
|
||||||
|
For large textures, it's probably still competitive even if it isn't faster.
|
||||||
|
But considering how similarly wallscan and maskwallscan perform, the
|
||||||
|
difference is probably pretty minimal, and maskwallscan still might come
|
||||||
|
out ahead due to its simpler overhead.
|
||||||
|
|
||||||
May 10, 2006 (Changes by Graf Zahl)
|
May 10, 2006 (Changes by Graf Zahl)
|
||||||
- Fixed: PClass::CreateNew didn't check whether the class had valid
|
- Fixed: PClass::CreateNew didn't check whether the class had valid
|
||||||
defaults and tried to copy data from a NULL pointer.
|
defaults and tried to copy data from a NULL pointer.
|
||||||
|
|
37
src/a.nas
37
src/a.nas
|
@ -341,8 +341,8 @@ mvlineasm1:
|
||||||
beginmvline:
|
beginmvline:
|
||||||
mov ebx, edx
|
mov ebx, edx
|
||||||
maskmach3a: shr ebx, 32
|
maskmach3a: shr ebx, 32
|
||||||
mov bl, byte [esi+ebx]
|
movzx ebx, byte [esi+ebx]
|
||||||
cmp bl, 0
|
cmp ebx, 0
|
||||||
je short skipmask1
|
je short skipmask1
|
||||||
maskmach3c: mov bl, byte [ebp+ebx]
|
maskmach3c: mov bl, byte [ebp+ebx]
|
||||||
mov [edi], bl
|
mov [edi], bl
|
||||||
|
@ -371,12 +371,12 @@ mvlineasm4:
|
||||||
|
|
||||||
mov eax, [bufplce+0]
|
mov eax, [bufplce+0]
|
||||||
mov ebx, [bufplce+4]
|
mov ebx, [bufplce+4]
|
||||||
mov [machmv1+2], eax
|
mov [machmv1+3], eax
|
||||||
mov [machmv4+2], ebx
|
mov [machmv4+3], ebx
|
||||||
mov eax, [bufplce+8]
|
mov eax, [bufplce+8]
|
||||||
mov ebx, [bufplce+12]
|
mov ebx, [bufplce+12]
|
||||||
mov [machmv7+2], eax
|
mov [machmv7+3], eax
|
||||||
mov [machmv10+2], ebx
|
mov [machmv10+3], ebx
|
||||||
|
|
||||||
mov eax, [palookupoffse]
|
mov eax, [palookupoffse]
|
||||||
mov ebx, [palookupoffse+4]
|
mov ebx, [palookupoffse+4]
|
||||||
|
@ -389,7 +389,6 @@ mvlineasm4:
|
||||||
|
|
||||||
mov eax, [vince] ;vince
|
mov eax, [vince] ;vince
|
||||||
mov ebx, [vince+4]
|
mov ebx, [vince+4]
|
||||||
xor al, al
|
|
||||||
xor bl, bl
|
xor bl, bl
|
||||||
mov [machmv3+2], eax
|
mov [machmv3+2], eax
|
||||||
mov [machmv6+2], ebx
|
mov [machmv6+2], ebx
|
||||||
|
@ -415,37 +414,37 @@ beginmvlineasm4:
|
||||||
mov eax, ebp
|
mov eax, ebp
|
||||||
mov ebx, esi
|
mov ebx, esi
|
||||||
machmv16: shr eax, 32
|
machmv16: shr eax, 32
|
||||||
machmv15: shr ebx, 32
|
|
||||||
machmv12: add ebp, 0x88888888 ;vince[3]
|
machmv12: add ebp, 0x88888888 ;vince[3]
|
||||||
|
machmv15: shr ebx, 32
|
||||||
machmv9: add esi, 0x88888888 ;vince[2]
|
machmv9: add esi, 0x88888888 ;vince[2]
|
||||||
machmv10: mov al, [eax+0x88888888] ;bufplce[3]
|
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
|
||||||
machmv7: mov bl, [ebx+0x88888888] ;bufplce[2]
|
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
|
||||||
cmp al, 1
|
cmp eax, 1
|
||||||
adc dl, dl
|
adc dl, dl
|
||||||
cmp bl, 1
|
cmp ebx, 1
|
||||||
adc dl, dl
|
adc dl, dl
|
||||||
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
||||||
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
||||||
|
|
||||||
mov eax, edx
|
mov eax, edx
|
||||||
|
machmv6: add edx, 0x88888888 ;vince[1]
|
||||||
machmv14: shr eax, 32
|
machmv14: shr eax, 32
|
||||||
shl ebx, 16
|
shl ebx, 16
|
||||||
machmv4: mov al, [eax+0x88888888] ;bufplce[1]
|
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
|
||||||
cmp al, 1
|
cmp eax, 1
|
||||||
adc dl, dl
|
adc dl, dl
|
||||||
machmv6: add edx, 0x88888888 ;vince[1]
|
|
||||||
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
||||||
|
|
||||||
mov eax, ecx
|
mov eax, ecx
|
||||||
machmv13: shr eax, 32
|
|
||||||
machmv3: add ecx, 0x88888888 ;vince[0]
|
machmv3: add ecx, 0x88888888 ;vince[0]
|
||||||
machmv1: mov al, [eax+0x88888888] ;bufplce[0]
|
machmv13: shr eax, 32
|
||||||
cmp al, 1
|
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
|
||||||
|
cmp eax, 1
|
||||||
adc dl, dl
|
adc dl, dl
|
||||||
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
||||||
|
|
||||||
shl dl, 4
|
|
||||||
xor eax, eax
|
xor eax, eax
|
||||||
|
shl dl, 4
|
||||||
fixchain2mb: add edi, 320
|
fixchain2mb: add edi, 320
|
||||||
mov al, dl
|
mov al, dl
|
||||||
add eax, mvcase15
|
add eax, mvcase15
|
||||||
|
|
Loading…
Reference in a new issue