Merge branch 'master' into zscript

This commit is contained in:
Christoph Oelckers 2016-12-05 12:03:59 +01:00
commit 67bdac5227
5 changed files with 77 additions and 974 deletions

View file

@ -252,9 +252,10 @@ Note: All <bool> fields default to false unless mentioned otherwise.
// negative values are used as their absolute. Default = 1.
renderstyle = <string>; // Set per-actor render style, overriding the class default. Possible values can be "normal",
// "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil",
// "translucent", "fuzzy", "optfuzzy", "soultrans". Default is an empty string for no change.
fillcolor = <integer>; // Fill color used by the "stencil" and "translucentstencil" renderstyles, as RRGGBB value, default = 0x000000.
// "none", "add" or "additive", "subtract" or "subtractive", "stencil", "translucentstencil",
// "addstencil", "shaded", "addshaded", "translucent", "fuzzy", "optfuzzy", "soultrans" and "shadow".
// Default is an empty string for no change.
fillcolor = <integer>; // Fill color used by the "stencil", "addstencil" and "translucentstencil" renderstyles, as RRGGBB value, default = 0x000000.
alpha = <float>; // Translucency of this actor (if applicable to renderstyle), default is 1.0.
score = <int>; // Score value of this actor, overriding the class default if not null. Default = 0.
pitch = <integer>; // Pitch of thing in degrees. Default = 0 (horizontal).

View file

@ -598,894 +598,6 @@ dmsdone add esp,8
;*----------------------------------------------------------------------
;*
;* R_DrawColumnP
;*
;* Draws one colormapped texture column. Takes no register or stack
;* arguments; all inputs are read from globals:
;*   dc_count        number of pixels (returns at once if <= 0)
;*   dc_dest         address of the first destination pixel
;*   dc_texturefrac  starting texture coordinate, 16.16 fixed point
;*   dc_iscale       texture step per pixel, 16.16 fixed point
;*   dc_source       texture column data
;*   dc_colormap     byte-indexed lookup applied to every texel
;*
;* Preserves ebp, ebx, edi, esi. Clobbers eax, ecx, edx and flags.
;*
;* The SPACEFILLER4 immediates at rdcp1/rdcp2/rdcp3 are the destination
;* stride. They are placeholders: ASM_PatchPitch stores dc_pitch over
;* them (at label+2), so the instructions at those labels must keep
;* their exact form.
;*
;*----------------------------------------------------------------------
GLOBAL @R_DrawColumnP_ASM@0
GLOBAL _R_DrawColumnP_ASM
GLOBAL R_DrawColumnP_ASM
align 16
R_DrawColumnP_ASM:
_R_DrawColumnP_ASM:
@R_DrawColumnP_ASM@0:
; count = dc_count;
mov ecx,[dc_count]
test ecx,ecx
jle near rdcpret ; count <= 0: nothing to do, so leave
push ebp ; save registers
push ebx
push edi
push esi
; dest = dc_dest;
mov edi,[dc_dest]
mov ebp,ecx ; ebp = pixel counter
mov ebx,[dc_texturefrac] ; ebx = frac
; Back dest up by one stride; both loops advance it before each store.
rdcp1: sub edi,SPACEFILLER4
mov ecx,ebx
shr ecx,16 ; ecx = integer part of frac = index of the first texel
mov esi,[dc_source]
mov edx,[dc_iscale]
mov eax,[dc_colormap]
; Pick the loop variant from the byte at CPU+66.
; NOTE(review): presumably the CPU family field - confirm against the CPU struct.
cmp BYTE [CPU+66],byte 5
jg rdcploop2 ; value > 5: use the movzx loop
align 16
; The registers should now look like this:
;
; [31 .. 16][15 .. 8][7 .. 0]
; eax [colormap ]
; ebx [yi ][yf ]
; ecx [scratch ]
; edx [dyi ][dyf ]
; esi [source texture column ]
; edi [destination screen pointer ]
; ebp [counter ]
;
; Note the partial register stalls on anything better than a Pentium
; That's why there are two versions of this loop.
rdcploop:
mov cl,[esi+ecx] ; Fetch texel
xor ch,ch ; bits 16-31 are already zero after the shr, so ecx = texel
add ebx,edx ; increment frac
rdcp2: add edi,SPACEFILLER4 ; increment destination pointer
mov cl,[eax+ecx] ; colormap texel
mov [edi],cl ; Store texel
mov ecx,ebx
shr ecx,16 ; ecx = index of the next texel
dec ebp
jnz rdcploop ; loop
pop esi
pop edi
pop ebx
pop ebp
rdcpret:
ret
align 16
; Same loop, but the texel is fetched with movzx instead of a byte load
; followed by clearing ch.
rdcploop2:
movzx ecx,byte [esi+ecx] ; Fetch texel
add ebx,edx ; increment frac
mov cl,[eax+ecx] ; colormap texel
rdcp3: add edi,SPACEFILLER4 ; increment destination pointer
mov [edi],cl ; Store texel
mov ecx,ebx
shr ecx,16 ; ecx = index of the next texel
dec ebp
jnz rdcploop2 ; loop
pop esi
pop edi
pop ebx
pop ebp
ret
;*----------------------------------------------------------------------
;*
;* R_DrawFuzzColumnP
;*
;* Draws one "fuzz" column: every destination pixel is replaced by
;*   colormap[ dest[ fuzzoffset[fuzzpos] ] ]
;* where colormap is the pointer stored at NormalLight plus 256*6, and
;* fuzzpos walks through the FUZZTABLE entries of fuzzoffset, wrapping
;* to 0 at the end.
;*
;* Takes no register or stack arguments. Reads dc_yl, dc_yh, dc_x,
;* fuzzviewheight, ylookup, dc_destorg, dc_pitch, NormalLight, fuzzpos
;* and fuzzoffset; writes the updated position back to fuzzpos.
;*
;* Preserves ebx, esi, edi, ebp. Clobbers eax, ecx, edx and flags.
;*
;*----------------------------------------------------------------------
GLOBAL @R_DrawFuzzColumnP_ASM@0
GLOBAL _R_DrawFuzzColumnP_ASM
GLOBAL R_DrawFuzzColumnP_ASM
align 16
R_DrawFuzzColumnP_ASM:
_R_DrawFuzzColumnP_ASM:
@R_DrawFuzzColumnP_ASM@0:
; Adjust borders. Low...
mov eax,[dc_yl]
push ebx
push esi
push edi
push ebp
cmp eax,0
jg .ylok
mov eax,1 ; dc_yl <= 0: start at row 1
nop
; ...and high.
.ylok mov edx,[fuzzviewheight]
mov esi,[dc_yh]
cmp esi,edx
jle .yhok
mov esi,edx ; dc_yh > fuzzviewheight: stop at fuzzviewheight
nop
.yhok mov edx,[dc_x]
sub esi,eax ; esi = count
js near .dfcdone ; Zero length (or less)
mov edi,[ylookup+eax*4]
mov ebx,edx
add edi,[dc_destorg]
mov eax,[NormalLight]
mov ecx,[fuzzpos]
add edi,ebx ; edi = ylookup[yl] + dc_destorg + dc_x
; ebx is used below as a zero-extended byte index into the colormap
; (mov bl,[..] / mov bl,[eax+ebx]). Clear it here so that its upper 24
; bits are zero on the fuzzpos == 0 path as well; that path jumps straight
; to .fuzz0 and would otherwise still carry dc_x in ebx.
xor ebx,ebx
add eax,256*6 ; eax = colormap 6
inc esi ; make the count inclusive of both end rows
mov ebp,[dc_pitch]
mov edx,FUZZTABLE
test ecx,ecx
je .fuzz0
;
; esi = count
; edi = dest
; ecx = fuzzpos
; eax = colormap 6
;
; first loop: end with fuzzpos or count 0, whichever happens first
sub edx,ecx ; edx = # of entries left in fuzzoffset
mov ebx,esi
cmp esi,edx
jle .enuf
mov esi,edx
.enuf sub ebx,esi ; ebx = pixels left over after this loop
mov edx,[fuzzoffset+ecx*4]
push ebx ; keep the leftover count on the stack
xor ebx,ebx ; ebx becomes the byte index again
.loop1 inc ecx
mov bl,[edi+edx] ; read the neighbouring pixel
dec esi ; sets ZF for the jnz below (mov/lea leave flags alone)
mov bl,[eax+ebx] ; run it through the colormap
mov [edi],bl
lea edi,[edi+ebp] ; next row
mov edx,[fuzzoffset+ecx*4] ; fetch the offset for the next pixel
jnz .loop1
; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those
pop esi ; esi = leftover count
cmp ecx,FUZZTABLE
jl .savefuzzpos ; table not exhausted, so the count was
xor ecx,ecx ; table exhausted: wrap fuzzpos to 0
nop
.fuzz0 cmp esi,FUZZTABLE
jl .chunked
.oloop lea edx,[esi-FUZZTABLE]
mov esi,FUZZTABLE
push edx ; count remaining after this chunk
mov edx,[fuzzoffset+ecx*4]
.iloop inc ecx
mov bl,[edi+edx]
dec esi
mov bl,[eax+ebx]
mov [edi],bl
lea edi,[edi+ebp]
mov edx,[fuzzoffset+ecx*4]
jnz .iloop
pop esi
xor ecx,ecx ; a full table was consumed: wrap to 0
cmp esi,FUZZTABLE
jge .oloop
; third loop: Do whatever is left
.chunked:
test esi,esi
jle .savefuzzpos
mov edx,[fuzzoffset+ecx*4]
nop
.loop3 inc ecx
mov bl,[edi+edx]
dec esi
mov bl,[eax+ebx]
mov [edi],bl
lea edi,[edi+ebp]
mov edx,[fuzzoffset+ecx*4]
jnz .loop3
.savefuzzpos:
mov [fuzzpos],ecx
.dfcdone:
pop ebp
pop edi
pop esi
pop ebx
ret
;*----------------------------------------------------------------------
;*
;* R_DrawColumnHorizP_ASM
;*
;* Samples one texture column into the temporary buffer at dc_temp
;* instead of the screen. No colormap is applied here.
;*
;* Takes no register or stack arguments. Reads dc_yl, dc_yh, dc_x,
;* dc_temp, dc_source, dc_iscale, dc_texturefrac and dc_ctspan.
;*
;*   dest = dc_temp + (dc_x & 3) + dc_yl*4, advancing 4 bytes per pixel
;*
;* It also appends the pair (dc_yl, dc_yh) to the list addressed by
;* dc_ctspan[dc_x & 3] and advances that pointer by 8 bytes.
;*
;* Preserves ebp, ebx, edi, esi. Clobbers eax, ecx, edx and flags.
;*
;*----------------------------------------------------------------------
GLOBAL @R_DrawColumnHorizP_ASM@0
GLOBAL _R_DrawColumnHorizP_ASM
GLOBAL R_DrawColumnHorizP_ASM
align 16
@R_DrawColumnHorizP_ASM@0:
_R_DrawColumnHorizP_ASM:
R_DrawColumnHorizP_ASM:
; count = dc_yh - dc_yl; (made inclusive by the inc below)
mov eax,[dc_yh]
mov ecx,[dc_yl]
sub eax,ecx
mov edx,[dc_x] ; mov leaves the flags from the sub intact
jl near .leave ; count < 0: nothing to do, so leave
push ebp ; save registers
push ebx
push edi
push esi
inc eax ; make 0 count mean 0 pixels
and edx,3 ; edx = dc_x & 3
push eax ; [esp] = pixel count
mov eax,[dc_temp]
mov esi,[dc_ctspan+edx*4] ; esi = span list pointer for this slot
add eax,edx
lea eax,[eax+ecx*4] ; eax = top of column in buffer
mov ebp,[dc_yh]
mov [esi],ecx ; record dc_yl ...
mov [esi+4],ebp ; ... and dc_yh
add esi,8
mov edi,[dc_source]
mov [dc_ctspan+edx*4],esi ; store the advanced span list pointer
mov esi,[dc_iscale]
mov ecx,[dc_texturefrac] ; ecx = frac
mov dl,[edi] ; load cache
mov ebx,[esp]
and ebx,0xfffffff8
jnz .mthan8 ; 8 or more pixels: take the aligned path
; Register usage in the following code is:
;
; eax: dest
; edi: source
; ecx: frac (16.16)
; esi: fracstep (16.16)
; ebx: add1
; ebp: add2
; dl: texel1
; dh: texel2
;[esp] count
; there are fewer than 8 pixels to draw
mov ebx,[esp]
.lthan8 shr ebx,1 ; ebx = number of pixel pairs, CF = odd pixel
jnc .even
; do one pixel before loop (little opportunity for pairing)
mov ebp,ecx ; copy frac to ebp
add ecx,esi ; increment frac
shr ebp,16 ; shift frac over to low end
add eax,4
mov dl,[edi+ebp]
mov [eax-4],dl
.even test ebx,ebx
jz near .done
.loop2 mov [esp],ebx ; save counter
mov ebx,ecx ; copy frac for texel1 to ebx
shr ebx,16 ; shift frac for texel1 to low end
add ecx,esi ; increment frac
mov ebp,ecx ; copy frac for texel2 to ebp
shr ebp,16 ; shift frac for texel2 to low end
add ecx,esi ; increment frac
mov dl,[edi+ebx] ; read texel1
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; read texel2
mov [eax],dl ; write texel1
mov [eax+4],dh ; write texel2
add eax,8 ; increment dest
dec ebx ; decrement counter
jnz .loop2 ; loop until it hits 0
jmp .done
; there are more than 8 pixels to draw. position eax as close to a 32 byte
; boundary as possible, then do whatever is left.
.mthan8 test eax,4
jz .try2
; one pixel
mov ebp,ecx ; frac: in ebp
add ecx,esi ; step
shr ebp,16 ; frac: shift
add eax,4 ; increment dest
mov ebx,[esp] ; fetch counter
mov dl,[edi+ebp] ; tex: read
dec ebx ; decrement counter
mov [eax-4],dl ; tex: write
mov [esp],ebx ; store counter
.try2 test eax,8
jz .try4
; two pixels
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex2: read
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
sub ebx,2 ; decrement counter
add eax,8 ; increment dest
mov [esp],ebx ; store counter
.try4 test eax,16
jz .try8
; four pixels
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
shr ebx,16 ; frac3: shift
mov dh,[edi+ebp] ; tex2: read
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex4: read
sub ebx,4 ; decrement counter
mov [esp],ebx ; store counter
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add eax,16 ; increment dest
.try8 mov ebx,[esp] ; make counter count groups of 8
sub esp,4 ; second stack slot: [esp] = group counter, [esp+4] = pixel count
shr ebx,3 ; ZF from this shift decides the first test at .tail8
jmp .tail8
align 16
.loop8 mov [esp],ebx ; save counter
mov ebx,ecx ; frac1: in ebx
shr ebx,16 ; frac1: shift
add ecx,esi ; step
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
mov dh,[edi+ebp] ; tex2: read
shr ebx,16 ; frac3: shift
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,ecx ; frac5: in ebx
mov dh,[edi+ebp] ; tex4: read
shr ebx,16 ; frac5: shift
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add ecx,esi ; step
mov ebp,ecx ; frac6: in ebp
shr ebp,16 ; frac6: shift
mov dl,[edi+ebx] ; tex5: read
add ecx,esi ; step
mov ebx,ecx ; frac7: in ebx
mov [eax+16],dl ; tex5: write
shr ebx,16 ; frac7: shift
mov dh,[edi+ebp] ; tex6: read
add ecx,esi ; step
mov ebp,ecx ; frac8: in ebp
mov [eax+20],dh ; tex6: write
shr ebp,16 ; frac8: shift
add eax,32 ; increment dest pointer
mov dl,[edi+ebx] ; tex7: read
mov ebx,[esp] ; fetch counter
mov [eax-8],dl ; tex7: write
mov dh,[edi+ebp] ; tex8: read
add ecx,esi ; step
mov [eax-4],dh ; tex8: write
mov dl,[eax] ; load cache
dec ebx ; decrement counter
.tail8 jnz near .loop8 ; loop if more to do
pop ebp ; discard the group-counter slot
mov ebx,[esp] ; pixel count as left by the alignment steps
and ebx,7 ; pixels not covered by the groups of 8
jnz near .lthan8
.done pop eax ; discard the pixel-count slot
pop esi
pop edi
pop ebx
pop ebp
.leave ret
;*----------------------------------------------------------------------
;*
;* rt_copy1col_asm
;*
;* Copies one column of bytes from the temporary buffer to the screen:
;*   source = dc_temp + hx + yl*4,             4 bytes per row
;*   dest   = ylookup[yl] + sx + dc_destorg,   dc_pitch bytes per row
;* for rows yl..yh inclusive. Does nothing when yh < yl.
;*
;* Register-argument entry (@rt_copy1col_asm@16):
;* ecx = hx
;* edx = sx
;* [esp+4] = yl
;* [esp+8] = yh
;*
;* The plain and underscore-prefixed entries take all four arguments on
;* the stack and rearrange them into the layout above.
;*
;*----------------------------------------------------------------------
GLOBAL @rt_copy1col_asm@16
GLOBAL _rt_copy1col_asm
GLOBAL rt_copy1col_asm
align 16
rt_copy1col_asm:
_rt_copy1col_asm:
; Stack-argument entry: [esp] = return, then hx, sx, yl, yh.
pop eax ; eax = return address
mov edx,[esp+4*3] ; edx = yh
mov ecx,[esp+4*2] ; ecx = yl
push edx ; push copies of yh and yl ...
push ecx
mov ecx,[esp+4*2] ; ecx = hx
mov edx,[esp+4*3] ; edx = sx
push eax ; ... and put the return address back on top
; The "ret 8" below removes only the two copies; the four original
; arguments stay on the stack for the caller.
@rt_copy1col_asm@16:
mov eax, [esp+4] ; eax = yl
push ebx
mov ebx, [esp+12] ; ebx = yh
push esi
sub ebx, eax ; ebx = yh - yl
push edi ; push leaves the flags from the sub intact
js .done ; yh < yl: nothing to copy
lea esi,[eax*4] ; esi = yl*4
inc ebx ; ebx = count
mov eax,edx
add ecx,esi
mov edi,[ylookup+esi]
add ecx,[dc_temp] ; ecx = source
mov esi,[dc_pitch] ; esi = pitch
add eax,edi ; eax = dest
add eax,[dc_destorg]
shr ebx,1 ; ebx = number of row pairs, CF = odd row
jnc .even
; odd count: copy a single row first
mov dl,[ecx]
add ecx,4
mov [eax],dl
add eax,esi
.even and ebx,ebx
jz .done
; two rows per iteration
.loop mov dl,[ecx]
mov dh,[ecx+4]
mov [eax],dl
mov [eax+esi],dh
add ecx,8
lea eax,[eax+esi*2]
dec ebx
jnz .loop
.done pop edi
pop esi
pop ebx
ret 8
;*----------------------------------------------------------------------
;*
;* rt_copy4cols_asm
;*
;* Copies four adjacent columns (one dword per row) from the temporary
;* buffer to the screen:
;*   source = dc_temp + yl*4,                  4 bytes per row
;*   dest   = ylookup[yl] + sx + dc_destorg,   dc_pitch bytes per row
;* for rows yl..yh inclusive. Does nothing when yh < yl.
;*
;* Register-argument entry (@rt_copy4cols_asm@12):
;* ecx = sx
;* edx = yl
;* [esp+4] = yh
;*
;* The plain and underscore-prefixed entries take all three arguments
;* on the stack and rearrange them into the layout above.
;*
;*----------------------------------------------------------------------
GLOBAL @rt_copy4cols_asm@12
GLOBAL _rt_copy4cols_asm
GLOBAL rt_copy4cols_asm
align 16
rt_copy4cols_asm:
_rt_copy4cols_asm:
; Stack-argument entry: [esp] = return, then sx, yl, yh.
pop eax ; eax = return address
mov ecx,[esp+8] ; ecx = yh
mov edx,[esp+4] ; edx = yl
push ecx ; push a copy of yh ...
mov ecx,[esp+4] ; ecx = sx
push eax ; ... and put the return address back on top
; The "ret 4" below removes only the copy; the three original arguments
; stay on the stack for the caller.
@rt_copy4cols_asm@12:
push ebx
mov ebx,[esp+8] ; ebx = yh
push esi
sub ebx,edx ; ebx = yh - yl
push edi ; push leaves the flags from the sub intact
js .done ; yh < yl: nothing to copy
inc ebx ; ebx = count
mov eax,ecx
mov esi,[ylookup+edx*4]
mov ecx,[dc_temp]
add eax,esi ; eax = dest
add eax,[dc_destorg]
lea ecx,[ecx+edx*4] ; ecx = source
mov edx,[dc_pitch] ; edx = pitch
shr ebx,1 ; ebx = number of row pairs, CF = odd row
jnc .even
; odd count: copy a single row first
mov esi,[ecx]
add ecx,4
mov [eax],esi
add eax,edx
.even and ebx,ebx
jz .done
; two rows per iteration
.loop mov esi,[ecx]
mov edi,[ecx+4]
mov [eax],esi
mov [eax+edx],edi
add ecx,8
lea eax,[eax+edx*2]
dec ebx
jnz .loop
.done pop edi
pop esi
pop ebx
ret 4
;*----------------------------------------------------------------------
;*
;* rt_map1col_asm
;*
;* Copies one column of bytes from the temporary buffer to the screen,
;* passing every byte through dc_colormap:
;*   source = dc_temp + hx + yl*4,             4 bytes per row
;*   dest   = ylookup[yl] + sx + dc_destorg,   dc_pitch bytes per row
;* for rows yl..yh inclusive. Does nothing when yh < yl.
;*
;* Register-argument entry (@rt_map1col_asm@16):
;* ecx = hx
;* edx = sx
;* [esp+4] = yl
;* [esp+8] = yh
;*
;* The plain and underscore-prefixed entries take all four arguments on
;* the stack and rearrange them into the layout above.
;*
;*----------------------------------------------------------------------
GLOBAL @rt_map1col_asm@16
GLOBAL _rt_map1col_asm
GLOBAL rt_map1col_asm
align 16
rt_map1col_asm:
_rt_map1col_asm:
; Stack-argument entry: [esp] = return, then hx, sx, yl, yh.
pop eax ; eax = return address
mov edx,[esp+4*3] ; edx = yh
mov ecx,[esp+4*2] ; ecx = yl
push edx ; push copies of yh and yl ...
push ecx
mov ecx,[esp+4*2] ; ecx = hx
mov edx,[esp+4*3] ; edx = sx
push eax ; ... and put the return address back on top
; The "ret 8" below removes only the two copies.
@rt_map1col_asm@16:
mov eax,[esp+4] ; eax = yl
push ebx
mov ebx,[esp+12] ; ebx = yh
push ebp
push esi
sub ebx, eax ; ebx = yh - yl
push edi ; push leaves the flags from the sub intact
js .done ; yh < yl: nothing to draw
lea edi,[eax*4] ; edi = yl*4
mov esi,[dc_colormap] ; esi = colormap
inc ebx ; ebx = count
mov eax,edx
lea ebp,[ecx+edi] ; ebp = source
add ebp,[dc_temp]
mov ecx,[ylookup+edi]
mov edi,[dc_pitch] ; edi = pitch
add eax,ecx ; eax = dest
xor ecx,ecx ; ecx and edx become zero-extended byte indices
xor edx,edx
add eax,[dc_destorg]
shr ebx,1 ; ebx = number of row pairs, CF = odd row
jnc .even
; odd count: map a single row first
mov dl,[ebp]
add ebp,4
mov dl,[esi+edx]
mov [eax],dl
add eax,edi
.even and ebx,ebx
jz .done
; two rows per iteration
.loop mov dl,[ebp]
mov cl,[ebp+4]
add ebp,8
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax],dl
mov [eax+edi],cl
dec ebx ; sets ZF for the jnz; the lea in between leaves flags alone
lea eax,[eax+edi*2]
jnz .loop
.done pop edi
pop esi
pop ebp
pop ebx
ret 8
;*----------------------------------------------------------------------
;*
;* rt_map4cols_asm
;*
;* rt_map4cols_asm1 is for PPro and above
;* rt_map4cols_asm2 is for Pentium and below
;*
;* Copies four adjacent columns from the temporary buffer to the
;* screen, passing every byte through dc_colormap:
;*   source = dc_temp + yl*4,                  4 bytes per row
;*   dest   = ylookup[yl] + sx + dc_destorg,   dc_pitch bytes per row
;* for rows yl..yh inclusive. Does nothing when yh < yl.
;*
;* Register-argument entry (@rt_map4cols_asm1@12):
;* ecx = sx
;* edx = yl
;* [esp+4] = yh
;*
;* The plain and underscore-prefixed entries take all three arguments
;* on the stack and rearrange them into the layout above.
;*
;*----------------------------------------------------------------------
GLOBAL @rt_map4cols_asm1@12
GLOBAL _rt_map4cols_asm1
GLOBAL rt_map4cols_asm1
align 16
rt_map4cols_asm1:
_rt_map4cols_asm1:
; Stack-argument entry: [esp] = return, then sx, yl, yh.
pop eax ; eax = return address
mov ecx,[esp+8] ; ecx = yh
mov edx,[esp+4] ; edx = yl
push ecx ; push a copy of yh ...
mov ecx,[esp+4] ; ecx = sx
push eax ; ... and put the return address back on top
; The "ret 4" below removes only the copy.
@rt_map4cols_asm1@12:
push ebx
mov ebx,[esp+8] ; ebx = yh
push ebp
push esi
sub ebx,edx ; ebx = yh - yl
push edi ; push leaves the flags from the sub intact
js near .done ; yh < yl: nothing to draw
mov esi,[dc_colormap] ; esi = colormap
shl edx,2 ; edx = yl*4
mov eax,ecx
inc ebx ; ebx = count
mov edi,[ylookup+edx]
mov ebp,[dc_temp]
add ebp,edx ; ebp = source
add eax,edi ; eax = dest
mov edi,[dc_pitch] ; edi = pitch
add eax,[dc_destorg]
xor ecx,ecx ; ecx and edx become zero-extended byte indices
xor edx,edx
shr ebx,1 ; ebx = number of row pairs, CF = odd row
jnc .even
; odd count: map a single row (4 bytes) first
mov dl,[ebp]
mov cl,[ebp+1]
add ebp,4
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax],dl
mov [eax+1],cl
mov dl,[ebp-2]
mov cl,[ebp-1]
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax+2],dl
mov [eax+3],cl
add eax,edi
.even and ebx,ebx
jz .done
; Two rows (8 bytes) per iteration. ebp is advanced early, so the rest
; of the source bytes are addressed with negative offsets.
.loop:
mov dl,[ebp]
mov cl,[ebp+1]
add ebp,8
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax],dl
mov [eax+1],cl
mov dl,[ebp-6]
mov cl,[ebp-5]
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax+2],dl
mov [eax+3],cl
mov dl,[ebp-4]
mov cl,[ebp-3]
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax+edi],dl
mov [eax+edi+1],cl
mov dl,[ebp-2]
mov cl,[ebp-1]
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax+edi+2],dl
mov [eax+edi+3],cl
lea eax,[eax+edi*2]
dec ebx
jnz .loop
.done pop edi
pop esi
pop ebp
pop ebx
ret 4
; rt_map4cols_asm2
;
; Same arguments and setup as rt_map4cols_asm1: four adjacent columns
; are read from dc_temp + yl*4, passed through dc_colormap and written to
; ylookup[yl] + sx + dc_destorg for rows yl..yh inclusive. The main loop
; differs: it assembles the four mapped bytes of a row in ecx and writes
; them with a single dword store.
;
; Register-argument entry (@rt_map4cols_asm2@12):
; ecx = sx
; edx = yl
; [esp+4] = yh
GLOBAL @rt_map4cols_asm2@12
GLOBAL _rt_map4cols_asm2
GLOBAL rt_map4cols_asm2
align 16
rt_map4cols_asm2:
_rt_map4cols_asm2:
; Stack-argument entry: [esp] = return, then sx, yl, yh.
pop eax ; eax = return address
mov ecx,[esp+8] ; ecx = yh
mov edx,[esp+4] ; edx = yl
push ecx ; push a copy of yh ...
mov ecx,[esp+4] ; ecx = sx
push eax ; ... and put the return address back on top
; The "ret 4" below removes only the copy.
@rt_map4cols_asm2@12:
push ebx
mov ebx,[esp+8] ; ebx = yh
push ebp
push esi
sub ebx,edx ; ebx = yh - yl
push edi ; push leaves the flags from the sub intact
js near .done ; yh < yl: nothing to draw
mov esi,[dc_colormap] ; esi = colormap
shl edx,2 ; edx = yl*4
mov eax,ecx
inc ebx ; ebx = count
mov edi,[ylookup+edx]
mov ebp,[dc_temp]
add ebp,edx ; ebp = source
add eax,edi ; eax = dest
mov edi,[dc_pitch] ; edi = pitch
add eax,[dc_destorg]
xor ecx,ecx
xor edx,edx ; edx becomes a zero-extended byte index
shr ebx,1 ; ebx = number of row pairs, CF = odd row
jnc .even
; odd count: map a single row (4 bytes) first, one byte store at a time
mov dl,[ebp]
mov cl,[ebp+1]
add ebp,4
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax],dl
mov [eax+1],cl
mov dl,[ebp-2]
mov cl,[ebp-1]
mov dl,[esi+edx]
mov cl,[esi+ecx]
mov [eax+2],dl
mov [eax+3],cl
add eax,edi
.even and ebx,ebx
jz .done
; Two rows per iteration. For each row, bytes 3 and 2 are mapped into
; ch/cl and shifted into the upper half of ecx, then bytes 1 and 0 are
; mapped into ch/cl, and the whole dword is stored.
.loop:
mov dl,[ebp+3]
mov ch,[esi+edx]
mov dl,[ebp+2]
mov cl,[esi+edx]
shl ecx,16
mov dl,[ebp+1]
mov ch,[esi+edx]
mov dl,[ebp]
mov cl,[esi+edx]
mov [eax],ecx
add eax,edi
mov dl,[ebp+7]
mov ch,[esi+edx]
mov dl,[ebp+6]
mov cl,[esi+edx]
shl ecx,16
mov dl,[ebp+5]
mov ch,[esi+edx]
mov dl,[ebp+4]
mov cl,[esi+edx]
mov [eax],ecx
add eax,edi
add ebp,8
dec ebx
jnz .loop
.done pop edi
pop esi
pop ebp
pop ebx
ret 4
align 16
GLOBAL rt_shaded4cols_asm
GLOBAL _rt_shaded4cols_asm
@ -1875,9 +987,6 @@ ASM_PatchPitch:
_ASM_PatchPitch:
@ASM_PatchPitch@0:
mov eax,[dc_pitch]
mov [rdcp1+2],eax
mov [rdcp2+2],eax
mov [rdcp3+2],eax
mov [s4p+1],eax
mov [a4p+1],eax
mov [ac4p+1],eax

View file

@ -69,17 +69,10 @@ int scaledviewwidth;
// These get changed depending on the current
// screen depth and asm/no asm.
void (*R_DrawColumnHoriz)(void);
void (*R_DrawColumn)(void);
void (*R_DrawFuzzColumn)(void);
void (*R_DrawTranslatedColumn)(void);
void (*R_DrawShadedColumn)(void);
void (*R_DrawSpan)(void);
void (*R_DrawSpanMasked)(void);
void (*R_DrawSpanTranslucent)(void);
void (*R_DrawSpanMaskedTranslucent)(void);
void (*R_DrawSpanAddClamp)(void);
void (*R_DrawSpanMaskedAddClamp)(void);
void (*rt_map4cols)(int,int,int);
//
// R_DrawColumn
@ -171,7 +164,6 @@ void R_InitShadeMaps()
/* */
/************************************/
#ifndef X86_ASM
//
// A column is a vertical slice/span from a wall texture that,
// given the DOOM style restrictions on the view orientation,
@ -179,7 +171,7 @@ void R_InitShadeMaps()
// Thus a special case loop for very fast rendering can
// be used. It has also been used with Wolfenstein 3D.
//
void R_DrawColumnP_C (void)
void R_DrawColumn (void)
{
int count;
BYTE* dest;
@ -222,7 +214,7 @@ void R_DrawColumnP_C (void)
} while (--count);
}
}
#endif
// [RH] Just fills a column with a color
void R_FillColumnP (void)
@ -414,13 +406,12 @@ void R_InitFuzzTable (int fuzzoff)
}
}
#ifndef X86_ASM
//
// Creates a fuzzy image by copying pixels from adjacent ones above and below.
// Used with an all black colormap, this could create the SHADOW effect,
// i.e. spectres and invisible players.
//
void R_DrawFuzzColumnP_C (void)
void R_DrawFuzzColumn (void)
{
int count;
BYTE *dest;
@ -490,7 +481,6 @@ void R_DrawFuzzColumnP_C (void)
fuzzpos = fuzz;
}
}
#endif
//
// R_DrawTranlucentColumn
@ -1046,7 +1036,7 @@ void R_SetupSpanBits(FTexture *tex)
{
ds_xbits--;
}
if ((1 << ds_ybits) > tex->GetHeight())
if ((1 << ds_ybits) > tex->GetHeight())
{
ds_ybits--;
}
@ -1057,7 +1047,7 @@ void R_SetupSpanBits(FTexture *tex)
//
// Draws the actual span.
#ifndef X86_ASM
//#ifndef X86_ASM
void R_DrawSpanP_C (void)
{
dsfixed_t xfrac;
@ -1156,7 +1146,7 @@ void R_DrawSpanMaskedP_C (void)
// 64x64 is the most common case by far, so special case it.
do
{
BYTE texdata;
int texdata;
spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6));
texdata = source[spot];
@ -1176,7 +1166,7 @@ void R_DrawSpanMaskedP_C (void)
int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
do
{
BYTE texdata;
int texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
texdata = source[spot];
@ -1190,9 +1180,9 @@ void R_DrawSpanMaskedP_C (void)
} while (--count);
}
}
#endif
//#endif
void R_DrawSpanTranslucentP_C (void)
void R_DrawSpanTranslucent (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1252,7 +1242,7 @@ void R_DrawSpanTranslucentP_C (void)
}
}
void R_DrawSpanMaskedTranslucentP_C (void)
void R_DrawSpanMaskedTranslucent (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1326,7 +1316,7 @@ void R_DrawSpanMaskedTranslucentP_C (void)
}
}
void R_DrawSpanAddClampP_C (void)
void R_DrawSpanAddClamp (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1392,7 +1382,7 @@ void R_DrawSpanAddClampP_C (void)
}
}
void R_DrawSpanMaskedAddClampP_C (void)
void R_DrawSpanMaskedAddClamp (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1682,6 +1672,7 @@ DWORD vlinec1 ()
return frac;
}
#ifndef _M_X64
void vlinec4 ()
{
BYTE *dest = dc_dest;
@ -1698,6 +1689,43 @@ void vlinec4 ()
dest += dc_pitch;
} while (--count);
}
#else
// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though
// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them.
void vlinec4()
{
BYTE *dest = dc_dest;
int count = dc_count;
int bits = vlinebits;
DWORD place;
auto pal0 = palookupoffse[0];
auto pal1 = palookupoffse[1];
auto pal2 = palookupoffse[2];
auto pal3 = palookupoffse[3];
auto buf0 = bufplce[0];
auto buf1 = bufplce[1];
auto buf2 = bufplce[2];
auto buf3 = bufplce[3];
const auto vince0 = vince[0];
const auto vince1 = vince[1];
const auto vince2 = vince[2];
const auto vince3 = vince[3];
auto vplce0 = vplce[0];
auto vplce1 = vplce[1];
auto vplce2 = vplce[2];
auto vplce3 = vplce[3];
do
{
dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0;
dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1;
dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2;
dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3;
dest += dc_pitch;
} while (--count);
}
#endif
#endif
void setupmvline (int fracbits)
@ -2555,35 +2583,18 @@ const BYTE *R_GetColumn (FTexture *tex, int col)
void R_InitColumnDrawers ()
{
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_ASM;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_ASM;
R_DrawSpanMasked = R_DrawSpanMaskedP_ASM;
if (CPU.Family <= 5)
{
rt_map4cols = rt_map4cols_asm2;
}
else
{
rt_map4cols = rt_map4cols_asm1;
}
#else
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_C;
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
#endif
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
}
// [RH] Choose column drawers in a single place

View file

@ -65,7 +65,6 @@ extern "C" unsigned int horizspans[4];
// The span blitting interface.
// Hook in assembler or system specific BLT here.
extern void (*R_DrawColumn)(void);
extern DWORD (*dovline1) ();
extern DWORD (*doprevline1) ();
@ -84,7 +83,7 @@ extern void setupmvline (int);
extern void setuptmvline (int);
// The Spectre/Invisibility effect.
extern void (*R_DrawFuzzColumn)(void);
extern void R_DrawFuzzColumn(void);
// [RH] Draw shaded column
extern void (*R_DrawShadedColumn)(void);
@ -103,16 +102,16 @@ void R_SetSpanSource(const BYTE *pixels);
extern void (*R_DrawSpanMasked)(void);
// Span drawing for translucent textures.
extern void (*R_DrawSpanTranslucent)(void);
void R_DrawSpanTranslucent(void);
// Span drawing for masked, translucent textures.
extern void (*R_DrawSpanMaskedTranslucent)(void);
void R_DrawSpanMaskedTranslucent(void);
// Span drawing for translucent, additive textures.
extern void (*R_DrawSpanAddClamp)(void);
void R_DrawSpanAddClamp(void);
// Span drawing for masked, translucent, additive textures.
extern void (*R_DrawSpanMaskedAddClamp)(void);
void R_DrawSpanMaskedAddClamp(void);
// [RH] Span blit into an interleaved intermediate buffer
extern void (*R_DrawColumnHoriz)(void);
@ -121,16 +120,19 @@ extern void (*R_DrawColumnHoriz)(void);
void R_InitColumnDrawers ();
// [RH] Moves data from the temporary buffer to the screen.
void rt_copy1col(int hx, int sx, int yl, int yh);
void rt_copy4cols(int sx, int yl, int yh);
void rt_map4cols(int sx, int yl, int yh);
extern "C"
{
void rt_copy1col_c (int hx, int sx, int yl, int yh);
void rt_copy4cols_c (int sx, int yl, int yh);
void rt_shaded1col (int hx, int sx, int yl, int yh);
void rt_shaded4cols_c (int sx, int yl, int yh);
void rt_shaded4cols_asm (int sx, int yl, int yh);
void rt_map1col_c (int hx, int sx, int yl, int yh);
void rt_map1col (int hx, int sx, int yl, int yh);
void rt_add1col (int hx, int sx, int yl, int yh);
void rt_addclamp1col (int hx, int sx, int yl, int yh);
void rt_subclamp1col (int hx, int sx, int yl, int yh);
@ -142,7 +144,6 @@ void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh);
void rt_map4cols_c (int sx, int yl, int yh);
void rt_add4cols_c (int sx, int yl, int yh);
void rt_addclamp4cols_c (int sx, int yl, int yh);
void rt_subclamp4cols (int sx, int yl, int yh);
@ -154,29 +155,16 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh);
void rt_tlatesubclamp4cols (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh);
void rt_copy1col_asm (int hx, int sx, int yl, int yh);
void rt_map1col_asm (int hx, int sx, int yl, int yh);
void rt_copy4cols_asm (int sx, int yl, int yh);
void rt_map4cols_asm1 (int sx, int yl, int yh);
void rt_map4cols_asm2 (int sx, int yl, int yh);
void rt_add4cols_asm (int sx, int yl, int yh);
void rt_addclamp4cols_asm (int sx, int yl, int yh);
}
extern void (*rt_map4cols)(int sx, int yl, int yh);
#ifdef X86_ASM
#define rt_copy1col rt_copy1col_asm
#define rt_copy4cols rt_copy4cols_asm
#define rt_map1col rt_map1col_asm
#define rt_shaded4cols rt_shaded4cols_asm
#define rt_add4cols rt_add4cols_asm
#define rt_addclamp4cols rt_addclamp4cols_asm
#else
#define rt_copy1col rt_copy1col_c
#define rt_copy4cols rt_copy4cols_c
#define rt_map1col rt_map1col_c
#define rt_shaded4cols rt_shaded4cols_c
#define rt_add4cols rt_add4cols_c
#define rt_addclamp4cols rt_addclamp4cols_c
@ -193,29 +181,25 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
#ifdef X86_ASM
extern "C" void R_DrawColumnP_Unrolled (void);
extern "C" void R_DrawColumnHorizP_ASM (void);
extern "C" void R_DrawColumnP_ASM (void);
extern "C" void R_DrawFuzzColumnP_ASM (void);
void R_DrawTranslatedColumnP_C (void);
void R_DrawShadedColumnP_C (void);
extern "C" void R_DrawSpanP_ASM (void);
extern "C" void R_DrawSpanMaskedP_ASM (void);
void R_DrawColumnHorizP_C(void);
#else
void R_DrawColumnHorizP_C (void);
void R_DrawColumnP_C (void);
void R_DrawFuzzColumnP_C (void);
void R_DrawTranslatedColumnP_C (void);
void R_DrawShadedColumnP_C (void);
void R_DrawSpanP_C (void);
void R_DrawSpanMaskedP_C (void);
#endif
void R_DrawSpanTranslucentP_C (void);
void R_DrawSpanMaskedTranslucentP_C (void);
void R_DrawColumn();
void R_DrawColumnHorizP_C(void);
void R_DrawTranslatedColumnP_C(void);
void R_DrawSpanTranslucent (void);
void R_DrawSpanMaskedTranslucent (void);
void R_DrawTlatedLucentColumnP_C (void);
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C

View file

@ -69,9 +69,8 @@ extern "C" void R_SetupAddCol();
extern "C" void R_SetupAddClampCol();
#endif
#ifndef X86_ASM
// Copies one span at hx to the screen at sx.
void rt_copy1col_c (int hx, int sx, int yl, int yh)
void rt_copy1col (int hx, int sx, int yl, int yh)
{
BYTE *source;
BYTE *dest;
@ -112,7 +111,7 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh)
}
// Copies all four spans to the screen starting at sx.
void rt_copy4cols_c (int sx, int yl, int yh)
void rt_copy4cols (int sx, int yl, int yh)
{
int *source;
int *dest;
@ -145,7 +144,7 @@ void rt_copy4cols_c (int sx, int yl, int yh)
}
// Maps one span at hx to the screen at sx.
void rt_map1col_c (int hx, int sx, int yl, int yh)
void rt_map1col (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -180,7 +179,7 @@ void rt_map1col_c (int hx, int sx, int yl, int yh)
}
// Maps all four spans to the screen starting at sx.
void rt_map4cols_c (int sx, int yl, int yh)
void rt_map4cols (int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -222,7 +221,6 @@ void rt_map4cols_c (int sx, int yl, int yh)
dest += pitch*2;
} while (--count);
}
#endif
void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh)
{