diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ec8c1c09d4..2f5649e07c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,12 +15,6 @@ include( CheckLibraryExists ) include( FindPkgConfig ) include( FindOpenGL ) -if( NOT APPLE ) - option( NO_ASM "Disable assembly code" OFF ) -else() - # At the moment asm code doesn't work with OS X, so disable by default - option( NO_ASM "Disable assembly code" ON ) -endif() if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) option( NO_STRIP "Do not strip Release or MinSizeRel builds" ) # At least some versions of Xcode fail if you strip with the linker @@ -115,7 +109,6 @@ if( WIN32 ) ) set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) - set( NASM_NAMES nasmw nasm ) find_path( D3D_INCLUDE_DIR d3d9.h PATHS ENV DXSDK_DIR @@ -240,7 +233,6 @@ else() endif() endif() endif() - set( NASM_NAMES nasm ) if( NO_GTK ) add_definitions( -DNO_GTK ) @@ -388,105 +380,6 @@ endif() find_package( FluidSynth ) -# Search for NASM - -if( NOT NO_ASM ) - if( UNIX AND X64 ) - find_program( GAS_PATH as ) - - if( GAS_PATH ) - set( ASSEMBLER ${GAS_PATH} ) - else() - message( STATUS "Could not find as. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - else() - find_program( NASM_PATH NAMES ${NASM_NAMES} ) - find_program( YASM_PATH yasm ) - - if( X64 ) - if( YASM_PATH ) - set( ASSEMBLER ${YASM_PATH} ) - else() - message( STATUS "Could not find YASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - else() - if( NASM_PATH ) - set( ASSEMBLER ${NASM_PATH} ) - else() - message( STATUS "Could not find NASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - endif() - endif() - - # I think the only reason there was a version requirement was because the - # executable name for Windows changed from 0.x to 2.0, right? This is - # how to do it in case I need to do something similar later. 
- - # execute_process( COMMAND ${NASM_PATH} -v - # OUTPUT_VARIABLE NASM_VER_STRING ) - # string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" ) - # if( NOT NASM_VER LESS 2 ) - # message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" ) - # endif() -endif() - -if( NOT NO_ASM ) - # Valgrind support is meaningless without assembly code. - if( VALGRIND ) - add_definitions( -DVALGRIND_AWARE=1 ) - # If you're Valgrinding, you probably want to keep symbols around. - set( NO_STRIP ON ) - endif() - - # Tell CMake how to assemble our files - if( UNIX ) - set( ASM_OUTPUT_EXTENSION .o ) - if( X64 ) - set( ASM_FLAGS ) - set( ASM_SOURCE_EXTENSION .s ) - else() - if( APPLE ) - set( ASM_FLAGS -fmacho -DM_TARGET_MACHO ) - else() - set( ASM_FLAGS -felf -DM_TARGET_LINUX ) - endif() - set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - set( ASM_SOURCE_EXTENSION .asm ) - endif() - else() - set( ASM_OUTPUT_EXTENSION .obj ) - set( ASM_SOURCE_EXTENSION .asm ) - if( X64 ) - set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 ) - else() - set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - endif() - endif() - if( WIN32 AND NOT X64 ) - set( FIXRTEXT fixrtext ) - else() - set( FIXRTEXT "" ) - endif() - message( STATUS "Selected assembler: ${ASSEMBLER}" ) - MACRO( ADD_ASM_FILE indir infile ) - set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" ) - if( WIN32 AND NOT X64 ) - set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" ) - else() - set( FIXRTEXT_${infile} COMMAND "" ) - endif() - add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}} - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir} - COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}" - ${FIXRTEXT_${infile}} - DEPENDS 
${indir}/${infile}.asm ${FIXRTEXT} ) - set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" ) - ENDMACRO() -endif() - # Decide on SSE setup set( SSE_MATTERS NO ) @@ -801,25 +694,6 @@ if( HAVE_MMX ) PROPERTIES COMPILE_FLAGS "-mmmx" ) endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) endif( HAVE_MMX ) - -if( NOT ASM_SOURCES ) - set( ASM_SOURCES "" ) -endif() - -if( NO_ASM ) - add_definitions( -DNOASM ) -else() - if( X64 ) - ADD_ASM_FILE( asm_x86_64 tmap3 ) - else() - ADD_ASM_FILE( asm_ia32 a ) - ADD_ASM_FILE( asm_ia32 misc ) - ADD_ASM_FILE( asm_ia32 tmap ) - ADD_ASM_FILE( asm_ia32 tmap2 ) - ADD_ASM_FILE( asm_ia32 tmap3 ) - endif() -endif() - add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y ) @@ -927,16 +801,6 @@ set( NOT_COMPILED_SOURCE_FILES scripting/zscript/zcc-parse.lemon zcc-parse.c zcc-parse.h - - # We could have the ASM macro add these files, but it wouldn't add all - # platforms. 
- asm_ia32/a.asm - asm_ia32/misc.asm - asm_ia32/tmap.asm - asm_ia32/tmap2.asm - asm_ia32/tmap3.asm - asm_x86_64/tmap3.asm - asm_x86_64/tmap3.s ) set( FASTMATH_PCH_SOURCES @@ -944,7 +808,9 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp - r_drawt.cpp + r_draw_pal.cpp + r_drawt_pal.cpp + r_thread.cpp r_main.cpp r_plane.cpp r_segs.cpp @@ -1351,7 +1217,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE ${HEADER_FILES} ${NOT_COMPILED_SOURCE_FILES} __autostart.cpp - ${ASM_SOURCES} ${SYSTEM_SOURCES} ${X86_SOURCES} ${FASTMATH_SOURCES} @@ -1514,8 +1379,6 @@ install(TARGETS zdoom DESTINATION ${INSTALL_PATH} COMPONENT "Game executable") -source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+") -source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+") source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+") source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+") source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h) diff --git a/src/asm_ia32/a.asm b/src/asm_ia32/a.asm deleted file mode 100644 index 786396d4a4..0000000000 --- a/src/asm_ia32/a.asm +++ /dev/null @@ -1,812 +0,0 @@ -; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -; Ken Silverman's official web site: "http://www.advsys.net/ken" -; See the included license file "BUILDLIC.TXT" for license info. 
-; This file has been modified from Ken Silverman's original release - -%include "valgrind.inc" - - SECTION .data - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vince _vince -%define vplce _vplce -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac - -%define setupvlineasm _setupvlineasm -%define prevlineasm1 _prevlineasm1 -%define vlineasm1 _vlineasm1 -%define vlineasm4 _vlineasm4 - -%define setupmvlineasm _setupmvlineasm -%define mvlineasm1 _mvlineasm1 -%define mvlineasm4 _mvlineasm4 - -%define R_SetupDrawSlabA _R_SetupDrawSlabA -%define R_DrawSlabA _R_DrawSlabA -%endif - -EXTERN ylookup ; near - -EXTERN vplce ; near -EXTERN vince ; near -EXTERN palookupoffse ; near -EXTERN bufplce ; near - -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac - - SECTION .text - -ALIGN 16 -GLOBAL setvlinebpl_ -setvlinebpl_: - mov [fixchain1a+2], eax - mov [fixchain1b+2], eax - mov [fixchain2a+2], eax - mov [fixchain1m+2], eax - mov [fixchain2ma+2], eax - mov [fixchain2mb+2], eax - selfmod fixchain1a, fixchain2mb+6 - -setdrawslabbpl: - mov dword [voxbpl1+2], eax - mov dword [voxbpl2+2], eax - mov dword [voxbpl3+2], eax - mov dword [voxbpl4+2], eax - mov dword [voxbpl5+2], eax - mov dword [voxbpl6+2], eax - mov dword [voxbpl7+2], eax - mov dword [voxbpl8+2], eax - selfmod voxbpl1, voxpl8+6 - ret - - SECTION .data - -lastslabcolormap: - dd 4 - - SECTION .text - -GLOBAL R_SetupDrawSlabA -GLOBAL @R_SetupDrawSlabA@4 -R_SetupDrawSlabA: - mov ecx, [esp+4] -@R_SetupDrawSlabA@4: - cmp [lastslabcolormap], ecx - je .done - mov [lastslabcolormap], ecx - mov dword [voxpal1+2], ecx - mov dword [voxpal2+2], ecx - mov dword [voxpal3+2], ecx - mov dword [voxpal4+2], ecx - mov dword [voxpal5+2], ecx - mov dword [voxpal6+2], ecx - 
mov dword [voxpal7+2], ecx - mov dword [voxpal8+2], ecx -.done ret - - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupvlineasm -setupvlineasm: - mov ecx, [esp+4] - - ;First 2 lines for VLINEASM1, rest for VLINEASM4 - mov byte [premach3a+2], cl - mov byte [mach3a+2], cl - - mov byte [machvsh1+2], cl ;32-shy - mov byte [machvsh3+2], cl ;32-shy - mov byte [machvsh5+2], cl ;32-shy - mov byte [machvsh6+2], cl ;32-shy - mov ch, cl - sub ch, 16 - mov byte [machvsh8+2], ch ;16-shy - neg cl - mov byte [machvsh7+2], cl ;shy - mov byte [machvsh9+2], cl ;shy - mov byte [machvsh10+2], cl ;shy - mov byte [machvsh11+2], cl ;shy - mov byte [machvsh12+2], cl ;shy - mov eax, 1 - shl eax, cl - dec eax - mov dword [machvsh2+2], eax ;(1<>sh) -;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh)) -machvsh5: shl esi, 88h ;32-sh - mov eax, edx -machvsh6: shl ebp, 88h ;32-sh - and edx, 0000ffffh -machvsh7: shr eax, 88h ;sh - add esi, eax -machvsh8: shl edx, 88h ;16-sh - add ebp, edx - mov dword [vplce+12], esi - mov dword [vplce+4], ebp - - pop edi - pop esi - pop ebx - pop ebp - ret - -;************************************************************************* -;************************* Masked Vertical Lines ************************* -;************************************************************************* - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupmvlineasm -setupmvlineasm: - mov ecx, dword [esp+4] - mov byte [maskmach3a+2], cl - mov byte [machmv13+2], cl - - mov byte [machmv14+2], cl - mov byte [machmv15+2], cl - mov byte [machmv16+2], cl - selfmod maskmach3a, machmv13+6 - ret - -ALIGN 16 -GLOBAL mvlineasm1 ;Masked vline -mvlineasm1: - push ebx - push edi - push esi - push ebp - mov ecx, [dc_count] - mov ebp, [dc_colormap] - mov edi, [dc_dest] - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - mov esi, [dc_source] -beginmvline: - mov ebx, edx -maskmach3a: shr ebx, 32 - movzx ebx, byte [esi+ebx] - cmp ebx, 0 - je short skipmask1 -maskmach3c: mov bl, byte [ebp+ebx] - mov 
[edi], bl -skipmask1: add edx, eax -fixchain1m: add edi, 320 - dec ecx - jnz short beginmvline - - pop ebp - pop esi - pop edi - pop ebx - mov eax, edx - ret - -ALIGN 16 -GLOBAL mvlineasm4 -mvlineasm4: - push ebx - push esi - push edi - push ebp - - mov ecx,[dc_count] - mov edi,[dc_dest] - - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov [machmv1+3], eax - mov [machmv4+3], ebx - mov eax, [bufplce+8] - mov ebx, [bufplce+12] - mov [machmv7+3], eax - mov [machmv10+3], ebx - - mov eax, [palookupoffse] - mov ebx, [palookupoffse+4] - mov [machmv2+2], eax - mov [machmv5+2], ebx - mov eax, [palookupoffse+8] - mov ebx, [palookupoffse+12] - mov [machmv8+2], eax - mov [machmv11+2], ebx - - mov eax, [vince] ;vince - mov ebx, [vince+4] - xor bl, bl - mov [machmv3+2], eax - mov [machmv6+2], ebx - mov eax, [vince+8] - mov ebx, [vince+12] - mov [machmv9+2], eax - mov [machmv12+2], ebx - - inc ecx - push ecx - mov ecx, [vplce+0] - mov edx, [vplce+4] - mov esi, [vplce+8] - mov ebp, [vplce+12] -fixchain2ma: sub edi, 320 - - selfmod beginmvlineasm4, machmv2+6 - jmp short beginmvlineasm4 -ALIGN 16 -beginmvlineasm4: - dec dword [esp] - jz near endmvlineasm4 - - mov eax, ebp - mov ebx, esi -machmv16: shr eax, 32 -machmv12: add ebp, 0x88888888 ;vince[3] -machmv15: shr ebx, 32 -machmv9: add esi, 0x88888888 ;vince[2] -machmv10: movzx eax, byte [eax+0x88888888];bufplce[3] -machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2] - cmp eax, 1 - adc dl, dl - cmp ebx, 1 - adc dl, dl -machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2] -machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3] - - mov eax, edx -machmv6: add edx, 0x88888888 ;vince[1] -machmv14: shr eax, 32 - shl ebx, 16 -machmv4: movzx eax, byte [eax+0x88888888];bufplce[1] - cmp eax, 1 - adc dl, dl -machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1] - - mov eax, ecx -machmv3: add ecx, 0x88888888 ;vince[0] -machmv13: shr eax, 32 -machmv1: movzx eax, byte [eax+0x88888888];bufplce[0] - cmp eax, 1 - adc dl, dl -machmv2: mov bl, 
[eax+0x88888888] ;palookupoffs[0] - - xor eax, eax - shl dl, 4 -fixchain2mb: add edi, 320 - mov al, dl - add eax, mvcase15 - jmp eax ;16 byte cases - -ALIGN 16 -endmvlineasm4: - mov [vplce], ecx - mov [vplce+4], edx - mov [vplce+8], esi - mov [vplce+12], ebp - pop ecx - pop ebp - pop edi - pop esi - pop ebx - ret - - ;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7 -ALIGN 16 -mvcase15: mov [edi], ebx - jmp beginmvlineasm4 -ALIGN 16 -mvcase14: mov [edi+1], bh - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase13: mov [edi], bl - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase12: shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase11: mov [edi], bx - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase10: mov [edi+1], bh - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase9: mov [edi], bl - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase8: shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase7: mov [edi], bx - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase6: shr ebx, 8 - mov [edi+1], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase5: mov [edi], bl - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase4: shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase3: mov [edi], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase2: mov [edi+1], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase1: mov [edi], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase0: jmp beginmvlineasm4 - -align 16 - - -;************************************************************************* -;***************************** Voxel Slabs ******************************* -;************************************************************************* - -GLOBAL R_DrawSlabA -R_DrawSlabA: - push ebx - push ebp - push esi - push edi - - mov eax, [esp+5*4+0] - mov ebx, [esp+5*4+4] - mov ecx, [esp+5*4+8] - mov edx, [esp+5*4+12] - mov esi, [esp+5*4+16] - mov edi, 
[esp+5*4+20] - - cmp eax, 2 - je voxbegdraw2 - ja voxskip2 - xor eax, eax -voxbegdraw1: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - dec ecx - mov al, byte [esi+ebp] -voxpal1: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl1: lea edi, [edi+88888888h] - jnz voxbegdraw1 - jmp voxskipslab5 - -voxbegdraw2: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - xor eax, eax - dec ecx - mov al, byte [esi+ebp] -voxpal2: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl2: lea edi, [edi+88888888h] - jnz voxbegdraw2 - jmp voxskipslab5 - -voxskip2: - cmp eax, 4 - jne voxskip4 - xor eax, eax -voxbegdraw4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal3: mov al, byte [eax+88888888h] - mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl3: add edi, 88888888h - dec ecx - jnz voxbegdraw4 - jmp voxskipslab5 - -voxskip4: - add eax, edi - - test edi, 1 - jz voxskipslab1 - cmp edi, eax - je voxskipslab1 - - push eax - push ebx - push ecx - push edi -voxbegslab1: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal4: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl4: add edi, 88888888h - dec ecx - jnz voxbegslab1 - pop edi - pop ecx - pop ebx - pop eax - inc edi - -voxskipslab1: - push eax - test edi, 2 - jz voxskipslab2 - dec eax - cmp edi, eax - jge voxskipslab2 - - push ebx - push ecx - push edi -voxbegslab2: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal5: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl5: add edi, 88888888h - dec ecx - jnz voxbegslab2 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab2: - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jge voxskipslab3 - -voxprebegslab3: - push ebx - push ecx - push edi -voxbegslab3: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal6: mov al, byte [eax+88888888h] 
- mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl6: add edi, 88888888h - dec ecx - jnz voxbegslab3 - pop edi - pop ecx - pop ebx - add edi, 4 - - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jl voxprebegslab3 - -voxskipslab3: - mov eax, [esp] - - dec eax - cmp edi, eax - jge voxskipslab4 - - push ebx - push ecx - push edi -voxbegslab4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal7: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl7: add edi, 88888888h - dec ecx - jnz voxbegslab4 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab4: - pop eax - - cmp edi, eax - je voxskipslab5 - -voxbegslab5: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal8: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl8: add edi, 88888888h - dec ecx - jnz voxbegslab5 - -voxskipslab5: - pop edi - pop esi - pop ebp - pop ebx - ret - -align 16 - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_a_end -_rtext_a_end: -%endif diff --git a/src/asm_ia32/misc.asm b/src/asm_ia32/misc.asm deleted file mode 100644 index b825a4d02a..0000000000 --- a/src/asm_ia32/misc.asm +++ /dev/null @@ -1,200 +0,0 @@ -;* -;* misc.nas -;* Miscellaneous assembly functions -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. 
The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%ifndef M_TARGET_LINUX - -%define DoBlending_MMX _DoBlending_MMX -%define BestColor_MMX _BestColor_MMX - -%endif - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -Blending256: - dd 0x01000100,0x00000100 - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -;----------------------------------------------------------- -; -; DoBlending_MMX -; -; MMX version of DoBlending -; -; (DWORD *from, DWORD *to, count, tor, tog, tob, toa) -;----------------------------------------------------------- - -GLOBAL DoBlending_MMX - -DoBlending_MMX: - pxor mm0,mm0 ; mm0 = 0 - mov eax,[esp+4*4] - shl eax,16 - mov edx,[esp+4*5] - shl edx,8 - or eax,[esp+4*6] - or eax,edx - mov ecx,[esp+4*3] ; ecx = count - movd mm1,eax ; mm1 = 00000000 00RRGGBB - mov eax,[esp+4*7] - shl eax,16 - mov edx,[esp+4*7] - shl edx,8 - or eax,[esp+4*7] - or eax,edx - mov edx,[esp+4*2] ; edx = dest - movd 
mm6,eax ; mm6 = 00000000 00AAAAAA - punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB - movq mm7,[Blending256] - punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA - mov eax,[esp+4*1] ; eax = source - pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha) - psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha) - nop ; Does this actually pair on a Pentium? - -; Do four colors per iteration: Count must be a multiple of four. - -.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - jnz .loop - - emms - ret - -;----------------------------------------------------------- -; -; BestColor_MMX -; -; Picks the closest matching color from a palette -; -; Passed FFRRGGBB and palette array in same format -; FF is the index of the first palette entry to consider -; -;----------------------------------------------------------- - -GLOBAL BestColor_MMX -GLOBAL @BestColor_MMX@8 - -BestColor_MMX: - mov ecx,[esp+4] - mov edx,[esp+8] -@BestColor_MMX@8: - pxor mm0,mm0 - movd mm1,ecx ; mm1 = color searching for - mov eax,257*257+257*257+257*257 ;eax = bestdist - push ebx - punpcklbw mm1,mm0 - mov ebx,ecx ; ebx = best color - shr ecx,24 ; ecx = count - and ebx,0xffffff - push esi - push ebp - 
-.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now - inc ecx - punpcklbw mm2,mm0 - movq mm3,mm1 - psubsw mm3,mm2 - pmullw mm3,mm3 ; mm3 = color distance squared - - movd ebp,mm3 ; add the three components - psrlq mm3,32 ; into ebp to get the real - mov esi,ebp ; (squared) distance - shr esi,16 - and ebp,0xffff - add ebp,esi - movd esi,mm3 - add ebp,esi - - jz .perf ; found a perfect match - cmp eax,ebp - jb .skip - mov eax,ebp - lea ebx,[ecx-1] -.skip cmp ecx,256 - jne .loop - mov eax,ebx - pop ebp - pop esi - pop ebx - emms - ret - -.perf lea eax,[ecx-1] - pop ebp - pop esi - pop ebx - emms - ret diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm deleted file mode 100644 index 2096b92229..0000000000 --- a/src/asm_ia32/tmap.asm +++ /dev/null @@ -1,1002 +0,0 @@ -;* -;* tmap.nas -;* The texture-mapping inner loops in pure assembly language. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%include "valgrind.inc" - -; Segment/section definition macros. - - SECTION .data - -%define SPACEFILLER4 (0x44444444) - -; If you change this in r_draw.c, be sure to change it here, too! -FUZZTABLE equ 50 - -%ifndef M_TARGET_LINUX - -%define ylookup _ylookup -%define centery _centery -%define fuzzpos _fuzzpos -%define fuzzoffset _fuzzoffset -%define NormalLight _NormalLight -%define viewheight _viewheight -%define fuzzviewheight _fuzzviewheight -%define CPU _CPU - -%define dc_pitch _dc_pitch -%define dc_colormap _dc_colormap -%define dc_color _dc_color -%define dc_iscale _dc_iscale -%define dc_texturefrac _dc_texturefrac -%define dc_srcblend _dc_srcblend -%define dc_destblend _dc_destblend -%define dc_source _dc_source -%define dc_yl _dc_yl -%define dc_yh _dc_yh -%define dc_x _dc_x -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_destorg _dc_destorg - -%define Col2RGB8 _Col2RGB8 -%define RGB32k _RGB32k - -%define dc_ctspan _dc_ctspan -%define dc_temp _dc_temp - -%define ds_xstep _ds_xstep -%define ds_ystep _ds_ystep -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define ds_x1 _ds_x1 -%define ds_x2 _ds_x2 -%define ds_xfrac _ds_xfrac -%define ds_yfrac _ds_yfrac -%define ds_y _ds_y - -%define ds_cursource _ds_cursource -%define ds_curcolormap _ds_curcolormap - -%define R_SetSpanSource_ASM _R_SetSpanSource_ASM -%define 
R_SetSpanSize_ASM _R_SetSpanSize_ASM -%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM -%define R_SetupShadedCol _R_SetupShadedCol -%define R_SetupAddCol _R_SetupAddCol -%define R_SetupAddClampCol _R_SetupAddClampCol - -%endif - -EXTERN ylookup -EXTERN centery -EXTERN fuzzpos -EXTERN fuzzoffset -EXTERN NormalLight -EXTERN viewheight -EXTERN fuzzviewheight -EXTERN CPU - -EXTERN dc_pitch -EXTERN dc_colormap -EXTERN dc_color -EXTERN dc_iscale -EXTERN dc_texturefrac -EXTERN dc_srcblend -EXTERN dc_destblend -EXTERN dc_source -EXTERN dc_yl -EXTERN dc_yh -EXTERN dc_x -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_destorg - -EXTERN dc_ctspan -EXTERN dc_temp - -EXTERN Col2RGB8 -EXTERN RGB32k - -EXTERN ds_xstep -EXTERN ds_ystep -EXTERN ds_colormap -EXTERN ds_source -EXTERN ds_x1 -EXTERN ds_x2 -EXTERN ds_xfrac -EXTERN ds_yfrac -EXTERN ds_y - -GLOBAL ds_cursource -GLOBAL ds_curcolormap - - -ds_cursource: - DD 0 - -ds_curcolormap: - DD 0 - - -; Local stuff: -lastAddress DD 0 -pixelcount DD 0 - - SECTION .text - - -GLOBAL @R_SetSpanSource_ASM@4 -GLOBAL R_SetSpanSource_ASM - -R_SetSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetSpanSource_ASM@4: - mov [spreada+2],ecx - mov [spreadb+2],ecx - mov [spreadc+2],ecx - mov [spreadd+2],ecx - mov [spreade+2],ecx - mov [spreadf+2],ecx - mov [spreadg+2],ecx - - mov [mspreada+2],ecx - mov [mspreadb+2],ecx - mov [mspreadc+2],ecx - mov [mspreadd+2],ecx - mov [mspreade+2],ecx - mov [mspreadf+2],ecx - mov [mspreadg+2],ecx - - selfmod spreada, mspreadg+6 - - mov [ds_cursource],ecx - ret - -GLOBAL @R_SetSpanColormap_ASM@4 -GLOBAL R_SetSpanColormap_ASM - -R_SetSpanColormap_ASM: - mov ecx,[esp+4] - -@R_SetSpanColormap_ASM@4: - mov [spmapa+2],ecx - mov [spmapb+2],ecx - mov [spmapc+2],ecx - mov [spmapd+2],ecx - mov [spmape+2],ecx - mov [spmapf+2],ecx - mov [spmapg+2],ecx - - mov [mspmapa+2],ecx - mov [mspmapb+2],ecx - mov [mspmapc+2],ecx - mov [mspmapd+2],ecx - mov [mspmape+2],ecx - mov [mspmapf+2],ecx - mov [mspmapg+2],ecx - - selfmod spmapa, mspmapg+6 - 
- mov [ds_curcolormap],ecx - ret - -GLOBAL R_SetSpanSize_ASM - -EXTERN SetTiltedSpanSize - -R_SetSpanSize_ASM: - mov edx,[esp+4] - mov ecx,[esp+8] - call SetTiltedSpanSize - - mov [dsy1+2],dl - mov [dsy2+2],dl - - mov [dsx1+2],cl - mov [dsx2+2],cl - mov [dsx3+2],cl - mov [dsx4+2],cl - mov [dsx5+2],cl - mov [dsx6+2],cl - mov [dsx7+2],cl - - mov [dmsy1+2],dl - mov [dmsy2+2],dl - - mov [dmsx1+2],cl - mov [dmsx2+2],cl - mov [dmsx3+2],cl - mov [dmsx4+2],cl - mov [dmsx5+2],cl - mov [dmsx6+2],cl - mov [dmsx7+2],cl - - push ecx - add ecx,edx - mov eax,1 - shl eax,cl - dec eax - mov [dsm1+2],eax - mov [dsm5+1],eax - mov [dsm6+1],eax - mov [dsm7+1],eax - - mov [dmsm1+2],eax - mov [dmsm5+1],eax - mov [dmsm6+1],eax - mov [dmsm7+1],eax - pop ecx - ror eax,cl - mov [dsm2+2],eax - mov [dsm3+2],eax - mov [dsm4+2],eax - - mov [dmsm2+2],eax - mov [dmsm3+2],eax - mov [dmsm4+2],eax - and eax,0xffff - not eax - mov [dsm8+2],eax - mov [dsm9+2],eax - - mov [dmsm8+2],eax - mov [dmsm9+2],eax - - neg dl - mov [dsy3+2],dl - mov [dsy4+2],dl - - mov [dmsy3+2],dl - mov [dmsy4+2],dl - - selfmod dsy1, dmsm7+6 - -aret: ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_start -_rtext_tmap_start: -%endif - -rtext_start: - -GLOBAL @R_DrawSpanP_ASM@0 -GLOBAL _R_DrawSpanP_ASM -GLOBAL R_DrawSpanP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanP_ASM@0: -_R_DrawSpanP_ASM: -R_DrawSpanP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl near rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp, 8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dsy1: shl edx,6 - mov ebp,[ds_xstep] -dsy3: shr ebp,26 - xor 
ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dsy4: shr ecx,26 -dsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dsy2: shl edx,6 -dsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dseven1 - -; do odd pixel - - mov ebp,ecx -dsx1: rol ebp,6 -dsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -spreada mov bl,[ebp+SPACEFILLER4] -spmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl - inc edi - -dseven1 shr esi,1 - jnc dsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dsm2: and ebp,0xfc00003f -dsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -spreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx3: rol eax,6 -dsm6: and eax,0xfff -spmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - add edi,2 -spreadc mov bl,[eax+SPACEFILLER4] ;read texel2 -spmapc mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dsrest test esi,esi - jz near dsdone - - align 16 - -dsloop mov ebp,ecx -spstep1d add edx,[esp] -spstep2d adc ecx,[esp+4] -dsm3: and ebp,0xfc00003f -dsx4: rol ebp,6 - mov eax,ecx -spstep1e add edx,[esp] -spstep2e adc ecx,[esp+4] -spreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx5: rol eax,6 -dsm5: and eax,0xfff -spmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - mov ebp,ecx -spreade mov bl,[eax+SPACEFILLER4] ;read texel2 -spstep1f add edx,[esp] -spstep2f adc ecx,[esp+4] -dsm4: and ebp,0xfc00003f -dsx6: rol ebp,6 -spmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov eax,ecx - mov [edi+1],bl ;store texel2 -spreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 -spmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - add edi,4 -dsx7: rol eax,6 -dsm7: and eax,0xfff - mov [edi-2],bl ;store texel3 -spreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -spstep1g add edx,[esp] -spstep2g adc ecx,[esp+4] -spmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - dec esi - mov 
[edi-1],bl ;store texel4 - jnz near dsloop - -dsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - -rdspret ret - -; This is the same as the previous routine, except it doesn't draw pixels -; where the texture's color value is 0. - -GLOBAL @R_DrawSpanMaskedP_ASM@0 -GLOBAL _R_DrawSpanMaskedP_ASM -GLOBAL R_DrawSpanMaskedP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanMaskedP_ASM@0: -_R_DrawSpanMaskedP_ASM: -R_DrawSpanMaskedP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp,8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dmsy1: shl edx,6 - mov ebp,[ds_xstep] -dmsy3: shr ebp,26 - xor ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dmsy4: shr ecx,26 -dmsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dmsy2: shl edx,6 -dmsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dmseven1 - -; do odd pixel - - mov ebp,ecx -dmsx1: rol ebp,6 -dmsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -mspreada mov bl,[ebp+SPACEFILLER4] - cmp bl,0 - je mspskipa -mspmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl -mspskipa: inc edi - -dmseven1 shr esi,1 - jnc dmsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dmsm2: and ebp,0xfc00003f -dmsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -mspreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx3: rol eax,6 -dmsm6: and eax,0xfff - cmp bl,0 - je mspskipb -mspmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspskipb add edi,2 -mspreadc mov bl,[eax+SPACEFILLER4] ;read texel2 - cmp bl,0 - je dmsrest -mspmapc mov 
bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dmsrest test esi,esi - jz near dmsdone - - align 16 - -dmsloop mov ebp,ecx -mspstep1d add edx,[esp] -mspstep2d adc ecx,[esp+4] -dmsm3: and ebp,0xfc00003f -dmsx4: rol ebp,6 - mov eax,ecx -mspstep1e add edx,[esp] -mspstep2e adc ecx,[esp+4] -mspreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx5: rol eax,6 -dmsm5: and eax,0xfff - cmp bl,0 - mov ebp,ecx - je mspreade -mspmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspreade mov bl,[eax+SPACEFILLER4] ;read texel2 -mspstep1f add edx,[esp] -mspstep2f adc ecx,[esp+4] -dmsm4: and ebp,0xfc00003f -dmsx6: rol ebp,6 - cmp bl,0 - mov eax,ecx - je mspreadf -mspmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi+1],bl ;store texel2 -mspreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 - add edi,4 -dmsx7: rol eax,6 -dmsm7: and eax,0xfff - cmp bl,0 - je mspreadg -mspmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - mov [edi-2],bl ;store texel3 -mspreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -mspstep1g add edx,[esp] -mspstep2g adc ecx,[esp+4] - cmp bl,0 - je mspskipg -mspmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - mov [edi-1],bl ;store texel4 -mspskipg dec esi - jnz near dmsloop - -dmsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - - ret - - - - -GLOBAL rt_shaded4cols_asm -GLOBAL _rt_shaded4cols_asm - -rt_shaded4cols_asm: -_rt_shaded4cols_asm: - mov ecx,[esp+8] - push ebp - mov ebp,[esp+16] - sub ebp,ecx - js near s4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] ; eax = destination - push ebx - push esi - mov esi,[dc_temp] - inc ebp ; ebp = count - add eax,[esp+16] - push edi - lea esi,[esi+ecx*4] ; esi = source - - align 16 - -s4loop: movzx edx,byte [esi] - movzx ecx,byte [esi+1] -s4cm1: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm2: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax] - shl edi,8 - movzx ecx,byte [eax+1] - sub ebx,edx - sub ecx,edi - mov 
ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg1: add ebx,[SPACEFILLER4+edx*4] -s4fg2: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi - mov bl,[RGB32k+edx] - movzx edx,byte [esi+2] - mov bh,[RGB32k+ecx] - movzx ecx,byte [esi+3] - mov [eax],bl - mov [eax+1],bh - -s4cm3: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm4: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax+2] - shl edi,8 - movzx ecx,byte [eax+3] - sub ebx,edx - sub ecx,edi - mov ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg3: add ebx,[SPACEFILLER4+edx*4] -s4fg4: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi -s4p: add eax,320 ; pitch - add esi,4 - mov bl,[RGB32k+edx] - mov bh,[RGB32k+ecx] -s4p2: mov [eax-320+2],bl -s4p3: mov [eax-320+3],bh - dec ebp - jne s4loop - - pop edi - pop esi - pop ebx -s4nil: pop ebp - ret - - align 16 - -GLOBAL rt_add4cols_asm -GLOBAL _rt_add4cols_asm - -rt_add4cols_asm: -_rt_add4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near a4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - - align 16 -a4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -a4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg1: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg2: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg1: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg2: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ecx,ebx - and ebp,edx - movzx ebx,byte [esi+2] - movzx edx,byte [esi+3] - mov 
cl,[RGB32k+ecx] - mov ch,[RGB32k+ebp] - mov [eax],cl - mov [eax+1],ch - - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -a4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg3: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg4: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg3: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg4: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ebx,ecx - and edx,ebp - mov cl,[RGB32k+ebx] - mov ch,[RGB32k+edx] - mov [eax+2],cl - mov [eax+3],ch - - add esi,4 -a4p: add eax,320 ; pitch - sub edi,1 - jne a4loop - pop ebp - pop esi - pop ebx -a4nil: pop edi - ret - - align 16 - -GLOBAL rt_addclamp4cols_asm -GLOBAL _rt_addclamp4cols_asm - -rt_addclamp4cols_asm: -_rt_addclamp4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near ac4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - push edi - - align 16 -ac4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - mov [esp],edi -ac4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -ac4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -ac4fg1: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg2: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg1: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg2: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax],cl - or edx,edi - mov ebp,edx - shr edx,15 - movzx ebx,byte [esi+2] - and ebp,edx - movzx edx,byte [esi+3] -ac4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; 
colormap - mov cl,[RGB32k+ebp] -ac4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap - mov [eax+1],cl - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -ac4fg3: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg4: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg3: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg4: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax+2],cl - or edx,edi - mov edi,[esp] - mov ebp,edx - shr edx,15 - add esi,4 - and edx,ebp - mov cl,[RGB32k+edx] - mov [eax+3],cl - -ac4p: add eax,320 ; pitch - sub edi,1 - jne ac4loop - pop edi - - pop ebp - pop esi - pop ebx -ac4nil: pop edi - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_end -_rtext_tmap_end: -%endif - align 16 - -;************************ - - SECTION .text - -GLOBAL R_SetupShadedCol -GLOBAL @R_SetupShadedCol@0 - -# Patch the values of dc_colormap and dc_color into the shaded column drawer. - -R_SetupShadedCol: -@R_SetupShadedCol@0: - mov eax,[dc_colormap] - cmp [s4cm1+3],eax - je .cmdone - mov [s4cm1+3],eax - mov [s4cm2+3],eax - mov [s4cm3+3],eax - mov [s4cm4+3],eax -.cmdone mov eax,[dc_color] - lea eax,[Col2RGB8+eax*4] - cmp [s4fg1+3],eax - je .cdone - mov [s4fg1+3],eax - mov [s4fg2+3],eax - mov [s4fg3+3],eax - mov [s4fg4+3],eax - selfmod s4cm1, s4fg4+7 -.cdone ret - -GLOBAL R_SetupAddCol -GLOBAL @R_SetupAddCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# unclamped adding column drawer. 
- -R_SetupAddCol: -@R_SetupAddCol@0: - mov eax,[dc_colormap] - cmp [a4cm1+3],eax - je .cmdone - mov [a4cm1+3],eax - mov [a4cm2+3],eax - mov [a4cm3+3],eax - mov [a4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [a4fg1+3],eax - je .sbdone - mov [a4fg1+3],eax - mov [a4fg2+3],eax - mov [a4fg3+3],eax - mov [a4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [a4bg1+3],eax - je .dbdone - mov [a4bg1+3],eax - mov [a4bg2+3],eax - mov [a4bg3+3],eax - mov [a4bg4+3],eax - selfmod a4cm1, a4bg4+7 -.dbdone ret - -GLOBAL R_SetupAddClampCol -GLOBAL @R_SetupAddClampCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# add with clamping column drawer. - -R_SetupAddClampCol: -@R_SetupAddClampCol@0: - mov eax,[dc_colormap] - cmp [ac4cm1+3],eax - je .cmdone - mov [ac4cm1+3],eax - mov [ac4cm2+3],eax - mov [ac4cm3+3],eax - mov [ac4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [ac4fg1+3],eax - je .sbdone - mov [ac4fg1+3],eax - mov [ac4fg2+3],eax - mov [ac4fg3+3],eax - mov [ac4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [ac4bg1+3],eax - je .dbdone - mov [ac4bg1+3],eax - mov [ac4bg2+3],eax - mov [ac4bg3+3],eax - mov [ac4bg4+3],eax - selfmod ac4cm1, ac4bg4+7 -.dbdone ret - -EXTERN setvlinebpl_ -EXTERN setpitch3 - -GLOBAL @ASM_PatchPitch@0 -GLOBAL _ASM_PatchPitch -GLOBAL ASM_PatchPitch - -ASM_PatchPitch: -_ASM_PatchPitch: -@ASM_PatchPitch@0: - mov eax,[dc_pitch] - mov [s4p+1],eax - mov [a4p+1],eax - mov [ac4p+1],eax - mov ecx,eax - neg ecx - inc ecx - inc ecx - mov [s4p2+2],ecx - inc ecx - mov [s4p3+2],ecx - selfmod rtext_start, rtext_end - call setpitch3 - jmp setvlinebpl_ diff --git a/src/asm_ia32/tmap2.asm b/src/asm_ia32/tmap2.asm deleted file mode 100644 index ab1695d3cd..0000000000 --- a/src/asm_ia32/tmap2.asm +++ /dev/null @@ -1,643 +0,0 @@ -;* -;* tmap2.nas -;* The tilted plane inner loop. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. 
-;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* -;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was -;* actually slightly slower than the more straight-forward approach -;* used here, probably because the trick requires too much setup time. 
-;* - -BITS 32 - -%include "valgrind.inc" - -%define SPACEFILLER4 (0x44444444) - -%ifndef M_TARGET_LINUX - -%define plane_sz _plane_sz -%define plane_su _plane_su -%define plane_sv _plane_sv -%define plane_shade _plane_shade -%define planelightfloat _planelightfloat -%define spanend _spanend -%define ylookup _ylookup -%define dc_destorg _dc_destorg -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define centery _centery -%define centerx _centerx -%define ds_curtiltedsource _ds_curtiltedsource -%define pviewx _pviewx -%define pviewy _pviewy -%define tiltlighting _tiltlighting - -%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM -%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM -%define R_CalcTiltedLighting _R_CalcTiltedLighting - -%endif - -EXTERN plane_sz -EXTERN plane_su -EXTERN plane_sv -EXTERN planelightfloat -EXTERN spanend -EXTERN ylookup -EXTERN dc_destorg -EXTERN ds_colormap -EXTERN centery -EXTERN centerx -EXTERN ds_source -EXTERN plane_shade -EXTERN pviewx -EXTERN pviewy -EXTERN tiltlighting -EXTERN R_CalcTiltedLighting - -GLOBAL ds_curtiltedsource - -%define sv_i plane_sv -%define sv_j plane_sv+4 -%define sv_k plane_sv+8 - -%define su_i plane_su -%define su_j plane_su+4 -%define su_k plane_su+8 - -%define sz_i plane_sz -%define sz_j plane_sz+4 -%define sz_k plane_sz+8 - -%define SPANBITS 3 - - section .bss - -start_u: resq 1 -start_v: resq 1 -step_u: resq 1 -step_v: resq 1 - -step_iz: resq 1 -step_uz: resq 1 -step_vz: resq 1 - -end_z: resd 1 - - section .data - -ds_curtiltedsource: dd SPACEFILLER4 - -fp_1: -spanrecips: dd 0x3f800000 ; 1/1 - dd 0x3f000000 ; 1/2 - dd 0x3eaaaaab ; 1/3 - dd 0x3e800000 ; 1/4 - dd 0x3e4ccccd ; 1/5 - dd 0x3e2aaaab ; 1/6 - dd 0x3e124925 ; 1/7 -fp_8recip: dd 0x3e000000 ; 1/8 - dd 0x3de38e39 ; 1/9 - dd 0x3dcccccd ; 1/10 - dd 0x3dba2e8c ; 1/11 - dd 0x3daaaaab ; 1/12 - dd 0x3d9d89d9 ; 1/13 - dd 0x3d924925 ; 1/14 - dd 0x3d888889 ; 1/15 - -fp_quickint: dd 0x3f800000 ; 1 - dd 0x40000000 ; 2 - dd 
0x40400000 ; 3 - dd 0x40800000 ; 4 - dd 0x40a00000 ; 5 - dd 0x40c00000 ; 6 - dd 0x40e00000 ; 7 -fp_8: dd 0x41000000 ; 8 - - section .text - -GLOBAL R_SetTiltedSpanSource_ASM -GLOBAL @R_SetTiltedSpanSource_ASM@4 - -R_SetTiltedSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetTiltedSpanSource_ASM@4: - mov [fetch1+3],ecx - mov [fetch2+3],ecx - mov [fetch3+3],ecx - mov [fetch4+3],ecx - mov [fetch5+3],ecx - mov [fetch6+3],ecx - mov [fetch7+3],ecx - mov [fetch8+3],ecx - mov [fetch9+3],ecx - mov [fetch10+3],ecx - mov [ds_curtiltedsource],ecx - selfmod rtext_start, rtext_end - ret - -GLOBAL SetTiltedSpanSize - -SetTiltedSpanSize: - push ecx - mov cl,dl - neg cl - mov eax,1 - shl eax,cl - mov cl,[esp] - neg cl - mov [x1+2],cl - mov [x2+2],cl - mov [x3+2],cl - mov [x4+2],cl - mov [x5+2],cl - mov [x6+2],cl - mov [x7+2],cl - mov [x8+2],cl - mov [x9+2],cl - mov [x10+2],cl - - sub cl,dl - dec eax - mov [y1+2],cl - mov [y2+2],cl - mov [y3+2],cl - mov [y4+2],cl - mov [y5+2],cl - mov [y6+2],cl - mov [y7+2],cl - mov [y8+2],cl - mov [y9+2],cl - mov [y10+2],cl - cmp eax,0 ; if x bits is 0, mask must be 0 too. 
- jz .notted - not eax -.notted: - pop ecx - - mov [m1+2],eax - mov [m2+2],eax - mov [m3+2],eax - mov [m4+2],eax - mov [m5+2],eax - mov [m6+2],eax - mov [m7+2],eax - mov [m8+2],eax - mov [m9+2],eax - mov [m10+2],eax - - selfmod rtext_start, rtext_end - - ret - -%ifndef M_TARGET_MACHO - SECTION .rtext progbits alloc exec write align=64 -%else - SECTION .text align=64 -GLOBAL _rtext_tmap2_start -_rtext_tmap2_start: -%endif - -rtext_start: - -GLOBAL R_DrawTiltedPlane_ASM -GLOBAL @R_DrawTiltedPlane_ASM@8 - -R_DrawTiltedPlane_ASM: - mov ecx,[esp+4] - mov edx,[esp+8] - - ; ecx = y - ; edx = x - -@R_DrawTiltedPlane_ASM@8: - push ebx - push esi - push edi - push ebp - - mov eax,[centery] - movzx ebx,word [spanend+ecx*2] - sub eax,ecx ; eax = centery-y - sub ebx,edx ; ebx = span length - 1 - mov edi,[ylookup+ecx*4] - push eax - add edi,[dc_destorg] - add edi,edx ; edi = frame buffer pointer - sub edx,[centerx] ; edx = x-centerx - push edx - xor eax,eax - - fild dword [esp+4] ; ymul - fild dword [esp] ; xmul | ymul - fld dword [sv_j] ; sv.j | xmul | ymul - fmul st0,st2 ; sv.j*ymul | xmul | ymul - fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul - fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul - fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul - fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st1,st0 ; 
su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul - faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul - fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul - fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z - fadd dword [su_k] ; u/z | v/z | 1/z - fxch st2 ; 1/z | v/z | u/z - fxch st1 ; v/z | 1/z | u/z - -; if lighting is on, fill out the light table - mov al,[plane_shade] - test al,al - jz .litup - - push ebx - fild dword [esp] ; width | v/z | 1/z | u/z - fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z - fadd st0,st2 ; 1/endz | v/z | 1/z | u/z - fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z - fmul dword [planelightfloat] - fxch st1 - fmul dword [planelightfloat] - sub esp,16 - fstp qword [esp] - fstp qword [esp+8] - call R_CalcTiltedLighting - add esp, 20 - xor eax, eax - -.litup add esp, 8 - -; calculate initial z, u, and v values - fld st1 ; 1/z | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | v/z | 1/z | u/z - - fld st3 ; u/z | z | v/z | 1/z | u/z - fmul st0,st1 ; u | z | v/z | 1/z | u/z - fld st2 ; v/z | u | z | v/z | 1/z | u/z - fmulp st2,st0 ; u | v | v/z | 1/z | u/z - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - - cmp ebx,7 ; Do we have at least 8 pixels to plot? 
- jl near ShortStrip - -; yes, we do, so figure out tex coords at end of this span - -; multiply i values by span length (8) - fld dword [su_i] ; su.i - fmul dword [fp_8] ; su.i*8 - fld dword [sv_i] ; sv.i | su.i*8 - fmul dword [fp_8] ; sv.i*8 | su.i*8 - fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8 - fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8 - fxch st2 ; su.i*8 | sv.i*8 | sz.i*8 - fstp qword [step_uz] ; sv.i*8 | sz.i*8 - fstp qword [step_vz] ; sz.i*8 - fst qword [step_iz] ; sz.i*8 - -; find tex coords at start of next span - faddp st4 - fld qword [step_vz] - faddp st3 - fld qword [step_uz] - faddp st5 - - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - -; now subtract to get stepping values for this span - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - -FullSpan: - xor eax,eax - cmp ebx,15 ; is there another complete span after this one? 
- jl NextIsShort - -; there is a complete span after this one - fld qword [step_iz] - faddp st4,st0 - fld qword [step_vz] - faddp st3,st0 - fld qword [step_uz] - faddp st5,st0 - jmp StartDiv - -NextIsShort: - cmp ebx,8 ; if next span is no more than 1 pixel, then we already - jle DrawFullSpan ; know everything we need to draw it - - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - -StartDiv: - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - -DrawFullSpan: - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -x1 shr ebp,26 -m1 and esi,0xfc000000 -y1 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch1 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+0],al - -x2 shr ebp,26 -m2 and esi,0xfc000000 -y2 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch2 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+1],al - -x3 shr ebp,26 -m3 and esi,0xfc000000 -y3 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch3 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-8] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+2],al - -x4 shr ebp,26 -m4 and esi,0xfc000000 -y4 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch4 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-12] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+3],al - -x5 shr ebp,26 -m5 and 
esi,0xfc000000 -y5 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch5 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-16] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+4],al - -x6 shr ebp,26 -m6 and esi,0xfc000000 -y6 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch6 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-20] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+5],al - -x7 shr ebp,26 -m7 and esi,0xfc000000 -y7 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch7 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-24] -x8 shr ecx,26 - mov al,[ebp+eax] -m8 and edx,0xfc000000 - mov [edi+6],al - -y8 shr edx,20 - mov ebp,[tiltlighting+ebx*4-28] -fetch8 mov al,[edx+ecx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi+7],al - add edi,8 - - sub ebx,8 - jl near Done - - fld st1 - fistp qword [start_u] - fld st2 - fistp qword [start_v] - - cmp ebx,7 - jl near EndIsShort - - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - jmp FullSpan - -OnlyOnePixelAtEnd: - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - -OnlyOnePixel: - mov edx,[start_v] - mov ecx,[start_u] - add 
edx,[pviewy] - add ecx,[pviewx] -x9 shr edx,26 -m9 and ecx,0xfc000000 -y9 shr ecx,20 - mov ebp,[tiltlighting] -fetch9 mov al,[ecx+edx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi],al - -Done: - fcompp - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -ShortStrip: - cmp ebx,0 - jle near OnlyOnePixel - -MoreThanOnePixel: - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - jmp CalcPartialSteps - -EndIsShort: - cmp ebx,0 - je near OnlyOnePixelAtEnd - -CalcPartialSteps: - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z - fxch st1 ; v'-v | ustep | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z - fxch st1 ; ustep | vstep | v/z | 1/z | u/z - fistp qword [step_u] ; vstep | v/z | 1/z | u/z - fistp qword [step_v] ; v/z | 1/z | u/z - - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -endloop: -x10 shr ebp,26 -m10 and esi,0xfc000000 - -y10 shr esi,20 - inc edi - - add ecx,[step_v] - add edx,[step_u] - -fetch10 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - - mov esi,edx 
- dec ebx - - mov al,[ebp+eax] - mov ebp,ecx - - mov [edi-1],al - jge endloop - - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap2_end -_rtext_tmap2_end: -%endif diff --git a/src/asm_ia32/tmap3.asm b/src/asm_ia32/tmap3.asm deleted file mode 100644 index bafc33627f..0000000000 --- a/src/asm_ia32/tmap3.asm +++ /dev/null @@ -1,344 +0,0 @@ -%include "valgrind.inc" - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vplce _vplce -%define vince _vince -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac -%define dc_pitch _dc_pitch - -%define setupvlinetallasm _setupvlinetallasm -%define vlinetallasm4 _vlinetallasm4 -%define vlinetallasmathlon4 _vlinetallasmathlon4 -%define vlinetallasm1 _vlinetallasm1 -%define prevlinetallasm1 _prevlinetallasm1 -%endif - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN ylookup -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac -EXTERN dc_pitch - -GLOBAL vlt4pitch -GLOBAL vlt1pitch - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -ALIGN 16 -GLOBAL setpitch3 -setpitch3: - mov [vltpitch+2], eax - mov [vltpitcha+2],eax - mov [vlt1pitch1+2], eax - mov [vlt1pitch2+2], eax - selfmod vltpitch, vlt1pitch2+6 - ret - -ALIGN 16 -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov ecx, [esp+4] - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - mov [shifter1a+2], cl - mov [shifter2a+2], cl - mov [shifter3a+2], cl - mov [shifter4a+2], cl - mov [preshift+2], 
cl - mov [shift11+2], cl - mov [shift12+2], cl - selfmod shifter1, shift12+6 - ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -GLOBAL _rtext_tmap3_start -_rtext_tmap3_start: -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -ALIGN 16 - -GLOBAL vlinetallasm4 -vlinetallasm4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1+3], eax - mov [source2+3], ebx - mov [source3+3], ecx - mov [source4+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1+2], eax - mov [lookup2+2], ebx - mov [lookup3+2], ecx - mov [lookup4+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1+2], eax - mov [step2+2], ebx - mov [step3+2], ecx - mov [step4+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1+2],eax - inc eax - mov [write2+2],eax - inc eax - mov [write3+2],eax - inc eax - mov [write4+2],eax - mov ebx, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopit, vltpitch - jmp loopit - -ALIGN 16 -loopit: - mov edx, ebx -shifter1: shr edx, 24 -source1: movzx edx, BYTE [edx+0x88888888] -lookup1: mov dl, [edx+0x88888888] -write1: mov [edi+0x88888880], dl -step1: add ebx, 0x88888888 - mov edx, ecx -shifter2: shr edx, 24 -source2: movzx edx, BYTE [edx+0x88888888] -lookup2: mov dl, [edx+0x88888888] -write2: mov [edi+0x88888881], dl -step2: add ecx, 0x88888888 - mov edx, esi -shifter3: shr edx, 24 -source3: movzx edx, BYTE [edx+0x88888888] -lookup3: mov dl, BYTE [edx+0x88888888] -write3: mov [edi+0x88888882], dl -step3: add esi, 0x88888888 - mov edx, eax -shifter4: shr edx, 24 -source4: movzx edx, BYTE [edx+0x88888888] -lookup4: mov dl, [edx+0x88888888] -write4: mov [edi+0x88888883], dl -step4: add eax, 0x88888888 
-vltpitch: add edi, 320 - jle near loopit - - mov [vplce], ebx - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - - ALIGN 16 - -GLOBAL vlinetallasmathlon4 -vlinetallasmathlon4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1a+3], eax - mov [source2a+3], ebx - mov [source3a+3], ecx - mov [source4a+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1a+2], eax - mov [lookup2a+2], ebx - mov [lookup3a+2], ecx - mov [lookup4a+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1a+2], eax - mov [step2a+2], ebx - mov [step3a+2], ecx - mov [step4a+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1a+2],eax - inc eax - mov [write2a+2],eax - inc eax - mov [write3a+2],eax - inc eax - mov [write4a+2],eax - mov ebp, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopita, vltpitcha - jmp loopita - -; Unfortunately, this code has not been carefully analyzed to determine -; how well it utilizes the processor's instruction units. Instead, I just -; kept rearranging code, seeing what sped it up and what slowed it down -; until I arrived at this. The is the fastest version I was able to -; manage, but that does not mean it cannot be made faster with careful -; instructing shuffling. 
- - ALIGN 64 - -loopita: mov edx, ebp - mov ebx, ecx -shifter1a: shr edx, 24 -shifter2a: shr ebx, 24 -source1a: movzx edx, BYTE [edx+0x88888888] -source2a: movzx ebx, BYTE [ebx+0x88888888] -step1a: add ebp, 0x88888888 -step2a: add ecx, 0x88888888 -lookup1a: mov dl, [edx+0x88888888] -lookup2a: mov dh, [ebx+0x88888888] - mov ebx, esi -write1a: mov [edi+0x88888880], dl -write2a: mov [edi+0x88888881], dh -shifter3a: shr ebx, 24 - mov edx, eax -source3a: movzx ebx, BYTE [ebx+0x88888888] -shifter4a: shr edx, 24 -step3a: add esi, 0x88888888 -source4a: movzx edx, BYTE [edx+0x88888888] -step4a: add eax, 0x88888888 -lookup3a: mov bl, [ebx+0x88888888] -lookup4a: mov dl, [edx+0x88888888] -write3a: mov [edi+0x88888882], bl -write4a: mov [edi+0x88888883], dl -vltpitcha: add edi, 320 - jle near loopita - - mov [vplce], ebp - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - -ALIGN 16 -GLOBAL prevlinetallasm1 -prevlinetallasm1: - mov ecx, [dc_count] - cmp ecx, 1 - ja vlinetallasm1 - - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - add eax, edx - mov ecx, [dc_source] -preshift: shr edx, 16 - push ebx - push edi - mov edi, [dc_colormap] - movzx ebx, byte [ecx+edx] - mov ecx, [dc_dest] - mov bl, byte [edi+ebx] - pop edi - mov byte [ecx], bl - pop ebx - ret - -ALIGN 16 -GLOBAL vlinetallasm1 -vlinetallasm1: - push ebp - push ebx - push edi - push esi - - mov ebp, [dc_count] - mov ebx, [dc_texturefrac] ; ebx = frac - mov edi, [dc_dest] - mov ecx, ebx -shift11: shr ecx, 16 - mov esi, [dc_source] - mov edx, [dc_iscale] -vlt1pitch1: sub edi, 0x88888888 - mov eax, [dc_colormap] - -loop2: - movzx ecx, BYTE [esi+ecx] - add ebx, edx -vlt1pitch2: add edi, 0x88888888 - mov cl,[eax+ecx] - mov [edi],cl - mov ecx,ebx -shift12: shr ecx,16 - dec ebp - jnz loop2 - - mov eax,ebx - pop esi - pop edi - pop ebx - pop ebp - ret - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap3_end -_rtext_tmap3_end: -%endif diff --git 
a/src/asm_x86_64/tmap3.asm b/src/asm_x86_64/tmap3.asm deleted file mode 100644 index e0f568fea1..0000000000 --- a/src/asm_x86_64/tmap3.asm +++ /dev/null @@ -1,150 +0,0 @@ -%ifnidn __OUTPUT_FORMAT__,win64 -%error tmap3.asm is for Win64 output. You should use tmap.s for other systems. -%endif - -BITS 64 -DEFAULT REL - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_pitch - -SECTION .text - -GLOBAL ASM_PatchPitch -ASM_PatchPitch: - mov ecx, [dc_pitch] - mov [pm+3], ecx - mov [vltpitch+3], ecx - ret - align 16 - -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - ret - align 16 - -; Yasm can't do progbits alloc exec for win64? -; Hmm, looks like it's automatic. No worries, then. -SECTION .rtext write ;progbits alloc exec - -GLOBAL vlinetallasm4 -PROC_FRAME vlinetallasm4 - rex_push_reg rbx - push_reg rdi - push_reg r15 - push_reg r14 - push_reg r13 - push_reg r12 - push_reg rbp - push_reg rsi - alloc_stack 8 ; Stack must be 16-byte aligned -END_PROLOGUE -; rax = bufplce base address -; rbx = -; rcx = offset from rdi/count (negative) -; edx/rdx = scratch -; rdi = bottom of columns to write to -; r8d-r11d = column offsets -; r12-r15 = palookupoffse[0] - palookupoffse[4] - - mov ecx, [dc_count] - mov rdi, [dc_dest] - test ecx, ecx - jle vltepilog ; count must be positive - - mov rax, [bufplce] - mov r8, [bufplce+8] - sub r8, rax - mov r9, [bufplce+16] - sub r9, rax - mov r10, [bufplce+24] - sub r10, rax - mov [source2+4], r8d - mov [source3+4], r9d - mov [source4+4], r10d - -pm: imul rcx, 320 - - mov r12, [palookupoffse] - mov r13, [palookupoffse+8] - mov r14, [palookupoffse+16] - mov r15, [palookupoffse+24] - - mov r8d, [vince] - mov r9d, [vince+4] - mov r10d, [vince+8] - mov r11d, [vince+12] - mov [step1+3], r8d - mov [step2+3], r9d - mov [step3+3], r10d - mov [step4+3], r11d - - add rdi, rcx - neg rcx - - mov r8d, 
[vplce] - mov r9d, [vplce+4] - mov r10d, [vplce+8] - mov r11d, [vplce+12] - jmp loopit - -ALIGN 16 -loopit: - mov edx, r8d -shifter1: shr edx, 24 -step1: add r8d, 0x88888888 - movzx edx, BYTE [rax+rdx] - mov ebx, r9d - mov dl, [r12+rdx] -shifter2: shr ebx, 24 -step2: add r9d, 0x88888888 -source2: movzx ebx, BYTE [rax+rbx+0x88888888] - mov ebp, r10d - mov bl, [r13+rbx] -shifter3: shr ebp, 24 -step3: add r10d, 0x88888888 -source3: movzx ebp, BYTE [rax+rbp+0x88888888] - mov esi, r11d - mov bpl, BYTE [r14+rbp] -shifter4: shr esi, 24 -step4: add r11d, 0x88888888 -source4: movzx esi, BYTE [rax+rsi+0x88888888] - mov [rdi+rcx], dl - mov [rdi+rcx+1], bl - mov sil, BYTE [r15+rsi] - mov [rdi+rcx+2], bpl - mov [rdi+rcx+3], sil - -vltpitch: add rcx, 320 - jl loopit - - mov [vplce], r8d - mov [vplce+4], r9d - mov [vplce+8], r10d - mov [vplce+12], r11d - -vltepilog: - add rsp, 8 - pop rsi - pop rbp - pop r12 - pop r13 - pop r14 - pop r15 - pop rdi - pop rbx - ret -vlinetallasm4_end: -ENDPROC_FRAME - ALIGN 16 - diff --git a/src/asm_x86_64/tmap3.s b/src/asm_x86_64/tmap3.s deleted file mode 100644 index 867d11c759..0000000000 --- a/src/asm_x86_64/tmap3.s +++ /dev/null @@ -1,141 +0,0 @@ -#%include "valgrind.inc" - - .section .text - -.globl ASM_PatchPitch -ASM_PatchPitch: - movl dc_pitch(%rip), %ecx - movl %ecx, pm+3(%rip) - movl %ecx, vltpitch+3(%rip) -# selfmod pm, vltpitch+6 - ret - .align 16 - -.globl setupvlinetallasm -setupvlinetallasm: - movb %dil, shifter1+2(%rip) - movb %dil, shifter2+2(%rip) - movb %dil, shifter3+2(%rip) - movb %dil, shifter4+2(%rip) -# selfmod shifter1, shifter4+3 - ret - .align 16 - - .section .rtext,"awx" - -.globl vlinetallasm4 - .type vlinetallasm4,@function -vlinetallasm4: - .cfi_startproc - push %rbx - push %rdi - push %r15 - push %r14 - push %r13 - push %r12 - push %rbp - push %rsi - subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux? 
- .cfi_adjust_cfa_offset 8 - -# rax = bufplce base address -# rbx = -# rcx = offset from rdi/count (negative) -# edx/rdx = scratch -# rdi = bottom of columns to write to -# r8d-r11d = column offsets -# r12-r15 = palookupoffse[0] - palookupoffse[4] - - movl dc_count(%rip), %ecx - movq dc_dest(%rip), %rdi - testl %ecx, %ecx - jle vltepilog # count must be positive - - movq bufplce(%rip), %rax - movq bufplce+8(%rip), %r8 - subq %rax, %r8 - movq bufplce+16(%rip), %r9 - subq %rax, %r9 - movq bufplce+24(%rip), %r10 - subq %rax, %r10 - movl %r8d, source2+4(%rip) - movl %r9d, source3+4(%rip) - movl %r10d, source4+4(%rip) - -pm: imulq $320, %rcx - - movq palookupoffse(%rip), %r12 - movq palookupoffse+8(%rip), %r13 - movq palookupoffse+16(%rip), %r14 - movq palookupoffse+24(%rip), %r15 - - movl vince(%rip), %r8d - movl vince+4(%rip), %r9d - movl vince+8(%rip), %r10d - movl vince+12(%rip), %r11d - movl %r8d, step1+3(%rip) - movl %r9d, step2+3(%rip) - movl %r10d, step3+3(%rip) - movl %r11d, step4+3(%rip) - - addq %rcx, %rdi - negq %rcx - - movl vplce(%rip), %r8d - movl vplce+4(%rip), %r9d - movl vplce+8(%rip), %r10d - movl vplce+12(%rip), %r11d -# selfmod loopit, vltepilog - jmp loopit - - .align 16 -loopit: - movl %r8d, %edx -shifter1: shrl $24, %edx -step1: addl $0x44444444, %r8d - movzbl (%rax,%rdx), %edx - movl %r9d, %ebx - movb (%r12,%rdx), %dl -shifter2: shrl $24, %ebx -step2: addl $0x44444444, %r9d -source2: movzbl 0x44444444(%rax,%rbx), %ebx - movl %r10d, %ebp - movb (%r13,%rbx), %bl -shifter3: shr $24, %ebp -step3: addl $0x44444444, %r10d -source3: movzbl 0x44444444(%rax,%rbp), %ebp - movl %r11d, %esi - movb (%r14,%rbp), %bpl -shifter4: shr $24, %esi -step4: add $0x44444444, %r11d -source4: movzbl 0x44444444(%rax,%rsi), %esi - movb %dl, (%rdi,%rcx) - movb %bl, 1(%rdi,%rcx) - movb (%r15,%rsi), %sil - movb %bpl, 2(%rdi,%rcx) - movb %sil, 3(%rdi,%rcx) - -vltpitch: addq $320, %rcx - jl loopit - - movl %r8d, vplce(%rip) - movl %r9d, vplce+4(%rip) - movl %r10d, 
vplce+8(%rip) - movl %r11d, vplce+12(%rip) - -vltepilog: - addq $8, %rsp - .cfi_adjust_cfa_offset -8 - pop %rsi - pop %rbp - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rdi - pop %rbx - ret - .cfi_endproc - .align 16 - - diff --git a/src/d_main.cpp b/src/d_main.cpp index 0f2d5af92b..04ba34cbb0 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -1030,7 +1030,7 @@ void D_DoomLoop () catch (CVMAbortException &error) { error.MaybePrintMessage(); - Printf("%s", error.stacktrace); + Printf("%s", error.stacktrace.GetChars()); D_ErrorCleanup(); } } diff --git a/src/doomtype.h b/src/doomtype.h index a9818df78c..264713d1b7 100644 --- a/src/doomtype.h +++ b/src/doomtype.h @@ -48,57 +48,6 @@ class PClassActor; typedef TMap FClassMap; -// Since this file is included by everything, it seems an appropriate place -// to check the NOASM/USEASM macros. - -// There are three assembly-related macros: -// -// NOASM - Assembly code is disabled -// X86_ASM - Using ia32 assembly code -// X64_ASM - Using amd64 assembly code -// -// Note that these relate only to using the pure assembly code. Inline -// assembly may still be used without respect to these macros, as -// deemed appropriate. - -#ifndef NOASM -// Select the appropriate type of assembly code to use. - -#if defined(_M_IX86) || defined(__i386__) - -#define X86_ASM -#ifdef X64_ASM -#undef X64_ASM -#endif - -#elif defined(_M_X64) || defined(__amd64__) - -#define X64_ASM -#ifdef X86_ASM -#undef X86_ASM -#endif - -#else - -#define NOASM - -#endif - -#endif - -#ifdef NOASM -// Ensure no assembly macros are defined if NOASM is defined. 
- -#ifdef X86_ASM -#undef X86_ASM -#endif - -#ifdef X64_ASM -#undef X64_ASM -#endif - -#endif - #if defined(_MSC_VER) #define NOVTABLE __declspec(novtable) diff --git a/src/r_3dfloors.cpp b/src/r_3dfloors.cpp index 61a23187d4..87c8af618e 100644 --- a/src/r_3dfloors.cpp +++ b/src/r_3dfloors.cpp @@ -15,6 +15,11 @@ #include "c_cvars.h" #include "r_3dfloors.h" +CVAR(Int, r_3dfloors, true, 0); + +namespace swrenderer +{ + // external variables int fake3D; F3DFloor *fakeFloor; @@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL; int CurrentMirror = 0; int CurrentSkybox = 0; -CVAR(Int, r_3dfloors, true, 0); - // private variables int height_max = -1; TArray toplist; @@ -160,3 +163,4 @@ void R_3D_LeaveSkybox() CurrentSkybox--; } +} diff --git a/src/r_3dfloors.h b/src/r_3dfloors.h index cacb974443..a703ae19a4 100644 --- a/src/r_3dfloors.h +++ b/src/r_3dfloors.h @@ -3,6 +3,11 @@ #include "p_3dfloors.h" +EXTERN_CVAR(Int, r_3dfloors); + +namespace swrenderer +{ + // special types struct HeightLevel @@ -57,7 +62,6 @@ extern HeightLevel *height_top; extern HeightLevel *height_cur; extern int CurrentMirror; extern int CurrentSkybox; -EXTERN_CVAR(Int, r_3dfloors); // functions void R_3D_DeleteHeights(); @@ -67,4 +71,6 @@ void R_3D_ResetClip(); void R_3D_EnterSkybox(); void R_3D_LeaveSkybox(); +} + #endif diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 8d423b3b31..91fb86e928 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -58,6 +58,13 @@ #include "po_man.h" #include "r_data/colormaps.h" +CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + +namespace swrenderer +{ + using namespace drawerargs; + seg_t* curline; side_t* sidedef; line_t* linedef; @@ -104,8 +111,6 @@ TArray WallPortals(1000); // note: this array needs to go away as subsector_t *InSubsector; -CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? 
-EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); void R_StoreWallRange (int start, int stop); @@ -1396,3 +1401,5 @@ void R_RenderBSPNode (void *node) } R_Subsector ((subsector_t *)((BYTE *)node - 1)); } + +} diff --git a/src/r_bsp.h b/src/r_bsp.h index 48ca7565bb..e4d70c4cf1 100644 --- a/src/r_bsp.h +++ b/src/r_bsp.h @@ -27,6 +27,11 @@ #include #include "r_defs.h" +EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? + +namespace swrenderer +{ + // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow // the texture calculations. @@ -109,8 +114,6 @@ extern WORD MirrorFlags; typedef void (*drawfunc_t) (int start, int stop); -EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? - // BSP? void R_ClearClipSegs (short left, short right); void R_ClearDrawSegs (); @@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +} #endif diff --git a/src/r_defs.h b/src/r_defs.h index 0f5ed0017b..6efca5cde8 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -59,7 +59,7 @@ enum SIL_BOTH }; -extern size_t MaxDrawSegs; +namespace swrenderer { extern size_t MaxDrawSegs; } struct FDisplacement; // diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 6f58ec2a3a..52f5f24e16 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1,27 +1,36 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. 
See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// The actual span/column drawing functions. -// Here find the main potential for optimization, -// e.g. inline assembly, different algorithms. -// -//----------------------------------------------------------------------------- +/* +** r_draw.cpp +** +**--------------------------------------------------------------------------- +** Copyright 1998-2016 Randy Heit +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +*/ #include @@ -38,2850 +47,1312 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw.h" +#include "r_draw_pal.h" +#include "r_thread.h" -#include "gi.h" -#include "stats.h" -#include "x86.h" - -#undef RANGECHECK - -// status bar height at bottom of screen -// [RH] status bar position at bottom of screen -extern int ST_Y; - -// -// All drawing to the view buffer is accomplished in this file. -// The other refresh files only know about ccordinates, -// not the architecture of the frame buffer. -// Conveniently, the frame buffer is a linear one, -// and we need only the base address, -// and the total size == width*height*depth/8., -// - -BYTE* viewimage; -extern "C" { -int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; -} -int scaledviewwidth; - -// [RH] Pointers to the different column drawers. -// These get changed depending on the current -// screen depth and asm/no asm. -void (*R_DrawColumnHoriz)(void); -void (*R_DrawTranslatedColumn)(void); -void (*R_DrawShadedColumn)(void); -void (*R_DrawSpan)(void); -void (*R_DrawSpanMasked)(void); - -// -// R_DrawColumn -// Source is the top of the column to scale. 
-// -double dc_texturemid; -extern "C" { -int dc_pitch=0xABadCafe; // [RH] Distance between rows - -lighttable_t* dc_colormap; -int dc_x; -int dc_yl; -int dc_yh; -fixed_t dc_iscale; -fixed_t dc_texturefrac; -int dc_color; // [RH] Color for column filler -DWORD dc_srccolor; -DWORD *dc_srcblend; // [RH] Source and destination -DWORD *dc_destblend; // blending lookups - -// first pixel in a column (possibly virtual) -const BYTE* dc_source; - -BYTE* dc_dest; -int dc_count; - -DWORD vplce[4]; -DWORD vince[4]; -BYTE* palookupoffse[4]; -const BYTE* bufplce[4]; -const BYTE* bufplce2[4]; -uint32_t bufheight[4]; - -// just for profiling -int dccount; -} - -int dc_fillcolor; -BYTE *dc_translation; -BYTE shadetables[NUMCOLORMAPS*16*256]; -FDynamicColormap ShadeFakeColormap[16]; -BYTE identitymap[256]; - -EXTERN_CVAR (Int, r_columnmethod) - - -void R_InitShadeMaps() +namespace swrenderer { - int i,j; - // set up shading tables for shaded columns - // 16 colormap sets, progressing from full alpha to minimum visible alpha + // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; - BYTE *table = shadetables; + double dc_texturemid; - // Full alpha - for (i = 0; i < 16; ++i) + int ylookup[MAXHEIGHT]; + uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; + FDynamicColormap ShadeFakeColormap[16]; + uint8_t identitymap[256]; + FDynamicColormap identitycolormap; + int fuzzoffset[FUZZTABLE + 1]; + int fuzzpos; + int fuzzviewheight; + + namespace drawerargs { - ShadeFakeColormap[i].Color = ~0u; - ShadeFakeColormap[i].Desaturate = ~0u; - ShadeFakeColormap[i].Next = NULL; - ShadeFakeColormap[i].Maps = table; + int dc_pitch; + lighttable_t *dc_colormap; + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_textureheight; + int dc_color; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + uint32_t *dc_srcblend; + 
uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + uint8_t *dc_translation; + uint8_t *dc_dest; + uint8_t *dc_destorg; + int dc_destheight; + int dc_count; + uint32_t vplce[4]; + uint32_t vince[4]; + uint8_t *palookupoffse[4]; + fixed_t palookuplight[4]; + const uint8_t *bufplce[4]; + const uint8_t *bufplce2[4]; + uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; + int vlinebits; + int mvlinebits; + int tmvlinebits; + int ds_y; + int ds_x1; + int ds_x2; + lighttable_t * ds_colormap; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + int ds_color; + unsigned int dc_tspans[4][MAXHEIGHT]; + unsigned int *dc_ctspan[4]; + unsigned int *horizspan[4]; + } - for (j = 0; j < NUMCOLORMAPS; ++j) + void R_InitColumnDrawers() + { + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + + void R_InitShadeMaps() + { + int i, j; + // set up shading tables for shaded columns + // 16 colormap sets, progressing from full alpha to minimum visible alpha + + uint8_t *table = shadetables; + + // Full alpha + for (i = 0; i < 16; ++i) { - int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16-i); - for (int k = 0; k < 256; ++k) + ShadeFakeColormap[i].Color = ~0u; + ShadeFakeColormap[i].Desaturate = ~0u; + ShadeFakeColormap[i].Next = NULL; + ShadeFakeColormap[i].Maps = table; + + for (j = 0; j < NUMCOLORMAPS; ++j) { - BYTE v = (((k+2) * a) + 256) >> 14; - table[k] = MIN (v, 64); - } - table += 256; - } - } - for (i = 0; i < NUMCOLORMAPS*16*256; ++i) - { - assert(shadetables[i] <= 64); - } - - // Set up a guaranteed identity map - for (i = 0; i 
< 256; ++i) - { - identitymap[i] = i; - } -} - -/************************************/ -/* */ -/* Palettized drawers (C versions) */ -/* */ -/************************************/ - -// -// A column is a vertical slice/span from a wall texture that, -// given the DOOM style restrictions on the view orientation, -// will always have constant z depth. -// Thus a special case loop for very fast rendering can -// be used. It has also been used with Wolfenstein 3D. -// -void R_DrawColumn (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = dc_dest; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. - do - { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = colormap[source[frac>>FRACBITS]]; - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - - -// [RH] Just fills a column with a color -void R_FillColumnP (void) -{ - int count; - BYTE* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = color; - dest += pitch; - } while (--count); - } -} - -void R_FillAddColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillAddClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = fg + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; - - do - { - DWORD a = fg - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillRevSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - 
fg; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -// -// Spectre/Invisibility. -// -#define FUZZTABLE 50 - -extern "C" -{ -int fuzzoffset[FUZZTABLE+1]; // [RH] +1 for the assembly routine -int fuzzpos = 0; -int fuzzviewheight; -} -/* - FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF -*/ - -static const signed char fuzzinit[FUZZTABLE] = { - 1,-1, 1,-1, 1, 1,-1, - 1, 1,-1, 1, 1, 1,-1, - 1, 1, 1,-1,-1,-1,-1, - 1,-1,-1, 1, 1, 1, 1,-1, - 1,-1, 1, 1,-1,-1, 1, - 1,-1,-1,-1,-1, 1, 1, - 1, 1,-1, 1, 1,-1, 1 -}; - -void R_InitFuzzTable (int fuzzoff) -{ - int i; - - for (i = 0; i < FUZZTABLE; i++) - { - fuzzoffset[i] = fuzzinit[i] * fuzzoff; - } -} - -// -// Creates a fuzzy image by copying pixels from adjacent ones above and below. -// Used with an all black colormap, this could create the SHADOW effect, -// i.e. spectres and invisible players. -// -void R_DrawFuzzColumn (void) -{ - int count; - BYTE *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + dc_destorg; - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. 
- int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do + int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); + for (int k = 0; k < 256; ++k) { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - -// -// R_DrawTranlucentColumn -// - -/* -[RH] This translucency algorithm is based on DOSDoom 0.65's, but uses -a 32k RGB table instead of an 8k one. At least on my machine, it's -slightly faster (probably because it uses only one shift instead of -two), and it looks considerably less green at the ends of the -translucency range. The extra size doesn't appear to be an issue. - -The following note is from DOSDoom 0.65: - -New translucency algorithm, by Erik Sandberg: - -Basically, we compute the red, green and blue values for each pixel, and -then use a RGB table to check which one of the palette colours that best -represents those RGB values. The RGB table is 8k big, with 4 R-bits, -5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k -table takes up more memory and results in more cache misses, so an 8k -table seemed to be quite ultimate. - -The computation of the RGB for each pixel is accelerated by using two -1k tables for each translucency level. 
-The xth element of one of these tables contains the r, g and b values for -the colour x, weighted for the current translucency level (for example, -the weighted rgb values for background colour at 75% translucency are 1/4 -of the original rgb values). The rgb values are stored as three -low-precision fixed point values, packed into one long per colour: -Bit 0-4: Frac part of blue (5 bits) -Bit 5-8: Int part of blue (4 bits) -Bit 9-13: Frac part of red (5 bits) -Bit 14-17: Int part of red (4 bits) -Bit 18-22: Frac part of green (5 bits) -Bit 23-27: Int part of green (5 bits) -Bit 28-31: All zeros (4 bits) - -The point of this format is that the two colours now can be added, and -then be converted to a RGB table index very easily: First, we just set -all the frac bits and the four upper zero bits to 1. It's now possible -to get the RGB table index by anding the current value >> 5 with the -current value >> 19. When asm-optimised, this should be the fastest -algorithm that uses RGB tables. - -*/ - -void R_DrawAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// -// R_DrawTranslatedColumn -// Used to draw player sprites with the green colorramp mapped to others. -// Could be used with different translation tables, e.g. the lighter colored -// version of the BaronOfHell, the HellKnight, uses identical sprites, kinda -// brightened up. 
-// - -void R_DrawTranslatedColumnP_C (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = colormap[translation[source[frac>>FRACBITS]]]; - dest += pitch; - - frac += fracstep; - } while (--count); - } -} - -// Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Draw a column whose "color" values are actually translucency -// levels for a base color stored in dc_color. 
-void R_DrawShadedColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add source to destination, clamping it to white -void R_DrawAddClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add translated source to destination, clamping it to white -void R_DrawAddClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - 
a |= b; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD 
*fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - - -// -// R_DrawSpan -// With DOOM style restrictions on view orientation, -// the floors and ceilings consist of horizontal slices -// or spans with constant z depth. -// However, rotation around the world z axis is possible, -// thus this mapping, while simpler and faster than -// perspective correct texture mapping, has to traverse -// the texture at an angle in all but a few cases. -// In consequence, flats are not stored by column (like walls), -// and the inner loop has to step in texture space u and v. -// -// [RH] I'm not sure who wrote this, but floor/ceiling mapping -// *is* perspective correct for spans of constant z depth, which -// Doom guarantees because it does not let you change your pitch. -// Also, because of the new texture system, flats *are* stored by -// column to make it easy to use them on walls too. 
To accomodate -// this, the use of x/u and y/v in R_DrawSpan just needs to be -// swapped. -// -extern "C" { -int ds_color; // [RH] color for non-textured spans - -int ds_y; -int ds_x1; -int ds_x2; - -lighttable_t* ds_colormap; - -dsfixed_t ds_xfrac; -dsfixed_t ds_yfrac; -dsfixed_t ds_xstep; -dsfixed_t ds_ystep; -int ds_xbits; -int ds_ybits; - -// start of a floor/ceiling tile image -const BYTE* ds_source; - -// just for profiling -int dscount; - -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif -} - -//========================================================================== -// -// R_SetSpanSource -// -// Sets the source bitmap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanSource(const BYTE *pixels) -{ - ds_source = pixels; -#ifdef X86_ASM - if (ds_cursource != ds_source) - { - R_SetSpanSource_ASM(pixels); - } -#endif -} - -//========================================================================== -// -// R_SetSpanColormap -// -// Sets the colormap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanColormap(BYTE *colormap) -{ - ds_colormap = colormap; -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - { - R_SetSpanColormap_ASM (ds_colormap); - } -#endif -} - -//========================================================================== -// -// R_SetupSpanBits -// -// Sets the texture size for the span drawing routines. 
-// -//========================================================================== - -void R_SetupSpanBits(FTexture *tex) -{ - tex->GetWidth (); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } -#ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); -#endif -} - -// -// Draws the actual span. -//#ifndef X86_ASM -void R_DrawSpanP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error ("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } -// dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Draw a span with holes -void R_DrawSpanMaskedP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - int texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - int texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -//#endif - -void R_DrawSpanTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Just fill a span with a color -void R_FillSpan (void) -{ - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); -} - -// Draw a voxel slab -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. - -// Actually, this is just R_DrawColumn with an extra width parameter. 
- -#ifndef X86_ASM -static const BYTE *slabcolormap; - -extern "C" void R_SetupDrawSlabC(const BYTE *colormap) -{ - slabcolormap = colormap; -} - -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) -{ - int x; - const BYTE *colormap = slabcolormap; - int pitch = dc_pitch; - - assert(dx > 0); - - if (dx == 1) - { - while (dy > 0) - { - *p = colormap[vptr[v >> FRACBITS]]; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 2) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 3) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 4) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p[3] = color; - p += pitch; - v += vi; - dy--; - } - } - else while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - // The optimizer will probably turn this into a memset call. - // Since dx is not likely to be large, I'm not sure that's a good thing, - // hence the alternatives above. 
- for (x = 0; x < dx; x++) - { - p[x] = color; - } - p += pitch; - v += vi; - dy--; - } -} -#endif - - -/****************************************************/ -/****************************************************/ - -// wallscan stuff, in C - -#ifndef X86_ASM -static DWORD vlinec1 (); -static int vlinebits; - -DWORD (*dovline1)() = vlinec1; -DWORD (*doprevline1)() = vlinec1; - -#ifdef X64_ASM -extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 -extern "C" void setupvlinetallasm (int); -#else -static void vlinec4 (); -void (*dovline4)() = vlinec4; -#endif - -static DWORD mvlinec1(); -static void mvlinec4(); -static int mvlinebits; - -DWORD (*domvline1)() = mvlinec1; -void (*domvline4)() = mvlinec4; - -#else - -extern "C" -{ -DWORD vlineasm1 (); -DWORD prevlineasm1 (); -DWORD vlinetallasm1 (); -DWORD prevlinetallasm1 (); -void vlineasm4 (); -void vlinetallasmathlon4 (); -void vlinetallasm4 (); -void setupvlineasm (int); -void setupvlinetallasm (int); - -DWORD mvlineasm1(); -void mvlineasm4(); -void setupmvlineasm (int); -} - -DWORD (*dovline1)() = vlinetallasm1; -DWORD (*doprevline1)() = prevlinetallasm1; -void (*dovline4)() = vlinetallasm4; - -DWORD (*domvline1)() = mvlineasm1; -void (*domvline4)() = mvlineasm4; -#endif - -void setupvline (int fracbits) -{ -#ifdef X86_ASM - if (CPU.Family <= 5) - { - if (fracbits >= 24) - { - setupvlineasm (fracbits); - dovline4 = vlineasm4; - dovline1 = vlineasm1; - doprevline1 = prevlineasm1; - } - else - { - setupvlinetallasm (fracbits); - dovline1 = vlinetallasm1; - doprevline1 = prevlinetallasm1; - dovline4 = vlinetallasm4; - } - } - else - { - setupvlinetallasm (fracbits); - if (CPU.bIsAMD && CPU.AMDFamily >= 7) - { - dovline4 = vlinetallasmathlon4; - } - } -#else - vlinebits = fracbits; -#ifdef X64_ASM - setupvlinetallasm(fracbits); -#endif -#endif -} - -#if !defined(X86_ASM) -DWORD vlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; 
- const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - do - { - *dest = colormap[source[frac>>bits]]; - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -#ifndef _M_X64 -void vlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - do - { - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#else -// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though -// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them. 
-void vlinec4() -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - auto pal0 = palookupoffse[0]; - auto pal1 = palookupoffse[1]; - auto pal2 = palookupoffse[2]; - auto pal3 = palookupoffse[3]; - auto buf0 = bufplce[0]; - auto buf1 = bufplce[1]; - auto buf2 = bufplce[2]; - auto buf3 = bufplce[3]; - const auto vince0 = vince[0]; - const auto vince1 = vince[1]; - const auto vince2 = vince[2]; - const auto vince3 = vince[3]; - auto vplce0 = vplce[0]; - auto vplce1 = vplce[1]; - auto vplce2 = vplce[2]; - auto vplce3 = vplce[3]; - - do - { - dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; - dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; - dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; - dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; - dest += dc_pitch; - } while (--count); -} -#endif - -#endif - -void setupmvline (int fracbits) -{ -#if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; -#else - mvlinebits = fracbits; -#endif -} - -#if !defined(X86_ASM) -DWORD mvlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - *dest = colormap[pix]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void mvlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - do - { - BYTE pix; - - pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; - pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; - pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = 
palookupoffse[2][pix]; vplce[2] = place+vince[2]; - pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#endif - -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; - -static void R_DrawFogBoundarySection (int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine (int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); -} - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) -{ - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. - - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2-x1-1); - int x = x2-1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x+1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. 
- R_DrawFogBoundarySection (t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - } - else - { - if (dc_colormap != basecolormapdata) - { - stop = MIN (t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine (t2++, xr); + uint8_t v = (((k + 2) * a) + 256) >> 14; + table[k] = MIN(v, 64); } - stop = MAX (b1, t2); - while (b2 > stop) + table += 256; + } + } + for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) + { + assert(shadetables[i] <= 64); + } + + // Set up a guaranteed identity map + for (i = 0; i < 256; ++i) + { + identitymap[i] = i; + } + } + + void R_InitFuzzTable(int fuzzoff) + { + /* + FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF + */ + + static const int8_t fuzzinit[FUZZTABLE] = { + 1,-1, 1,-1, 1, 1,-1, + 1, 1,-1, 1, 1, 1,-1, + 1, 1, 1,-1,-1,-1,-1, + 1,-1,-1, 1, 1, 1, 1,-1, + 1,-1, 1, 1,-1,-1, 1, + 1,-1,-1,-1,-1, 1, 1, + 1, 1,-1, 1, 1,-1, 1 + }; + + for (int i = 0; i < FUZZTABLE; i++) + { + fuzzoffset[i] = fuzzinit[i] * fuzzoff; + } + } + + namespace + { + bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) { - R_DrawFogBoundaryLine (--b2, xr); + colfunc = R_FillColumn; + hcolfunc_post1 = rt_copy1col; + hcolfunc_post4 = rt_copy4cols; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + else + { + colfunc = transcolfunc; + hcolfunc_post1 = rt_tlate1col; + hcolfunc_post4 = rt_tlate4cols; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else + { + colfunc = R_DrawTlatedAddColumn; + hcolfunc_post1 = rt_tlateadd1col; + hcolfunc_post4 = rt_tlateadd4cols; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddClampColumn; + hcolfunc_post1 = 
rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else + { + colfunc = R_DrawAddClampTranslatedColumn; + hcolfunc_post1 = rt_tlateaddclamp1col; + hcolfunc_post4 = rt_tlateaddclamp4cols; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else + { + colfunc = R_DrawSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlatesubclamp1col; + hcolfunc_post4 = rt_tlatesubclamp4cols; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillRevSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; + } + else + { + colfunc = R_DrawRevSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlaterevsubclamp1col; + hcolfunc_post4 = rt_tlaterevsubclamp4cols; + } + return true; + + default: + return false; + } + } + + fixed_t GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + FDynamicColormap *basecolormapsave; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + style.CheckFuzz(); + + if (style.BlendOp == STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = 
fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + dc_translation = table->Remap; } } - else + } + basecolormapsave = basecolormap; + hcolfunc_pre = R_DrawColumnHoriz; + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = fuzzcolfunc; + return DoDraw0; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0) + return DontDraw; + colfunc = R_DrawShadedColumn; + hcolfunc_post1 = rt_shaded1col; + hcolfunc_post4 = rt_shaded4cols; + dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; + dc_colormap = (basecolormap = &ShadeFakeColormap[16 - alpha])->Maps; + if (fixedlightlev >= 0 && fixedcolormap == NULL) { - t2 = MAX (t2, MIN (t1, b2)); - b2 = MIN (b2, MAX (b1, t2)); + dc_colormap += fixedlightlev; } + return r_columnmethod ? DoDraw1 : DoDraw0; + } - stop = MIN (t2, b1); - while (t1 < stop) + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. + dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + if (style.Flags & STYLEF_InvertSource) { - spanend[t1++] = x; - } - stop = MAX (b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; + r = 255 - r; + g = 255 - g; + b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. 
It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + hcolfunc_pre = R_FillColumnHoriz; + R_SetColorMapLight(identitycolormap.Maps, 0, 0); } - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection (t2, b2, x1); - } -} - -int tmvlinebits; - -void setuptmvline (int bits) -{ - tmvlinebits = bits; -} - -fixed_t tmvline1_add () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) + if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_add () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_addclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = 
fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_addclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_subclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_subclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += 
dc_pitch; - } while (--count); -} - -fixed_t tmvline1_revsubclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_revsubclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[0]; - int textureheight0 = bufheight[0]; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg 
= source0[sample_index]; - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; - int textureheight0 = bufheight[0]; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = 
RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - 
c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - output[col] = source0[col][sample_index]; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - 
const uint8_t *source0 = bufplce[0]; - const uint8_t *source1 = bufplce2[0]; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; - } - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], 
bufplce[1], bufplce[2], bufplce[3] }; - const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / 
fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade 
bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -//========================================================================== -// -// R_GetColumn -// -//========================================================================== - -const BYTE *R_GetColumn (FTexture *tex, int col) -{ - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - return tex->GetColumn (col, NULL); -} - - -// [RH] Initialize the column drawer pointers -void R_InitColumnDrawers () -{ -#ifdef X86_ASM - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; -#else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; -#endif -} - -// [RH] Choose column drawers in a single place -EXTERN_CVAR (Int, r_drawfuzz) -EXTERN_CVAR (Bool, r_drawtrans) -EXTERN_CVAR (Float, transsouls) - -static FDynamicColormap *basecolormapsave; - -static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) -{ - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillColumnP; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - } - else - { - colfunc = transcolfunc; - hcolfunc_post1 = rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel>>10]; - dc_destblend = Col2RGB8[bglevel>>10]; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddColumnP_C; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else - { - colfunc = R_DrawTlatedAddColumnP_C; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddClampColumnP_C; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else - { - colfunc = R_DrawAddClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = 
rt_tlateaddclamp4cols; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawSubClampColumnP_C; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else - { - colfunc = R_DrawSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawRevSubClampColumnP_C; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; - } - else - { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = rt_tlaterevsubclamp4cols; - } - return true; - - default: - return false; - } -} - -static fixed_t GetAlpha(int type, fixed_t alpha) -{ - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } -} - -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color) -{ - fixed_t fglevel, bglevel; - - style.CheckFuzz(); - - if (style.BlendOp == STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp (alpha, 0, OPAQUE); - } - - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = 
TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - dc_translation = table->Remap; - } - } - basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; - - // Check for special modes - if (style.BlendOp == STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return DoDraw0; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0) return DontDraw; - colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; - if (fixedlightlev >= 0 && fixedcolormap == NULL) - { - dc_colormap += fixedlightlev; } return r_columnmethod ? DoDraw1 : DoDraw0; } - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; - if (style.Flags & STYLEF_InvertSource) + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); + } + + void R_FinishSetPatchStyle() + { + basecolormap = basecolormapsave; + } + + const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. + if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) { - r = 255 - r; - g = 255 - g; - b = 255 - b; + col = width + (col % width); } - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. 
- dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; - dc_colormap = identitymap; + + return tex->GetColumn(col, nullptr); } - if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) { - return DontDraw; + if (colfunc == R_DrawAddColumn) + { + *tmvline1 = tmvline1_add; + *tmvline4 = tmvline4_add; + return true; + } + if (colfunc == R_DrawAddClampColumn) + { + *tmvline1 = tmvline1_addclamp; + *tmvline4 = tmvline4_addclamp; + return true; + } + if (colfunc == R_DrawSubClampColumn) + { + *tmvline1 = tmvline1_subclamp; + *tmvline4 = tmvline4_subclamp; + return true; + } + if (colfunc == R_DrawRevSubClampColumn) + { + *tmvline1 = tmvline1_revsubclamp; + *tmvline4 = tmvline4_revsubclamp; + return true; + } + return false; + } + + void setupvline(int fracbits) + { + drawerargs::vlinebits = fracbits; + } + + void setupmvline(int fracbits) + { + drawerargs::mvlinebits = fracbits; + } + + void setuptmvline(int fracbits) + { + drawerargs::tmvlinebits = fracbits; + } + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + dc_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + ds_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetTranslationMap(lighttable_t *translation) + { + using namespace drawerargs; + + dc_colormap = translation; + } + + void rt_initcols(uint8_t *buffer) + { + using namespace drawerargs; + + for (int y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buffer); + } + + void rt_span_coverage(int x, int start, int stop) + { + using namespace drawerargs; + + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] 
= start; + (*tspan)[1] = stop; + *tspan += 2; + } + + void rt_flip_posts() + { + using namespace drawerargs; + + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } + } + + void rt_draw4cols(int sx) + { + using namespace drawerargs; + + int x, bad; + unsigned int maxtop, minbot, minnexttop; + + // Place a dummy "span" in each column. These don't get + // drawn. They're just here to avoid special cases in the + // max/min calculations below. + for (x = 0; x < 4; ++x) + { + dc_ctspan[x][0] = screen->GetHeight()+1; + dc_ctspan[x][1] = screen->GetHeight(); + } + + for (;;) + { + // If a column is out of spans, mark it as such + bad = 0; + minnexttop = 0xffffffff; + for (x = 0; x < 4; ++x) + { + if (horizspan[x] >= dc_ctspan[x]) + { + bad |= 1 << x; + } + else if ((horizspan[x]+2)[0] < minnexttop) + { + minnexttop = (horizspan[x]+2)[0]; + } + } + // Once all columns are out of spans, we're done + if (bad == 15) + { + return; + } + + // Find the largest shared area for the spans in each column + maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), + MAX (horizspan[2][0], horizspan[3][0])); + minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), + MIN (horizspan[2][1], horizspan[3][1])); + + // If there is no shared area with these spans, draw each span + // individually and advance to the next spans until we reach a shared area. + // However, only draw spans down to the highest span in the next set of + // spans. If we allow the entire height of a span to be drawn, it could + // prevent any more shared areas from being drawn in these four columns. 
+ // + // Example: Suppose we have the following arrangement: + // A CD + // A CD + // B D + // B D + // aB D + // aBcD + // aBcD + // aBc + // + // If we draw the entire height of the spans, we end up drawing this first: + // A CD + // A CD + // B D + // B D + // B D + // B D + // B D + // B D + // B + // + // This leaves only the "a" and "c" columns to be drawn, and they are not + // part of a shared area, but if we can include B and D with them, we can + // get a shared area. So we cut off everything in the first set just + // above the "a" column and end up drawing this first: + // A CD + // A CD + // B D + // B D + // + // Then the next time through, we have the following arrangement with an + // easily shared area to draw: + // aB D + // aBcD + // aBcD + // aBc + if (bad != 0 || maxtop > minbot) + { + int drawcount = 0; + for (x = 0; x < 4; ++x) + { + if (!(bad & 1)) + { + if (horizspan[x][1] < minnexttop) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); + horizspan[x] += 2; + drawcount++; + } + else if (minnexttop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); + horizspan[x][0] = minnexttop; + drawcount++; + } + } + bad >>= 1; + } + // Drawcount *should* always be non-zero. The reality is that some situations + // can make this not true. Unfortunately, I'm not sure what those situations are. + if (drawcount == 0) + { + return; + } + continue; + } + + // Draw any span fragments above the shared area. + for (x = 0; x < 4; ++x) + { + if (maxtop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); + } + } + + // Draw the shared area. + hcolfunc_post4 (sx, maxtop, minbot); + + // For each column, if part of the span is past the shared area, + // set its top to just below the shared area. Otherwise, advance + // to the next span in that column. 
+ for (x = 0; x < 4; ++x) + { + if (minbot < horizspan[x][1]) + { + horizspan[x][0] = minbot+1; + } + else + { + horizspan[x] += 2; + } + } + } + } + + void R_SetupSpanBits(FTexture *tex) + { + using namespace drawerargs; + + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + } + + void R_SetSpanColormap(lighttable_t *colormap) + { + using namespace drawerargs; + + ds_colormap = colormap; + } + + void R_SetSpanSource(FTexture *tex) + { + using namespace drawerargs; + + ds_source = tex->GetPixels(); + } + + ///////////////////////////////////////////////////////////////////////// + + void R_FillColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + // Copies one span at hx to the screen at sx. + void rt_copy1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Copies all four spans to the screen starting at sx. + void rt_copy4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Maps one span at hx to the screen at sx. + void rt_map1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Maps all four spans to the screen starting at sx. + void rt_map4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates one span at hx to the screen at sx. 
+	// Two steps: one QueueCommand call with (hx, sx, yl, yh), then rt_map1col
+	// with the same arguments.
+	// NOTE(review): every QueueCommand call in this group names no command
+	// type, so the wrappers look identical apart from their follow-up call.
+	// Presumably a template argument selecting the concrete command was lost
+	// from this text - verify against the real source file.
+	void rt_tlate1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+		rt_map1col(hx, sx, yl, yh);
+	}
+
+	// Translates all four spans to the screen starting at sx.
+	// Queues with 0 in the hx position, then calls rt_map4cols.
+	void rt_tlate4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+		rt_map4cols(sx, yl, yh);
+	}
+
+	// Adds one span at hx to the screen at sx without clamping.
+	void rt_add1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+	}
+
+	// Adds all four spans to the screen starting at sx without clamping.
+	void rt_add4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+	}
+
+	// Translates and adds one span at hx to the screen at sx without clamping.
+	// Queues one command, then calls rt_add1col with the same arguments.
+	void rt_tlateadd1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+		rt_add1col(hx, sx, yl, yh);
+	}
+
+	// Translates and adds all four spans to the screen starting at sx without clamping.
+	// Queues with 0 in the hx position, then calls rt_add4cols.
+	void rt_tlateadd4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+		rt_add4cols(sx, yl, yh);
+	}
+
+	// Shades one span at hx to the screen at sx.
+	void rt_shaded1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+	}
+
+	// Shades all four spans to the screen starting at sx.
+	void rt_shaded4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+	}
+
+	// Adds one span at hx to the screen at sx with clamping.
+	void rt_addclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+	}
+
+	// Adds all four spans to the screen starting at sx with clamping.
+	void rt_addclamp4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+	}
+
+	// Translates and adds one span at hx to the screen at sx with clamping.
+	// Two steps: one QueueCommand call with (hx, sx, yl, yh), then
+	// rt_addclamp1col with the same arguments.
+	// NOTE(review): every QueueCommand call in this group names no command
+	// type, so the wrappers look identical apart from their follow-up call.
+	// Presumably a template argument selecting the concrete command was lost
+	// from this text - verify against the real source file.
+	void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+		rt_addclamp1col(hx, sx, yl, yh);
+	}
+
+	// Translates and adds all four spans to the screen starting at sx with clamping.
+	// Queues with 0 in the hx position, then calls rt_addclamp4cols.
+	void rt_tlateaddclamp4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+		rt_addclamp4cols(sx, yl, yh);
+	}
+
+	// Subtracts one span at hx to the screen at sx with clamping.
+	void rt_subclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+	}
+
+	// Subtracts all four spans to the screen starting at sx with clamping.
+	void rt_subclamp4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+	}
+
+	// Translates and subtracts one span at hx to the screen at sx with clamping.
+	// Queues one command, then calls rt_subclamp1col with the same arguments.
+	void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+		rt_subclamp1col(hx, sx, yl, yh);
+	}
+
+	// Translates and subtracts all four spans to the screen starting at sx with clamping.
+	// Queues with 0 in the hx position, then calls rt_subclamp4cols.
+	void rt_tlatesubclamp4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+		rt_subclamp4cols(sx, yl, yh);
+	}
+
+	// Subtracts one span at hx from the screen at sx with clamping.
+	void rt_revsubclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+	}
+
+	// Subtracts all four spans from the screen starting at sx with clamping.
+	void rt_revsubclamp4cols(int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+	}
+
+	// Translates and subtracts one span at hx from the screen at sx with clamping.
+	// Queues one command, then calls rt_revsubclamp1col with the same arguments.
+	void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh)
+	{
+		DrawerCommandQueue::QueueCommand(hx, sx, yl, yh);
+		rt_revsubclamp1col(hx, sx, yl, yh);
+	}
+
+	// Translates and subtracts all four spans from the screen starting at sx with clamping.
+	void rt_tlaterevsubclamp4cols(int sx, int yl, int yh)
+	{
+		// Queues with 0 in the hx position, then calls rt_revsubclamp4cols.
+		DrawerCommandQueue::QueueCommand(0, sx, yl, yh);
+		rt_revsubclamp4cols(sx, yl, yh);
+	}
+
+	// NOTE(review): every QueueCommand call in this group names no command
+	// type, so many wrappers below have byte-identical bodies. Presumably a
+	// template argument selecting the concrete command was lost from this
+	// text - verify against the real source file.
+
+	// The *1 functions below queue one command and return dc_texturefrac
+	// advanced by dc_count steps of dc_iscale. The *4 functions queue one
+	// command and advance each of the four vplce[] entries by
+	// vince[i] * dc_count. None of them draws anything directly.
+
+	uint32_t vlinec1()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void vlinec4()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	uint32_t mvlinec1()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void mvlinec4()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	fixed_t tmvline1_add()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void tmvline4_add()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	fixed_t tmvline1_addclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void tmvline4_addclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	fixed_t tmvline1_subclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void tmvline4_subclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	fixed_t tmvline1_revsubclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		return dc_texturefrac + dc_count * dc_iscale;
+	}
+
+	void tmvline4_revsubclamp()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		for (int i = 0; i < 4; i++)
+			vplce[i] += vince[i] * dc_count;
+	}
+
+	// Sky column wrappers: forward solid_top/solid_bottom to QueueCommand.
+
+	void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom)
+	{
+		DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
+	}
+
+	void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom)
+	{
+		DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
+	}
+
+	void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom)
+	{
+		DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
+	}
+
+	void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom)
+	{
+		DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
+	}
+
+	// Column and span wrappers: each takes no arguments and makes a single
+	// argument-less QueueCommand call (R_DrawFuzzColumn additionally updates
+	// fuzz state, see below).
+
+	void R_DrawColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillAddColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillAddClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillSubClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillRevSubClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	// Queues one command, then clamps dc_yl to at least 1 and dc_yh to at most
+	// fuzzviewheight. If the clamped range is non-empty, advances fuzzpos by
+	// the number of rows in it, wrapping modulo FUZZTABLE.
+	// Note that the clamped values are written back to dc_yl/dc_yh.
+	void R_DrawFuzzColumn()
+	{
+		using namespace drawerargs;
+
+		DrawerCommandQueue::QueueCommand();
+
+		dc_yl = MAX(dc_yl, 1);
+		dc_yh = MIN(dc_yh, fuzzviewheight);
+		if (dc_yl <= dc_yh)
+			fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE;
+	}
+
+	void R_DrawAddColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawTranslatedColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawTlatedAddColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawShadedColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawAddClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawAddClampTranslatedColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSubClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSubClampTranslatedColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawRevSubClampColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawRevSubClampTranslatedColumn()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpan()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpanMasked()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpanTranslucent()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpanMaskedTranslucent()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpanAddClamp()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_DrawSpanMaskedAddClamp()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	void R_FillSpan()
+	{
+		DrawerCommandQueue::QueueCommand();
+	}
+
+	// Forwards all eleven arguments, unchanged and in order, to QueueCommand.
+	void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy)
+	{
+		DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy);
+	}
+
+	// Forwards (y, x1, x2) to QueueCommand.
+	void R_DrawColoredSpan(int y, int x1, int x2)
+	{
+		DrawerCommandQueue::QueueCommand(y, x1, x2);
+	}
+
+	namespace
+	{
+		// Colormap remembered by R_SetupDrawSlab and passed along by R_DrawSlab.
+		const uint8_t *slab_colormap;
+	}
+
+	// Remembers the colormap that subsequent R_DrawSlab calls will pass on.
+	void R_SetupDrawSlab(uint8_t *colormap)
+	{
+		slab_colormap = colormap;
+	}
+
+	// Forwards its arguments plus the colormap stored by R_SetupDrawSlab to
+	// QueueCommand.
+	void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p)
+	{
+		DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_colormap);
+	}
+
+	// Queues one command per row y in [y, y2), each covering x1 up to the
+	// value currently stored in spanend[y].
+	void R_DrawFogBoundarySection(int y, int y2, int x1)
+	{
+		for (; y < y2; ++y)
+		{
+			int x2 = spanend[y];
+			DrawerCommandQueue::QueueCommand(y, x1, x2);
+		}
+	}
+
+	// Walks columns from x2-1 down to x1, using uclip[x]/dclip[x] as the top
+	// and bottom of each column, and queues horizontal fog spans. spanend[y]
+	// holds the right-hand x recorded for row y; rows are flushed either
+	// individually as the column extent shrinks, or as a whole section when
+	// the colormap index from GETPALOOKUP changes between adjacent columns.
+	void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip)
+	{
+		// This is essentially the same as R_MapVisPlane but with an extra step
+		// to create new horizontal spans whenever the light changes enough that
+		// we need to use a new colormap.
+
+		// NOTE(review): lightstep is not read anywhere in the visible body;
+		// the loop below uses rw_lightstep directly.
+		double lightstep = rw_lightstep;
+		double light = rw_light + rw_lightstep*(x2 - x1 - 1);
+		int x = x2 - 1;
+		int t2 = uclip[x];
+		int b2 = dclip[x];
+		int rcolormap = GETPALOOKUP(light, wallshade);
+		int lcolormap;
+		uint8_t *basecolormapdata = basecolormap->Maps;
+
+		// Seed spanend for the rows of the rightmost column.
+		if (b2 > t2)
+		{
+			clearbufshort(spanend + t2, b2 - t2, x);
+		}
+
+		R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade);
+
+		// Local stand-in for the colormap pointer selected at this light level;
+		// only compared against basecolormapdata below.
+		uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT);
+
+		for (--x; x >= x1; --x)
+		{
+			int t1 = uclip[x];
+			int b1 = dclip[x];
+			const int xr = x + 1;
+			int stop;
+
+			light -= rw_lightstep;
+			lcolormap = GETPALOOKUP(light, wallshade);
+			if (lcolormap != rcolormap)
+			{
+				// Colormap changed: flush everything accumulated so far and
+				// start a new run from this column.
+				if (t2 < b2 && rcolormap != 0)
+				{ // Colormap 0 is always the identity map, so rendering it is
+					// just a waste of time.
+					R_DrawFogBoundarySection(t2, b2, xr);
+				}
+				if (t1 < t2) t2 = t1;
+				if (b1 > b2) b2 = b1;
+				if (t2 < b2)
+				{
+					clearbufshort(spanend + t2, b2 - t2, x);
+				}
+				rcolormap = lcolormap;
+				R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade);
+				fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT);
+			}
+			else
+			{
+				if (fake_dc_colormap != basecolormapdata)
+				{
+					// Flush rows that the previous column covered but this
+					// one does not (top side, then bottom side).
+					stop = MIN(t1, b2);
+					while (t2 < stop)
+					{
+						int y = t2++;
+						DrawerCommandQueue::QueueCommand(y, xr, spanend[y]);
+					}
+					stop = MAX(b1, t2);
+					while (b2 > stop)
+					{
+						int y = --b2;
+						DrawerCommandQueue::QueueCommand(y, xr, spanend[y]);
+					}
+				}
+				else
+				{
+					// Same t2/b2 adjustment as the branch above, without
+					// queueing any commands.
+					t2 = MAX(t2, MIN(t1, b2));
+					b2 = MIN(b2, MAX(b1, t2));
+				}
+
+				// Record this column as the span end for rows it newly covers.
+				stop = MIN(t2, b1);
+				while (t1 < stop)
+				{
+					spanend[t1++] = x;
+				}
+				stop = MAX(b2, t2);
+				while (b1 > stop)
+				{
+					spanend[--b1] = x;
+				}
+			}
+
+			t2 = uclip[x];
+			b2 = dclip[x];
+		}
+		// Flush whatever remains for the leftmost column.
+		if (t2 < b2 && rcolormap != 0)
+		{
+			R_DrawFogBoundarySection(t2, b2, x1);
+		}
+	}
+
+	// Thin wrapper that forwards the sprite to R_DrawParticle_C.
+	void R_DrawParticle(vissprite_t *sprite)
+	{
+		R_DrawParticle_C(sprite);
 	}
-	return r_columnmethod ?
DoDraw1 : DoDraw0; } - -void R_FinishSetPatchStyle () -{ - basecolormap = basecolormapsave; -} - -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) -{ - if (colfunc == R_DrawAddColumnP_C) - { - *tmvline1 = tmvline1_add; - *tmvline4 = tmvline4_add; - return true; - } - if (colfunc == R_DrawAddClampColumnP_C) - { - *tmvline1 = tmvline1_addclamp; - *tmvline4 = tmvline4_addclamp; - return true; - } - if (colfunc == R_DrawSubClampColumnP_C) - { - *tmvline1 = tmvline1_subclamp; - *tmvline4 = tmvline4_subclamp; - return true; - } - if (colfunc == R_DrawRevSubClampColumnP_C) - { - *tmvline1 = tmvline1_revsubclamp; - *tmvline4 = tmvline4_revsubclamp; - return true; - } - return false; -} - diff --git a/src/r_draw.h b/src/r_draw.h index 6713d40915..40b3328964 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -1,287 +1,208 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. 
-// -//----------------------------------------------------------------------------- - -#ifndef __R_DRAW__ -#define __R_DRAW__ +#pragma once #include "r_defs.h" -extern "C" int ylookup[MAXHEIGHT]; +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Int, r_drawfuzz); +EXTERN_CVAR(Bool, r_drawtrans); +EXTERN_CVAR(Float, transsouls); +EXTERN_CVAR(Int, r_columnmethod); -extern "C" int dc_pitch; // [RH] Distance between rows - -extern "C" lighttable_t*dc_colormap; -extern "C" int dc_x; -extern "C" int dc_yl; -extern "C" int dc_yh; -extern "C" fixed_t dc_iscale; -extern double dc_texturemid; -extern "C" fixed_t dc_texturefrac; -extern "C" int dc_color; // [RH] For flat colors (no texturing) -extern "C" DWORD dc_srccolor; -extern "C" DWORD *dc_srcblend; -extern "C" DWORD *dc_destblend; - -// first pixel in a column -extern "C" const BYTE* dc_source; - -extern "C" BYTE *dc_dest, *dc_destorg; -extern "C" int dc_count; - -extern "C" DWORD vplce[4]; -extern "C" DWORD vince[4]; -extern "C" BYTE* palookupoffse[4]; -extern "C" const BYTE* bufplce[4]; -extern "C" const BYTE* bufplce2[4]; -extern "C" uint32_t bufheight[4]; - -// [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; -extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; -extern "C" unsigned int *dc_ctspan[4]; -extern "C" unsigned int horizspans[4]; - - -// [RH] Pointers to the different column and span drawers... - -// The span blitting interface. -// Hook in assembler or system specific BLT here. - -extern DWORD (*dovline1) (); -extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else -extern void (*dovline4) (); -#endif -extern void setupvline (int); - -extern DWORD (*domvline1) (); -extern void (*domvline4) (); -extern void setupmvline (int); - -extern void setuptmvline (int); - -// The Spectre/Invisibility effect. 
-extern void R_DrawFuzzColumn(void); - -// [RH] Draw shaded column -extern void (*R_DrawShadedColumn)(void); - -// Draw with color translation tables, for player sprite rendering, -// Green/Red/Blue/Indigo shirts. -extern void (*R_DrawTranslatedColumn)(void); - -// Span drawing for rows, floor/ceiling. No Spectre effect needed. -extern void (*R_DrawSpan)(void); -void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); -void R_SetSpanSource(const BYTE *pixels); - -// Span drawing for masked textures. -extern void (*R_DrawSpanMasked)(void); - -// Span drawing for translucent textures. -void R_DrawSpanTranslucent(void); - -// Span drawing for masked, translucent textures. -void R_DrawSpanMaskedTranslucent(void); - -// Span drawing for translucent, additive textures. -void R_DrawSpanAddClamp(void); - -// Span drawing for masked, translucent, additive textures. -void R_DrawSpanMaskedAddClamp(void); - -// [RH] Span blit into an interleaved intermediate buffer -extern void (*R_DrawColumnHoriz)(void); - -// [RH] Initialize the above pointers -void R_InitColumnDrawers (); - -// [RH] Moves data from the temporary buffer to the screen. 
- -void rt_copy1col(int hx, int sx, int yl, int yh); -void rt_copy4cols(int sx, int yl, int yh); -void rt_map4cols(int sx, int yl, int yh); - -extern "C" +namespace swrenderer { + struct vissprite_t; -void rt_shaded1col (int hx, int sx, int yl, int yh); -void rt_shaded4cols_c (int sx, int yl, int yh); -void rt_shaded4cols_asm (int sx, int yl, int yh); + extern double dc_texturemid; -void rt_map1col (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); + namespace drawerargs + { + extern int dc_pitch; + extern lighttable_t *dc_colormap; + extern int dc_x; + extern int dc_yl; + extern int dc_yh; + extern fixed_t dc_iscale; + extern fixed_t dc_texturefrac; + extern uint32_t dc_textureheight; + extern int dc_color; + extern uint32_t dc_srccolor; + extern uint32_t dc_srccolor_bgra; + extern uint32_t *dc_srcblend; + extern uint32_t *dc_destblend; + extern fixed_t dc_srcalpha; + extern fixed_t dc_destalpha; + extern const uint8_t *dc_source; + extern const uint8_t *dc_source2; + extern uint32_t dc_texturefracx; + extern uint8_t *dc_translation; + extern uint8_t *dc_dest; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_count; -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); + extern uint32_t vplce[4]; + extern uint32_t vince[4]; + extern uint8_t *palookupoffse[4]; + extern fixed_t palookuplight[4]; + extern const uint8_t *bufplce[4]; + extern const uint8_t *bufplce2[4]; + extern uint32_t buftexturefracx[4]; + extern uint32_t bufheight[4]; + extern int vlinebits; + extern int mvlinebits; + extern int 
tmvlinebits; -void rt_add4cols_c (int sx, int yl, int yh); -void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); + extern int ds_y; + extern int ds_x1; + extern int ds_x2; + extern lighttable_t * ds_colormap; + extern dsfixed_t ds_light; + extern dsfixed_t ds_xfrac; + extern dsfixed_t ds_yfrac; + extern dsfixed_t ds_xstep; + extern dsfixed_t ds_ystep; + extern int ds_xbits; + extern int ds_ybits; + extern fixed_t ds_alpha; + extern double ds_lod; + extern const uint8_t *ds_source; + extern int ds_color; -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); + extern unsigned int dc_tspans[4][MAXHEIGHT]; + extern unsigned int *dc_ctspan[4]; + extern unsigned int *horizspan[4]; + } -void rt_add4cols_asm (int sx, int yl, int yh); -void rt_addclamp4cols_asm (int sx, int yl, int yh); + extern int ylookup[MAXHEIGHT]; + extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; + extern FDynamicColormap ShadeFakeColormap[16]; + extern uint8_t identitymap[256]; + extern FDynamicColormap identitycolormap; + + // Spectre/Invisibility. 
+ #define FUZZTABLE 50 + extern int fuzzoffset[FUZZTABLE + 1]; + extern int fuzzpos; + extern int fuzzviewheight; + + void R_InitColumnDrawers(); + void R_InitShadeMaps(); + void R_InitFuzzTable(int fuzzoff); + + enum ESPSResult + { + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 + }; + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); + + const uint8_t *R_GetColumn(FTexture *tex, int col); + void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + + void rt_initcols(uint8_t *buffer = nullptr); + void rt_span_coverage(int x, int start, int stop); + void rt_draw4cols(int sx); + void rt_flip_posts(); + void rt_copy1col(int hx, int sx, int yl, int yh); + void rt_copy4cols(int sx, int yl, int yh); + void rt_shaded1col(int hx, int sx, int yl, int yh); + void rt_shaded4cols(int sx, int yl, int yh); + void rt_map1col(int hx, int sx, int yl, int yh); + void rt_add1col(int hx, int sx, int yl, int yh); + void rt_addclamp1col(int hx, int sx, int yl, int yh); + void rt_subclamp1col(int hx, int sx, int yl, int yh); + void rt_revsubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlate1col(int hx, int sx, 
int yl, int yh); + void rt_tlateadd1col(int hx, int sx, int yl, int yh); + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); + void rt_map4cols(int sx, int yl, int yh); + void rt_add4cols(int sx, int yl, int yh); + void rt_addclamp4cols(int sx, int yl, int yh); + void rt_subclamp4cols(int sx, int yl, int yh); + void rt_revsubclamp4cols(int sx, int yl, int yh); + void rt_tlate4cols(int sx, int yl, int yh); + void rt_tlateadd4cols(int sx, int yl, int yh); + void rt_tlateaddclamp4cols(int sx, int yl, int yh); + void rt_tlatesubclamp4cols(int sx, int yl, int yh); + void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); + void R_DrawColumnHoriz(); + void R_DrawColumn(); + void R_DrawFuzzColumn(); + void R_DrawTranslatedColumn(); + void R_DrawShadedColumn(); + void R_FillColumn(); + void R_FillAddColumn(); + void R_FillAddClampColumn(); + void R_FillSubClampColumn(); + void R_FillRevSubClampColumn(); + void R_DrawAddColumn(); + void R_DrawTlatedAddColumn(); + void R_DrawAddClampColumn(); + void R_DrawAddClampTranslatedColumn(); + void R_DrawSubClampColumn(); + void R_DrawSubClampTranslatedColumn(); + void R_DrawRevSubClampColumn(); + void R_DrawRevSubClampTranslatedColumn(); + void R_DrawSpan(); + void R_DrawSpanMasked(); + void R_DrawSpanTranslucent(); + void R_DrawSpanMaskedTranslucent(); + void R_DrawSpanAddClamp(); + void R_DrawSpanMaskedAddClamp(); + void R_FillSpan(); + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void R_DrawColoredSpan(int y, int x1, int x2); + void R_SetupDrawSlab(uint8_t *colormap); + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + 
uint32_t vlinec1(); + void vlinec4(); + uint32_t mvlinec1(); + void mvlinec4(); + fixed_t tmvline1_add(); + void tmvline4_add(); + fixed_t tmvline1_addclamp(); + void tmvline4_addclamp(); + fixed_t tmvline1_subclamp(); + void tmvline4_subclamp(); + fixed_t tmvline1_revsubclamp(); + void tmvline4_revsubclamp(); + void R_FillColumnHoriz(); + void R_FillSpan(); + + inline uint32_t dovline1() { return vlinec1(); } + inline void dovline4() { vlinec4(); } + inline uint32_t domvline1() { return mvlinec1(); } + inline void domvline4() { mvlinec4(); } + + void setupvline(int fracbits); + void setupmvline(int fracbits); + void setuptmvline(int fracbits); + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetTranslationMap(lighttable_t *translation); + + void R_SetupSpanBits(FTexture *tex); + void R_SetSpanColormap(lighttable_t *colormap); + void R_SetSpanSource(FTexture *tex); + + void R_MapTiltedPlane(int y, int x1); + void R_MapColoredPlane(int y, int x1); + void R_DrawParticle(vissprite_t *); } - - -#ifdef X86_ASM -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif - -void rt_flip_posts(); -void rt_draw4cols (int sx); - -// [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); - - -#ifdef X86_ASM - - void R_DrawShadedColumnP_C (void); -extern "C" void R_DrawSpanP_ASM (void); -extern "C" void R_DrawSpanMaskedP_ASM (void); - -void R_DrawColumnHorizP_C(void); - -#else - -void R_DrawShadedColumnP_C (void); -void R_DrawSpanP_C (void); -void R_DrawSpanMaskedP_C (void); - -#endif - -void R_DrawColumn(); -void R_DrawColumnHorizP_C(void); -void R_DrawTranslatedColumnP_C(void); -void R_DrawSpanTranslucent (void); -void R_DrawSpanMaskedTranslucent (void); - -void R_DrawTlatedLucentColumnP_C (void); -#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C - -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); - -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif - -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); - -extern "C" int ds_y; -extern "C" int ds_x1; -extern "C" int ds_x2; - -extern "C" lighttable_t* ds_colormap; - -extern "C" dsfixed_t ds_xfrac; -extern "C" dsfixed_t ds_yfrac; -extern "C" dsfixed_t ds_xstep; -extern "C" dsfixed_t ds_ystep; -extern "C" int ds_xbits; -extern "C" int ds_ybits; -extern "C" fixed_t ds_alpha; - -// start of a 64*64 tile image -extern "C" const BYTE* ds_source; - -extern "C" int ds_color; // [RH] For flat color (no texturing) - -extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; -extern FDynamicColormap ShadeFakeColormap[16]; -extern BYTE identitymap[256]; -extern BYTE *dc_translation; - -// [RH] Added for muliresolution support -void R_InitShadeMaps(); -void R_InitFuzzTable (int fuzzoff); - -// [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - 
DoDraw1, // draw this as if r_columnmethod is 1 -}; -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); -inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) -{ - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); -} - -// Call this after finished drawing the current thing, in case its -// style was STYLE_Shade -void R_FinishSetPatchStyle (); - -// transmaskwallscan calls this to find out what column drawers to use -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); - -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - -#endif diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp new file mode 100644 index 0000000000..cfb55a6706 --- /dev/null +++ b/src/r_draw_pal.cpp @@ -0,0 +1,2626 @@ +/* +** r_draw_pal.cpp +** +**--------------------------------------------------------------------------- +** Copyright 1998-2016 Randy Heit +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +/* + [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses + a 32k RGB table instead of an 8k one. At least on my machine, it's + slightly faster (probably because it uses only one shift instead of + two), and it looks considerably less green at the ends of the + translucency range. The extra size doesn't appear to be an issue. + + The following note is from DOSDoom 0.65: + + New translucency algorithm, by Erik Sandberg: + + Basically, we compute the red, green and blue values for each pixel, and + then use a RGB table to check which one of the palette colours that best + represents those RGB values. The RGB table is 8k big, with 4 R-bits, + 5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k + table takes up more memory and results in more cache misses, so an 8k + table seemed to be quite ultimate. + + The computation of the RGB for each pixel is accelerated by using two + 1k tables for each translucency level. 
+ The xth element of one of these tables contains the r, g and b values for + the colour x, weighted for the current translucency level (for example, + the weighted rgb values for background colour at 75% translucency are 1/4 + of the original rgb values). The rgb values are stored as three + low-precision fixed point values, packed into one long per colour: + Bit 0-4: Frac part of blue (5 bits) + Bit 5-8: Int part of blue (4 bits) + Bit 9-13: Frac part of red (5 bits) + Bit 14-17: Int part of red (4 bits) + Bit 18-22: Frac part of green (5 bits) + Bit 23-27: Int part of green (5 bits) + Bit 28-31: All zeros (4 bits) + + The point of this format is that the two colours now can be added, and + then be converted to a RGB table index very easily: First, we just set + all the frac bits and the four upper zero bits to 1. It's now possible + to get the RGB table index by anding the current value >> 5 with the + current value >> 19. When asm-optimised, this should be the fastest + algorithm that uses RGB tables. 
+*/ + +namespace swrenderer +{ + PalWall1Command::PalWall1Command() + { + using namespace drawerargs; + + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + _pitch = dc_pitch; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + PalWall4Command::PalWall4Command() + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + for (int col = 0; col < 4; col++) + { + _palookupoffse[col] = palookupoffse[col]; + _bufplce[col] = bufplce[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + void DrawWall1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _vlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWall4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _vlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = 
_vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _mvlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _mvlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + 
auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix; + + pix = buf0[(place = vplce0) >> bits]; if (pix) dest[0] = pal0[pix]; vplce0 = place + vince0; + pix = buf1[(place = vplce1) >> bits]; if (pix) dest[1] = pal1[pix]; vplce1 = place + vince1; + pix = buf2[(place = vplce2) >> bits]; if (pix) dest[2] = pal2[pix]; vplce2 = place + vince2; + pix = buf3[(place = vplce3) >> bits]; if (pix) dest[3] = pal3[pix]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + 
uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 
0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[colormap[pix]] | 
0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if 
(pix != 0) + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + /* Four-column reverse-subtract wall drawer. For each of the 4 columns, every non-zero texel read from _bufplce[i] is mapped through _palookupoffse[i] and _srcblend, subtracted from the _destblend entry of the pixel already stored in dest[i], and the masked result is written back through RGB32k.All. Rows are divided between worker threads via count_for_thread / skipped_by_thread / dest_for_thread, so both the texture steps and the pitch are multiplied by thread->num_cores. */ + void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + /* Step with the local pitch, which was scaled by thread->num_cores above; the raw _pitch would only advance one row and land on rows owned by other threads. */ + dest += pitch; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////////// + + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + for (int col = 0; col < 4; col++) + { + _bufplce[col] = bufplce[col]; + _bufplce2[col] = bufplce2[col]; + _bufheight[col] = bufheight[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + } + + void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count;
+ int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + int textureheight0 = _bufheight[0]; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + } + + void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; 
+ int count = _count; + int pitch = _pitch; + const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] }; + int textureheight0 = _bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] }; + int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + /* Widen each band with the bounds of columns 1..3, using that column's own frac/fracstep. Band starts take the minimum and band ends the maximum, so the solid fills only cover rows where all four columns are solid and the fade loops cover every row where any column is still fading. */ + for (int col = 1; col < 4; col++) + { + start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]); + } + start_fadetop_y
= clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int col = 0; col < 4; col++) + { + frac[col] += fracstep[col] * skipped; + fracstep[col] *= thread->num_cores; + } + pitch *= thread->num_cores; + int num_cores = thread->num_cores; + int index = skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < 
end_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; + index += num_cores; + } + } + + void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + const uint8_t *source1 = _bufplce2[0]; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index 
< count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + } + + void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] }; + const uint8_t *source1[4] = { _bufplce2[0], _bufplce2[1], _bufplce2[2], _bufplce2[3] }; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] }; + int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade 
out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + /* Widen each band with the bounds of columns 1..3, using that column's own frac/fracstep. Band starts take the minimum and band ends the maximum, so the solid fills only cover rows where all four columns are solid and the fade loops cover every row where any column is still fading. */ + for (int col = 1; col < 4; col++) + { + start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]); + } + start_fadetop_y = clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int col = 0; col < 4; col++) + { + frac[col] += fracstep[col] * skipped; + fracstep[col] *= thread->num_cores; + } + pitch *= thread->num_cores; + int num_cores = thread->num_cores; + int index =
skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < end_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + if (fg == 0) + { + 
uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[col][sample_index2]; + } + output[col] = fg; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; + index += num_cores; + } + } + + ///////////////////////////////////////////////////////////////////////// + + PalColumnCommand::PalColumnCommand() + { + using namespace drawerargs; + + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _source = dc_source; + _translation = dc_translation; + _color = dc_color; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _srccolor = dc_srccolor; + } + + void DrawColumnPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + + // Framebuffer destination address. + dest = _dest; + + // Determine scaling, + // which is the only mapping to be done. 
+ fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-like scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = colormap[source[frac >> FRACBITS]]; + + dest += pitch; + frac += fracstep; + + } while (--count); + } + + /* Writes the single byte _color to each of this thread's rows of the column; pitch is scaled by thread->num_cores so consecutive writes skip the rows handled by other threads. */ + void FillColumnPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + + count = _count; + dest = _dest; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + + uint8_t color = _color; + do + { + *dest = color; + dest += pitch; + } while (--count); + } + + /* Additive fill: adds _srccolor to the _destblend entry of each existing pixel, ORs in 0x1f07c1f and stores the RGB32k.All lookup of the folded value back into the pixel. */ + void FillColumnAddPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + + count = _count; + dest = _dest; + uint32_t *bg2rgb; + uint32_t fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + + do + { + uint32_t bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg >> 15)]; + dest += pitch; + } while (--count); + + } + + void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + + count = _count; + + dest = _dest; + uint32_t *bg2rgb; + uint32_t
fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + + do + { + uint32_t a = fg + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + + void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + + count = _count; + + dest = _dest; + uint32_t *bg2rgb; + uint32_t fg; + + bg2rgb = _destblend; + fg = _srccolor | 0x40100400; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + + do + { + uint32_t a = fg - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + + void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + + count = _count; + if (count <= 0) + return; + + dest = _dest; + uint32_t *bg2rgb; + uint32_t fg; + + bg2rgb = _destblend; + fg = _srccolor; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + pitch *= thread->num_cores; + + do + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + } while (--count); + } + + void DrawColumnAddPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + 
count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + do + { + uint32_t fg = colormap[source[frac >> FRACBITS]]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + // [RH] Local copies of global vars to improve compiler optimizations + const uint8_t *colormap = _colormap; + const uint8_t *translation = _translation; + const uint8_t *source = _source; + + do + { + *dest = colormap[translation[source[frac >> FRACBITS]]]; + dest += pitch; + + frac += fracstep; + } while (--count); + } + + void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= 
thread->num_cores; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + do + { + uint32_t fg = colormap[translation[source[frac >> FRACBITS]]]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnShadedPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac, fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + uint32_t *fgstart = &Col2RGB8[0][_color]; + + do + { + uint32_t val = colormap[source[frac >> FRACBITS]]; + uint32_t fg = fgstart[val << 8]; + val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val >> 15)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + 
{ + uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint32_t a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint32_t a = (fg2rgb[colormap[source[frac >> 
FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *translation = _translation; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a >> 15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } + + void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + do + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]]; + uint32_t b = a; + 
// Draws the "fuzz" (spectre) effect for one column: every destination pixel
// is replaced by a nearby pixel of the same column, looked up through a fixed
// darkening colormap. Only the rows owned by this worker thread are written.
//
// The work is split into three phases so that the neighbour lookup is biased
// away from the top and bottom edges of the drawable area:
//   1. an optional first row whose lookup is pushed one thread-row down,
//   2. the bulk of the rows, processed in runs that end where the fuzz table
//      index wraps,
//   3. an optional last row whose lookup is pushed one thread-row up.
void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread)
{
	// Clamp the vertical range: never start above row 1, never end past _fuzzviewheight.
	int yl = MAX(_yl, 1);
	int yh = MIN(_yh, _fuzzviewheight);

	int count = thread->count_for_thread(yl, yh - yl + 1);

	// Zero length.
	if (count <= 0)
		return;

	// Fixed colormap used for the darkening (map index 6, 256 entries per map).
	uint8_t *map = &NormalLight.Maps[6 * 256];

	uint8_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + _destorg);

	// This thread's rows are num_cores scanlines apart, so both the pointer
	// stride and the fuzz table step are scaled by num_cores.
	int pitch = _pitch * thread->num_cores;
	int fuzzstep = thread->num_cores;
	// Start the fuzz index where it would be after the rows skipped by this thread.
	int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE;

	// yl now refers to the first row this thread actually draws.
	yl += thread->skipped_by_thread(yl);

	// Handle the case where we would go out of bounds at the top:
	if (yl < fuzzstep)
	{
		// "+ pitch" shifts the sampled neighbour one thread-row downwards.
		// NOTE(review): fuzzoffset[] is presumably a table of signed byte
		// offsets into the frame buffer - confirm against its definition.
		uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch;
		//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);

		*dest = map[*srcdest];
		dest += pitch;
		fuzz += fuzzstep;
		fuzz %= FUZZTABLE;

		count--;
		if (count == 0)
			return;
	}

	// If the remaining rows reach the bottom limit, hold the last row back so
	// it can be drawn with an upward-biased lookup after the main loop.
	bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight);
	if (lowerbounds)
		count--;

	// Fuzz where fuzzoffset stays within bounds
	while (count > 0)
	{
		// Number of steps that can be taken before the fuzz index runs past
		// the end of the table (rounded up), so the modulo is only applied
		// once per run instead of once per pixel.
		int available = (FUZZTABLE - fuzz);
		int next_wrap = available / fuzzstep;
		if (available % fuzzstep != 0)
			next_wrap++;

		int cnt = MIN(count, next_wrap);
		count -= cnt;
		do
		{
			uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep;
			//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);

			*dest = map[*srcdest];
			dest += pitch;
			fuzz += fuzzstep;
		} while (--cnt);

		fuzz %= FUZZTABLE;
	}

	// Handle the case where we would go out of bounds at the bottom
	if (lowerbounds)
	{
		// "- pitch" shifts the sampled neighbour one thread-row upwards.
		uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch;
		//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);

		*dest = map[*srcdest];
	}
}
_ybits = ds_ybits; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _color = ds_color; + } + + void DrawSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + int texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + int texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void FillSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); + } + + ///////////////////////////////////////////////////////////////////////// + + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) + { + using namespace drawerargs; + + _colormap = ds_colormap; + _destorg = dc_destorg; + _ybits = ds_ybits; + _xbits = ds_xbits; + _source = ds_source; + basecolormapdata = basecolormap->Maps; + } + + void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + 
const uint8_t **tiltlighting = thread->tiltlighting; + + int width = x2 - x1; + double iz, uz, vz; + uint8_t *fb; + uint32_t u, v; + int i; + + iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0] * width) * planelightfloat; + vz = iz * planelightfloat; + CalcTiltedLighting(vz, uz, width, thread); + } + else + { + for (int i = 0; i < width; ++i) + { + tiltlighting[i] = _colormap; + } + } + + uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); + vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); + + fb = ylookup[y] + x1 + _destorg; + + uint8_t vshift = 32 - _ybits; + uint8_t ushift = vshift - _xbits; + int umask = ((1 << _xbits) - 1) << _ybits; + + #if 0 + // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f / iz; + + u = int64_t(uz*z) + pviewx; + v = int64_t(vz*z) + pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); + #else + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f + + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * INVSPAN); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * 
INVSPAN); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (i = SPANSIZE - 1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = (uint32_t)int64_t(startu); + v = (uint32_t)int64_t(startv); + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f / left; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * left); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * left); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } + #endif + } + + // Calculates the lighting for one row of a tilted plane. If the definition + // of GETPALOOKUP changes, this needs to change, too. + void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread) + { + const uint8_t **tiltlighting = thread->tiltlighting; + + double lstep; + uint8_t *lightfiller; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". 
+ lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(planeshade) - lval; + lend = FIXED2DBL(planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) + { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + uint8_t *clight = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS - 1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS - 1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + color = ds_color; + destorg = dc_destorg; + } + + void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + memset(ylookup[y] + x1 + destorg, color, x2 - x1 + 1); + } + + 
///////////////////////////////////////////////////////////////////////// + + DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) + : _dx(dx), _v(v), _dy(dy), _vi(vi), _vvptr(vptr), _p(p), _colormap(colormap) + { + using namespace drawerargs; + _pitch = dc_pitch; + _start_y = static_cast((p - dc_destorg) / dc_pitch); + } + + void DrawSlabPalCommand::Execute(DrawerThread *thread) + { + int count = _dy; + uint8_t *dest = _p; + int pitch = _pitch; + int width = _dx; + const uint8_t *colormap = _colormap; + const uint8_t *source = _vvptr; + fixed_t fracpos = _v; + fixed_t iscale = _vi; + + count = thread->count_for_thread(_start_y, count); + dest = thread->dest_for_thread(_start_y, pitch, dest); + fracpos += iscale * thread->skipped_by_thread(_start_y); + iscale *= thread->num_cores; + pitch *= thread->num_cores; + + while (count > 0) + { + uint8_t color = colormap[source[fracpos >> FRACBITS]]; + + for (int x = 0; x < width; x++) + dest[x] = color; + + dest += pitch; + fracpos += iscale; + count--; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + _colormap = dc_colormap; + _destorg = dc_destorg; + } + + void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + const uint8_t *colormap = _colormap; + uint8_t *dest = ylookup[y] + _destorg; + int x = x1; + do + { + dest[x] = colormap[dest[x]]; + } while (++x <= x2); + } +} diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h new file mode 100644 index 0000000000..f2b1f05712 --- /dev/null +++ b/src/r_draw_pal.h @@ -0,0 +1,333 @@ + +#pragma once + +#include "r_draw.h" +#include "v_palette.h" +#include "r_thread.h" + +namespace swrenderer +{ + class PalWall1Command : public DrawerCommand + { + public: 
// Base class for the paletted wall drawers that process four adjacent
// columns per command. It only stores the drawing parameters; the concrete
// Draw*4PalCommand subclasses implement Execute().
// NOTE(review): the constructor is defined elsewhere; it presumably snapshots
// the current global drawer arguments - confirm against its definition.
class PalWall4Command : public DrawerCommand
{
public:
	PalWall4Command();
	// NOTE(review): returns the same label as PalWall1Command, so the two
	// cannot be told apart in debug output.
	FString DebugInfo() override { return "PalWallCommand"; }

protected:
	uint8_t *_dest;              // destination pixels
	int _count;                  // number of rows to draw
	int _pitch;                  // distance in bytes between rows
	int _vlinebits;              // texture coordinate bit counts for the plain,
	int _mvlinebits;             //  masked and
	int _tmvlinebits;            //  translucent-masked variants (presumably; confirm)
	uint8_t *_palookupoffse[4];  // one colormap per column
	const uint8_t *_bufplce[4];  // one source texel column per column
	uint32_t _vince[4];          // per-column texture step
	uint32_t _vplce[4];          // per-column texture start position
	uint32_t *_srcblend;         // blend lookup table for the source colour
	uint32_t *_destblend;        // blend lookup table for the destination colour
};
void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + + class PalSkyCommand : public DrawerCommand + { + public: + PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override { return "PalSkyCommand"; } + + protected: + uint32_t solid_top; + uint32_t solid_bottom; + + uint8_t *_dest; + int _count; + int _pitch; + const uint8_t *_bufplce[4]; + const uint8_t *_bufplce2[4]; + int _bufheight[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + }; + + class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + + class PalColumnCommand : public DrawerCommand + { + public: + PalColumnCommand(); + FString DebugInfo() override { return "PalColumnCommand"; } + + protected: + int _count; + uint8_t *_dest; + int _pitch; + fixed_t _iscale; + fixed_t _texturefrac; + const uint8_t *_colormap; + const uint8_t *_source; + const uint8_t *_translation; + int _color; + uint32_t *_srcblend; + uint32_t *_destblend; + uint32_t _srccolor; + }; + + class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddClampPalCommand 
: public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawFuzzColumnPalCommand : public DrawerCommand + { + public: + DrawFuzzColumnPalCommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } + + private: + int _yl; + int _yh; + int _x; + uint8_t *_destorg; + int _pitch; + int _fuzzpos; + int _fuzzviewheight; + }; + + class PalSpanCommand : public DrawerCommand + { + public: + PalSpanCommand(); + 
FString DebugInfo() override { return "PalSpanCommand"; } + + protected: + const uint8_t *_source; + const uint8_t *_colormap; + dsfixed_t _xfrac; + dsfixed_t _yfrac; + int _y; + int _x1; + int _x2; + uint8_t *_destorg; + dsfixed_t _xstep; + dsfixed_t _ystep; + int _xbits; + int _ybits; + uint32_t *_srcblend; + uint32_t *_destblend; + int _color; + }; + + class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawTiltedSpanPalCommand : public DrawerCommand + { + public: + DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } + + private: + void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread); + + int y; + int x1; + int x2; + FVector3 plane_sz; + FVector3 plane_su; + FVector3 plane_sv; + bool plane_shade; + int planeshade; + float planelightfloat; + fixed_t pviewx; + fixed_t pviewy; + + const uint8_t *_colormap; + uint8_t *_destorg; + int _ybits; + int _xbits; + const uint8_t *_source; + uint8_t 
*basecolormapdata; + }; + + class DrawColoredSpanPalCommand : public PalSpanCommand + { + public: + DrawColoredSpanPalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } + + private: + int y; + int x1; + int x2; + int color; + uint8_t *destorg; + }; + + class DrawSlabPalCommand : public PalSpanCommand + { + public: + DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); + void Execute(DrawerThread *thread) override; + + private: + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_vvptr; + uint8_t *_p; + const uint8_t *_colormap; + int _pitch; + int _start_y; + }; + + class DrawFogBoundaryLinePalCommand : public PalSpanCommand + { + public: + DrawFogBoundaryLinePalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + + private: + int y, x1, x2; + const uint8_t *_colormap; + uint8_t *_destorg; + }; + + class RtInitColsPalCommand : public DrawerCommand + { + public: + RtInitColsPalCommand(uint8_t *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "RtInitColsPalCommand"; } + + private: + uint8_t *buff; + }; + + class PalColumnHorizCommand : public DrawerCommand + { + public: + PalColumnHorizCommand(); + + protected: + const uint8_t *_source; + fixed_t _iscale; + fixed_t _texturefrac; + int _count; + int _color; + int _x; + int _yl; + }; + + class DrawColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } + }; + + class FillColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + + class PalRtCommand : public DrawerCommand + { + public: + PalRtCommand(int hx, int 
sx, int yl, int yh); + FString DebugInfo() override { return "PalRtCommand"; } + + protected: + int hx, sx, yl, yh; + uint8_t *_destorg; + int _pitch; + const uint8_t *_colormap; + const uint32_t *_srcblend; + const uint32_t *_destblend; + const uint8_t *_translation; + int _color; + }; + + class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class 
DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using 
PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; +} diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp deleted file mode 100644 index a4f581d12a..0000000000 --- a/src/r_drawt.cpp +++ /dev/null @@ -1,1118 +0,0 @@ -/* -** r_drawt.cpp -** Faster column drawers for modern processors -** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** All rights reserved. -** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: -** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. -** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**--------------------------------------------------------------------------- -** -** These functions stretch columns into a temporary buffer and then -** map them to the screen. On modern machines, this is faster than drawing -** them directly to the screen. -** -** Will I be able to even understand any of this if I come back to it later? -** Let's hope so. :-) -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" - -// I should have commented this stuff better. -// -// dc_temp is the buffer R_DrawColumnHoriz writes into. -// dc_tspans points into it. -// dc_ctspan points into dc_tspans. -// horizspan also points into dc_tspans. - -// dc_ctspan is advanced while drawing into dc_temp. -// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. - -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; -unsigned int dc_tspans[4][MAXHEIGHT]; -unsigned int *dc_ctspan[4]; -unsigned int *horizspan[4]; - -#ifdef X86_ASM -extern "C" void R_SetupShadedCol(); -extern "C" void R_SetupAddCol(); -extern "C" void R_SetupAddClampCol(); -#endif - -// Copies one span at hx to the screen at sx. -void rt_copy1col (int hx, int sx, int yl, int yh) -{ - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); -} - -// Copies all four spans to the screen starting at sx. 
-void rt_copy4cols (int sx, int yl, int yh) -{ - int *source; - int *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = (int *)(ylookup[yl] + sx + dc_destorg); - source = (int *)(&dc_temp[yl*4]); - pitch = dc_pitch/sizeof(int); - - if (count & 1) { - *dest = *source; - source += 4/sizeof(int); - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4/sizeof(int)]; - source += 8/sizeof(int); - dest += pitch*2; - } while (--count); -} - -// Maps one span at hx to the screen at sx. -void rt_map1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = colormap[*source]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[pitch] = colormap[source[4]]; - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - dest[pitch] = colormap[source[4]]; - dest[pitch+1] = colormap[source[5]]; - dest[pitch+2] = colormap[source[6]]; - dest[pitch+3] = colormap[source[7]]; - source += 8; - dest += pitch*2; - } while (--count); -} - -void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } -} - -void rt_Translate4cols(const BYTE *translation, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } -} - -// Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_map1col(hx, sx, yl, yh); -} - -// Translates all four spans to the screen starting at sx. 
-void rt_tlate4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_map4cols(sx, yl, yh); -} - -// Adds one span at hx to the screen at sx without clamping. -void rt_add1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[*source]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); -} - -// Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[source[0]]; - DWORD bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx without clamping. 
-void rt_tlateadd1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_add1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_add4cols(sx, yl, yh); -} - -// Shades one span at hx to the screen at sx. -void rt_shaded1col (int hx, int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - do { - DWORD val = colormap[*source]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - } while (--count); -} - -// Shades all four spans to the screen starting at sx. 
-void rt_shaded4cols_c (int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - - do { - DWORD val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Adds all four spans to the screen starting at sx with clamping. 
-void rt_addclamp4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_addclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and subtracts one span at hx to the screen at sx with clamping. 
-void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_subclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); -} - -// Reorder the posts so that they get drawn top-to-bottom instead of bottom-to-top. 
-void rt_flip_posts() -{ - unsigned int *front = horizspan[dc_x & 3]; - unsigned int *back = dc_ctspan[dc_x & 3] - 2; - - while (front < back) - { - swapvalues(front[0], back[0]); - swapvalues(front[1], back[1]); - front += 2; - back -= 2; - } -} - -// Copies all spans in all four columns to the screen starting at sx. -// sx should be dword-aligned. -void rt_draw4cols (int sx) -{ - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. - for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - -#ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) - { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); - } -#endif - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. 
If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. 
- for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } -} - -// Before each pass through a rendering loop that uses these routines, -// call this function to set up the span pointers. -void rt_initcols (BYTE *buff) -{ - int y; - - dc_temp = buff == NULL ? dc_tempbuff : buff; - for (y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; -} - -// Stretches a column into a temporary buffer which is later -// drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_C (void) -{ - int count = dc_count; - BYTE *dest; - fixed_t fracstep; - fixed_t frac; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } -} - -// [RH] Just fills 
a column with a given color -void R_FillColumnHorizP (void) -{ - int count = dc_count; - BYTE color = dc_color; - BYTE *dest; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); -} diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp new file mode 100644 index 0000000000..3356592d25 --- /dev/null +++ b/src/r_drawt_pal.cpp @@ -0,0 +1,867 @@ +/* +** r_drawt.cpp +** Faster column drawers for modern processors +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** These functions stretch columns into a temporary buffer and then +** map them to the screen. On modern machines, this is faster than drawing +** them directly to the screen. +** +** Will I be able to even understand any of this if I come back to it later? +** Let's hope so. :-) +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +// I should have commented this stuff better. +// +// dc_temp is the buffer R_DrawColumnHoriz writes into. +// dc_tspans points into it. +// dc_ctspan points into dc_tspans. +// horizspan also points into dc_tspans. + +// dc_ctspan is advanced while drawing into dc_temp. +// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. + +namespace swrenderer +{ + RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) + { + } + + void RtInitColsPalCommand::Execute(DrawerThread *thread) + { + thread->dc_temp = buff == nullptr ? 
thread->dc_temp_buff : buff; + } + + ///////////////////////////////////////////////////////////////////// + + PalColumnHorizCommand::PalColumnHorizCommand() + { + using namespace drawerargs; + + _source = dc_source; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _color = dc_color; + _x = dc_x; + _yl = dc_yl; + } + + void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t *dest; + fixed_t fracstep; + fixed_t frac; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + fracstep = _iscale; + frac = _texturefrac; + + const uint8_t *source = _source; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + frac += fracstep * thread->skipped_by_thread(_yl); + fracstep *= thread->num_cores; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t color = _color; + uint8_t 
*dest; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////// + + PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) + { + using namespace drawerargs; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _translation = dc_translation; + _color = dc_color; + } + + void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) + { + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); + } + + void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) + { + int *source; + int *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); + source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); + pitch = _pitch*thread->num_cores/sizeof(int); + + if (count & 1) { + 
*dest = *source; + source += 4/sizeof(int); + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4/sizeof(int)]; + source += 8/sizeof(int); + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + *dest = colormap[*source]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[pitch] = colormap[source[4]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + dest[pitch] = colormap[source[4]]; + dest[pitch+1] = colormap[source[5]]; + dest[pitch+2] = colormap[source[6]]; + dest[pitch+3] = colormap[source[7]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void 
DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + const uint8_t *translation = _translation; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + uint8_t b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. 
+ for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } + + void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + const uint8_t *translation = _translation; + int c0, c1; + uint8_t b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. 
+ if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } + + void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = 
RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + 
fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b 
- (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + 
dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = 
RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } +} diff --git a/src/r_main.cpp b/src/r_main.cpp index c69c22c7ba..4bf15d4b99 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -58,6 +58,38 @@ #include "v_font.h" #include "r_data/colormaps.h" #include "p_maputl.h" +#include "r_thread.h" + +CVAR (String, r_viewsize, "", CVAR_NOSET) +CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) + +CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) +{ + if (self != 0 && self != 1) + { + self = 1; + } + else + { // Trigger the change + setsizeneeded = true; + } +} + +CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) +CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + +extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; +extern cycle_t FrameCycles; + +extern bool r_showviewer; + +cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + +namespace swrenderer +{ + using namespace drawerargs; // MACROS ------------------------------------------------------------------ @@ -86,9 +118,8 @@ static void R_ShutdownRenderer(); extern short *openings; extern bool r_fakingunderwater; -extern "C" int fuzzviewheight; +extern int fuzzviewheight; extern subsector_t *InSubsector; -extern bool r_showviewer; // PRIVATE DATA DECLARATIONS ----------------------------------------------- @@ -100,9 +131,6 @@ bool r_dontmaplines; // 
PUBLIC DATA DEFINITIONS ------------------------------------------------- -CVAR (String, r_viewsize, "", CVAR_NOSET) -CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) - double r_BaseVisibility; double r_WallVisibility; double r_FloorVisibility; @@ -157,8 +185,6 @@ void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); void (*hcolfunc_post4) (int sx, int yl, int yh); -cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - // PRIVATE DATA DEFINITIONS ------------------------------------------------ static int lastcenteryfrac; @@ -361,26 +387,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, R_SetVisibility(R_GetVisibility()); } -//========================================================================== -// -// CVAR r_columnmethod -// -// Selects which version of the seg renderers to use. -// -//========================================================================== - -CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) -{ - if (self != 0 && self != 1) - { - self = 1; - } - else - { // Trigger the change - setsizeneeded = true; - } -} - //========================================================================== // // R_Init @@ -455,8 +461,6 @@ void R_CopyStackedViewParameters() // //========================================================================== -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - void R_SetupColormap(player_t *player) { realfixedcolormap = NULL; @@ -574,9 +578,6 @@ void R_SetupFreelook() // //========================================================================== -CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) -CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) - void R_HighlightPortal (PortalDrawseg* pds) { // [ZZ] NO OVERFLOW CHECKS HERE @@ -811,9 +812,6 @@ void R_SetupBuffer () { dc_pitch = pitch; R_InitFuzzTable (pitch); -#if defined(X86_ASM) || defined(X64_ASM) - ASM_PatchPitch (); -#endif } dc_destorg 
= lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) @@ -853,10 +851,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else @@ -950,6 +948,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, { const bool savedviewactive = viewactive; + R_BeginDrawerCommands(); + viewwidth = width; RenderTarget = canvas; bRenderingToCanvas = true; @@ -961,6 +961,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -991,8 +993,6 @@ void R_MultiresInit () // Displays statistics about rendering times // //========================================================================== -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; -extern cycle_t FrameCycles; ADD_STAT (fps) { @@ -1072,3 +1072,5 @@ CCMD (clearscancycles) bestscancycles = HUGE_VAL; } #endif + +} \ No newline at end of file diff --git a/src/r_main.h b/src/r_main.h index 24103393d4..87b56163b0 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -28,23 +28,26 @@ #include "v_palette.h" #include "r_data/colormaps.h" +extern double ViewCos; +extern double ViewSin; +extern int viewwindowx; +extern int viewwindowy; typedef BYTE lighttable_t; // This could be wider for >8 bit display. +namespace swrenderer +{ + // // POV related. 
// extern bool bRenderingToCanvas; -extern double ViewCos; -extern double ViewSin; extern fixed_t viewingrangerecip; extern double FocalLengthX, FocalLengthY; extern double InvZtoScale; extern double WallTMapScale2; -extern int viewwindowx; -extern int viewwindowy; extern double CenterX; extern double CenterY; @@ -142,5 +145,6 @@ extern DAngle stacked_angle; extern void R_CopyStackedViewParameters(); +} #endif // __R_MAIN_H__ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 810aa0003c..8a5ee2263a 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -63,10 +63,14 @@ #pragma warning(disable:4244) #endif +CVAR(Bool, tilt, false, 0); +CVAR(Bool, r_skyboxes, true, 0) + EXTERN_CVAR(Int, r_skymode) -//EXTERN_CVAR (Int, tx) -//EXTERN_CVAR (Int, ty) +namespace swrenderer +{ + using namespace drawerargs; extern subsector_t *InSubsector; @@ -132,15 +136,12 @@ extern "C" { // spanend holds the end of a plane span in each screen row // short spanend[MAXHEIGHT]; -BYTE *tiltlighting[MAXWIDTH]; int planeshade; FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -148,13 +149,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -249,11 +243,6 @@ void R_MapPlane (int y, int x1) GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); } -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM 
(ds_colormap); -#endif - ds_y = y; ds_x1 = x1; ds_x2 = x2; @@ -261,239 +250,15 @@ void R_MapPlane (int y, int x1) spanfunc (); } -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) 
- { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - //========================================================================== // // R_MapTiltedPlane // //========================================================================== -void R_MapTiltedPlane(int y, int x1) +void R_MapTiltedPlane (int y, int x1) { - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0] * width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting(vz, uz, width); - } - - uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); - vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif + R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -502,9 +267,9 @@ void R_MapTiltedPlane(int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane(int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); + R_DrawColoredSpan(y, x1, spanend[y]); } //========================================================================== @@ -1179,9 +944,6 @@ static void R_DrawSkyStriped (visplane_t *pl) // //========================================================================== -CVAR (Bool, tilt, false, 0); -//CVAR (Int, pa, 0, 0) - int R_DrawPlanes () { visplane_t *pl; @@ -1317,7 +1079,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske // 9. Put the camera back where it was to begin with. 
// //========================================================================== -CVAR (Bool, r_skyboxes, true, 0) static int numskyboxes; void R_DrawPortals () @@ -1665,13 +1426,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1896,14 +1650,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t else ds_colormap = basecolormap->Maps, plane_shade = true; - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - // Hack in support for 1 x Z and Z x 1 texture sizes if (ds_ybits == 0) { @@ -1913,13 +1659,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { plane_su[2] = plane_su[1] = plane_su[0] = 0; } -#if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); -#else + R_MapVisPlane (pl, R_MapTiltedPlane); -#endif } //========================================================================== @@ -2023,3 +1764,5 @@ bool R_PlaneInitData () return true; } + +} \ No newline at end of file diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09c..0e133a7cd2 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -27,6 +27,9 @@ class ASkyViewpoint; +namespace swrenderer +{ + // // The infamous visplane // @@ -113,4 +116,6 @@ bool R_PlaneInitData (void); extern visplane_t* floorplane; extern visplane_t* ceilingplane; +} + #endif // __R_PLANE_H__ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ac5683b9b2..d4520e91de 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -57,10 +57,13 @@ CVAR(Bool, r_np2, true, 0) +CVAR(Bool, r_fogboundary, true, 0) +CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -//CVAR 
(Int, ty, 8, 0) -//CVAR (Int, tx, 8, 0) +namespace swrenderer +{ + using namespace drawerargs; #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -141,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); -//============================================================================= -// -// CVAR r_fogboundary -// -// If true, makes fog look more "real" by shading the walls separating two -// sectors with different fog. -//============================================================================= - -CVAR(Bool, r_fogboundary, true, 0) - inline bool IsFogBoundary (sector_t *front, sector_t *back) { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && @@ -158,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } -//============================================================================= -// -// CVAR r_drawmirrors -// -// Set to false to disable rendering of mirrors -//============================================================================= - -CVAR(Bool, r_drawmirrors, true, 0) // // R_RenderMaskedSegRange @@ -2994,3 +2979,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, done: WallC = savecoord; } + +} \ No newline at end of file diff --git a/src/r_segs.h b/src/r_segs.h index 1fc428c964..8610bc6f29 100644 --- a/src/r_segs.h +++ b/src/r_segs.h @@ -23,6 +23,9 @@ #ifndef __R_SEGS_H__ #define __R_SEGS_H__ +namespace swrenderer +{ + struct drawseg_t; void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); @@ -70,4 +73,6 @@ extern int CurrentPortalUniq; extern bool 
CurrentPortalInSkybox; extern TArray WallPortals; +} + #endif diff --git a/src/r_state.h b/src/r_state.h index b66ad57eb7..cd4aee4be3 100644 --- a/src/r_state.h +++ b/src/r_state.h @@ -80,7 +80,7 @@ extern int numgamesubsectors; extern AActor* camera; // [RH] camera instead of viewplayer extern sector_t* viewsector; // [RH] keep track of sector viewing from -extern angle_t xtoviewangle[MAXWIDTH+1]; +namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; } extern DAngle FieldOfView; int R_FindSkin (const char *name, int pclass); // [RH] Find a skin diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 3c33134301..87bce4013a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,13 +42,20 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_thread.h" +namespace swrenderer +{ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +} + +using namespace swrenderer; + //========================================================================== // // DCanvas :: Init @@ -154,9 +161,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_EndDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index e7d130fa85..4ba47d63d7 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -64,6 +64,21 @@ #include "r_data/voxels.h" #include "p_local.h" #include "p_maputl.h" +#include "r_thread.h" + +EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_drawvoxels) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +//CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) + +namespace swrenderer +{ + using namespace drawerargs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. @@ -95,12 +110,6 @@ extern float MaskedScaleY; #define BASEXCENTER (160) #define BASEYCENTER (100) -EXTERN_CVAR (Bool, st_scale) -EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_deathcamera); -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. 
@@ -132,9 +141,6 @@ FTexture *WallSpriteTile; short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; -EXTERN_CVAR (Bool, r_drawplayersprites) -EXTERN_CVAR (Bool, r_drawvoxels) - // // INITIALIZATION FUNCTIONS // @@ -639,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -1758,8 +1764,6 @@ static int sd_comparex (const void *arg1, const void *arg2) return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2; } -CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE) - // Split up vissprites that intersect drawsegs void R_SplitVisSprites () { @@ -2628,7 +2632,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { DWORD *bg2rgb; int spacing; @@ -2642,6 +2646,8 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); + DrawerCommandQueue::WaitForWorkers(); + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -3237,3 +3243,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) OffscreenBufferWidth = width; OffscreenBufferHeight = height; } + +} \ No newline at end of file diff --git a/src/r_things.h b/src/r_things.h index 53b887b181..bf32b655f2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -25,6 +25,12 @@ #include "r_bsp.h" +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + // A vissprite_t is a thing // that will be drawn during a refresh. // I.e. a sprite object that is partly visible. 
@@ -95,9 +101,7 @@ struct vissprite_t vissprite_t() {} }; -struct particle_t; - -void R_DrawParticle (vissprite_t *); +void R_DrawParticle_C (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; @@ -146,5 +150,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +} #endif diff --git a/src/r_thread.cpp b/src/r_thread.cpp new file mode 100644 index 0000000000..c96f14e74b --- /dev/null +++ b/src/r_thread.cpp @@ -0,0 +1,297 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_thread.h" + +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +void R_BeginDrawerCommands() +{ + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); +} + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +DrawerCommandQueue::DrawerCommandQueue() +{ +} + +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + if (queue->commands.empty()) + return; + + // Give worker threads something to do: + + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = (int)(queue->threads.size() + 1); + + struct TryCatchData + { + 
DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = &thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + if (!queue->thread_error.IsEmpty()) + { + static bool first = true; + if (queue->thread_error_fatal) + I_FatalError("%s", queue->thread_error.GetChars()); + else if (first) + Printf("%s\n", queue->thread_error.GetChars()); + first = false; + } + + // Clean up batch: + + for (auto &command : queue->active_commands) + command->~DrawerCommand(); + queue->active_commands.clear(); + queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = 
num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + + struct TryCatchData + { + DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; +} + +void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) +{ + if 
(worker_thread) + { + std::unique_lock end_lock(Instance()->end_mutex); + if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal)) + { + Instance()->thread_error = reason + (FString)": " + command->DebugInfo(); + Instance()->thread_error_fatal = fatal; + } + } + else + { + static bool first = true; + if (fatal) + I_FatalError("%s: %s", reason, command->DebugInfo().GetChars()); + else if (first) + Printf("%s: %s\n", reason, command->DebugInfo().GetChars()); + first = false; + } +} + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) +{ + tryBlock(data); +} diff --git a/src/r_thread.h b/src/r_thread.h new file mode 100644 index 0000000000..7962dfc208 --- /dev/null +++ b/src/r_thread.h @@ -0,0 +1,235 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_draw.h" +#include +#include +#include +#include +#include + +// Use multiple threads when drawing +EXTERN_CVAR(Bool, r_multithreaded) + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + // Working buffer used by Rt drawers + uint8_t dc_temp_buff[MAXHEIGHT * 4]; + uint8_t *dc_temp = nullptr; + + // Working buffer used by Rt drawers, true color edition + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba = nullptr; + + // Working buffer used by the tilted (sloped) span drawer + const uint8_t *tiltlighting[MAXWIDTH]; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + template + T *dest_for_thread(int first_line, int pitch, T *dest) + { + return dest + 
skipped_by_thread(first_line) * pitch; + } + + // The first line in the dc_temp buffer used this thread + int temp_line_for_thread(int first_line) + { + return (first_line + skipped_by_thread(first_line)) / num_cores; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int _dest_y; + + void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) + { +#if defined(_MSC_VER) && defined(_DEBUG) + if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight) + __debugbreak(); // Buffer overrun detected! +#endif + + if (dest_y < 0) + { + count += dest_y; + dest_y = 0; + dest = (uint32_t*)swrenderer::drawerargs::dc_destorg; + } + else if (dest_y >= swrenderer::drawerargs::dc_destheight) + { + dest_y = 0; + count = 0; + } + + if (count < 0 || count > MAXHEIGHT) count = 0; + if (dest_y + count >= swrenderer::drawerargs::dc_destheight) + count = swrenderer::drawerargs::dc_destheight - dest_y; + } + +public: + DrawerCommand() + { + _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch)); + } + + virtual ~DrawerCommand() { } + + virtual void Execute(DrawerThread *thread) = 0; + virtual FString DebugInfo() = 0; +}; + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)); + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 16 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + FString thread_error; + bool thread_error_fatal = false; + + int threaded_render = 0; + 
DrawerThread single_core_thread; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); + + DrawerCommandQueue(); + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0 || !r_multithreaded) + { + T command(std::forward(args)...); + VectoredTryCatch(&command, + [](void *data) + { + T *c = (T*)data; + c->Execute(&Instance()->single_core_thread); + }, + [](void *data, const char *reason, bool fatal) + { + T *c = (T*)data; + ReportDrawerError(c, false, reason, fatal); + }); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8483b9844a..f86a94bcdf 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -132,6 +132,9 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, .. 
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + FTexture::Span unmaskedSpan[2]; const FTexture::Span **spanptr, *spans; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; @@ -1285,6 +1288,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, FDynamicColormap *colormap, int lightlevel, int bottomclip) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + // Use an equation similar to player sprites to determine shade fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; float topy, boty, leftx, rightx; @@ -1352,7 +1358,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(tex); if (ds_xbits != 0) { scalex = double(1u << (32 - ds_xbits)) / scalex; diff --git a/src/v_palette.cpp b/src/v_palette.cpp index 934a57dd3c..aa39ba7913 100644 --- a/src/v_palette.cpp +++ b/src/v_palette.cpp @@ -106,20 +106,11 @@ CCMD (bumpgamma) /* Palette management stuff */ /****************************/ -extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal); - int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num) { -#ifdef X86_ASM - if (CPU.bMMX) - { - int pre = 256 - num - first; - return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre; - } -#endif const PalEntry *pal = (const PalEntry *)pal_in; int bestcolor = first; - int bestdist = 257*257+257*257+257*257; + int bestdist = 257 * 257 + 257 * 257 + 257 * 257; for (int color = first; color < num; color++) { @@ -384,8 +375,8 @@ void InitPalette () R_InitColormaps (); } -extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int 
b, int a); -extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); +void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); +void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { @@ -395,6 +386,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in { memcpy (to, from, count * sizeof(DWORD)); } + return; } else if (a == 256) { @@ -405,6 +397,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in { to[i] = t; } + return; } #if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__) else if (CPU.bSSE2) @@ -423,7 +416,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in } } #endif -#ifdef X86_ASM +#if defined(_M_IX86) || defined(__i386__) else if (CPU.bMMX) { if (count >= 4) diff --git a/src/v_video.h b/src/v_video.h index 971aa6c13d..b72f670947 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -516,10 +516,6 @@ void V_RefreshViewBorder (); void V_SetBorderNeedRefresh(); -#if defined(X86_ASM) || defined(X64_ASM) -extern "C" void ASM_PatchPitch (void); -#endif - int CheckRatio (int width, int height, int *trueratio=NULL); static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); } inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; } diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0e8dd3dec9..026bbc63bb 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1375,17 +1375,16 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - if (realfixedcolormap == NULL) + auto &map = swrenderer::realfixedcolormap; + if (map == NULL) { color0 = 0; color1 = 0xFFFFFFF; } else { - color0 = 
D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2, - realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); - color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, - realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); + color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0); + color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] / 2, 1); SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } diff --git a/src/x86.cpp b/src/x86.cpp index f6c878da61..17c946ac0f 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -227,10 +227,9 @@ void DumpCPUInfo(const CPUInfo *cpu) } } -#if 0 -// Compiler output for this function is crap compared to the assembly -// version, which is why it isn't used. -void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) +#if !defined(__amd64__) && !defined(_M_X64) + +void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { __m64 blendcolor; __m64 blendalpha; @@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g } #endif -#ifdef X86_ASM -extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); -#endif void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { @@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g unaligned = ((size_t)from | (size_t)to) & 0xF; -#ifdef X86_ASM - // For unaligned accesses, the assembly MMX version is slightly faster. - // Note that using unaligned SSE loads and stores is still faster than - // the compiler-generated MMX version. 
- if (unaligned) - { - DoBlending_MMX(from, to, count, r, g, b, a); - return; - } -#endif - #if defined(__amd64__) || defined(_M_X64) long long color; @@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g zero = _mm_setzero_si128(); -#ifndef X86_ASM if (unaligned) { for (count >>= 2; count > 0; --count) @@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g } } else -#endif { for (count >>= 2; count > 0; --count) {