From d9a334c69487b7cce3205275dfe6820cae16761d Mon Sep 17 00:00:00 2001 From: Shpoike Date: Thu, 19 Jan 2023 04:44:24 +0000 Subject: [PATCH] Add halfrate shading to the vulkan renderer, where supported by headers+drivers. Fonts are exempt. --- engine/client/renderer.c | 2 ++ engine/gl/gl_font.c | 1 + engine/gl/gl_shader.c | 22 ++++++++++++++++++++-- engine/gl/shader.h | 2 +- engine/vk/vk_backend.c | 27 ++++++++++++++++++++++++++- engine/vk/vk_init.c | 15 ++++++++++++--- engine/vk/vkrenderer.h | 5 ++++- 7 files changed, 66 insertions(+), 8 deletions(-) diff --git a/engine/client/renderer.c b/engine/client/renderer.c index 0f65ad489..e9b0ce3a9 100644 --- a/engine/client/renderer.c +++ b/engine/client/renderer.c @@ -117,6 +117,7 @@ cvar_t gl_shadeq1_name = CVARD ("gl_shadeq1_name", "*", "Rename all surfac extern cvar_t r_vertexlight; extern cvar_t r_forceprogramify; extern cvar_t r_glsl_precache; +extern cvar_t r_halfrate; extern cvar_t dpcompat_nopremulpics; #ifdef PSKMODELS cvar_t dpcompat_psa_ungroup = CVAR ("dpcompat_psa_ungroup", "0"); @@ -1020,6 +1021,7 @@ void Renderer_Init(void) Cvar_Register (&r_forceprogramify, GLRENDEREROPTIONS); Cvar_Register (&r_glsl_precache, GLRENDEREROPTIONS); + Cvar_Register (&r_halfrate, GRAPHICALNICETIES); #ifdef HAVE_LEGACY Cvar_Register (&dpcompat_nopremulpics, GLRENDEREROPTIONS); #endif diff --git a/engine/gl/gl_font.c b/engine/gl/gl_font.c index 4e9a2fa42..9dea5fb50 100644 --- a/engine/gl/gl_font.c +++ b/engine/gl/gl_font.c @@ -473,6 +473,7 @@ void Font_Init(void) fontplanes.shader = R_RegisterShader("ftefont", SUF_2D, "{\n" + "fullrate\n" //don't hurt readability of text. 
"if $nofixed\n" "program default2d\n" "endif\n" diff --git a/engine/gl/gl_shader.c b/engine/gl/gl_shader.c index 4a8b63539..13b47cc33 100644 --- a/engine/gl/gl_shader.c +++ b/engine/gl/gl_shader.c @@ -46,6 +46,7 @@ cvar_t r_forceprogramify = CVARAFD("r_forceprogramify", "0", "dpcompat_makeshitu cvar_t dpcompat_nopremulpics = CVARFD("dpcompat_nopremulpics", "0", CVAR_SHADERSYSTEM, "By default FTE uses premultiplied alpha for hud/2d images, while DP does not (which results in halos with low-res content). Unfortunately DDS files would need to be recompressed, resulting in visible issues."); #endif cvar_t r_glsl_precache = CVARFD("r_glsl_precache", "0", CVAR_SHADERSYSTEM, "Force all relevant glsl permutations to load upfront."); +cvar_t r_halfrate = CVARFD("r_halfrate", "0", CVAR_SHADERSYSTEM, "Use half-rate shading (where supported by gpu)."); extern cvar_t r_glsl_offsetmapping_reliefmapping; extern cvar_t r_drawflat; @@ -964,9 +965,17 @@ static void Shader_NoMipMaps (parsestate_t *ps, const char **ptr) static void Shader_Affine (parsestate_t *ps, const char **ptr) { shader_t *shader = ps->s; - shader->flags |= SBITS_AFFINE; + int i; + for (i = 0; i < countof(shader->passes); i++) + shader->passes[i].shaderbits |= SBITS_AFFINE; +} +static void Shader_FullRate (parsestate_t *ps, const char **ptr) +{ + shader_t *shader = ps->s; + int i; + for (i = 0; i < countof(shader->passes); i++) + shader->passes[i].shaderbits |= SBITS_MISC_FULLRATE; } - static void Shader_NoPicMip (parsestate_t *ps, const char **ptr) { @@ -2872,6 +2881,7 @@ static shaderkey_t shaderkeys[] = {"deferredlight", Shader_Deferredlight, "fte"}, //(sort = prelight) // {"lpp_light", Shader_Deferredlight, "fte"}, //(sort = prelight) {"affine", Shader_Affine, "fte"}, //some hardware is horribly slow, and can benefit from certain hints. + {"fullrate", Shader_FullRate, "fte"}, //blocks half-rate shading on this surface. 
{"bemode", Shader_BEMode, "fte"}, @@ -5854,6 +5864,14 @@ done:; } } } + + if (!r_halfrate.ival) + { + for (i = 0; i < s->numpasses; i++) + { + s->passes[i].shaderbits |= SBITS_MISC_FULLRATE; + } + } } /* static void Shader_UpdateRegistration (void) diff --git a/engine/gl/shader.h b/engine/gl/shader.h index 131c9ef9f..902ff9ee7 100644 --- a/engine/gl/shader.h +++ b/engine/gl/shader.h @@ -169,6 +169,7 @@ enum SBITS_TESSELLATION = 0x00100000, SBITS_AFFINE = 0x00200000, + SBITS_MISC_FULLRATE = 0x00400000, //don't use half-rate shading (for text/ui) //provided for the backend to hack about with SBITS_LINES = 0x80000000 @@ -653,7 +654,6 @@ struct shader_s SHADER_HASLIGHTMAP = 1 << 16, SHADER_HASTOPBOTTOM = 1 << 17, SHADER_HASREFLECTCUBE = 1 << 18, //shader has a T_GEN_REFLECTCUBE pass (otherwise we can skip surf envmaps for better batching) -// SHADER_STATICDATA = 1 << 18, //set if true: no deforms, no tcgen, rgbgen=identitylighting, alphagen=identity, tmu0=st + tmu1=lm(if available) for every pass, no norms SHADER_HASREFLECT = 1 << 19, //says that we need to generate a reflection image first SHADER_HASREFRACT = 1 << 20, //says that we need to generate a refraction image first SHADER_HASREFRACTDEPTH = 1 << 21, //refraction generation needs to generate a depth texture too. diff --git a/engine/vk/vk_backend.c b/engine/vk/vk_backend.c index 29105f9fb..c49801b5f 100644 --- a/engine/vk/vk_backend.c +++ b/engine/vk/vk_backend.c @@ -2155,6 +2155,9 @@ static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned i VkGraphicsPipelineCreateInfo pipeCreateInfo = {VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO}; VkPipelineShaderStageCreateInfo shaderStages[2] = {{0}}; VkPipelineRasterizationStateRasterizationOrderAMD ro = {VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD}; //long enough names for you? 
+#ifdef VK_KHR_fragment_shading_rate + VkPipelineFragmentShadingRateStateCreateInfoKHR shadingrate = {VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR}; +#endif struct specdata_s { int alphamode; @@ -2511,6 +2514,28 @@ static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned i // pipeCreateInfo.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; +#ifdef VK_KHR_fragment_shading_rate + if (vk.khr_fragment_shading_rate) + { + //three ways to specify rates... we need to set which one wins here. + shadingrate.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//pipeline vs primitive + shadingrate.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//previous vs attachment + if (blendflags & SBITS_MISC_FULLRATE) + { + shadingrate.fragmentSize.width = 1; + shadingrate.fragmentSize.height = 1; + } + else + { //actually this is more quarter-rate. oh well. + shadingrate.fragmentSize.width = 2; + shadingrate.fragmentSize.height = 2; + } + + shadingrate.pNext = pipeCreateInfo.pNext; + pipeCreateInfo.pNext = &shadingrate; + } +#endif + err = vkCreateGraphicsPipelines(vk.device, vk.pipelinecache, 1, &pipeCreateInfo, vkallocationcb, &pipe->pipeline); DebugSetName(VK_OBJECT_TYPE_PIPELINE, (uint64_t)pipe->pipeline, p->name); @@ -2530,7 +2555,7 @@ static void BE_BindPipeline(program_t *p, unsigned int shaderflags, unsigned int blendflags &= 0 | SBITS_SRCBLEND_BITS | SBITS_DSTBLEND_BITS | SBITS_MASK_BITS | SBITS_ATEST_BITS | SBITS_MISC_DEPTHWRITE | SBITS_MISC_NODEPTHTEST | SBITS_DEPTHFUNC_BITS - | SBITS_LINES + | SBITS_LINES | SBITS_MISC_FULLRATE ; shaderflags &= 0 | SHADER_CULL_FRONT | SHADER_CULL_BACK diff --git a/engine/vk/vk_init.c b/engine/vk/vk_init.c index f2937bc05..23825d78c 100644 --- a/engine/vk/vk_init.c +++ b/engine/vk/vk_init.c @@ -22,6 +22,9 @@ static cvar_t vk_khr_get_memory_requirements2 = CVARFD("vk_khr_get_memory_requir static cvar_t vk_khr_dedicated_allocation = CVARFD("vk_khr_dedicated_allocation", "", 
CVAR_VIDEOLATCH, "Flag vulkan memory allocations as dedicated, where applicable."); static cvar_t vk_khr_push_descriptor = CVARFD("vk_khr_push_descriptor", "", CVAR_VIDEOLATCH, "Enables better descriptor streaming."); static cvar_t vk_amd_rasterization_order = CVARFD("vk_amd_rasterization_order", "", CVAR_VIDEOLATCH, "Enables the use of relaxed rasterization ordering, for a small speedup at the minor risk of a little zfighting."); +#ifdef VK_KHR_fragment_shading_rate +static cvar_t vK_khr_fragment_shading_rate = CVARFD("vK_khr_fragment_shading_rate","", CVAR_VIDEOLATCH, "Enables the use of variable shading rates."); +#endif #ifdef VK_EXT_astc_decode_mode static cvar_t vk_ext_astc_decode_mode = CVARFD("vk_ext_astc_decode_mode", "", CVAR_VIDEOLATCH, "Enables reducing texture cache sizes for LDR ASTC-compressed textures."); #endif @@ -42,9 +45,12 @@ void VK_RegisterVulkanCvars(void) Cvar_Register (&vk_usememorypools, VKRENDEREROPTIONS); Cvar_Register (&vk_khr_get_memory_requirements2,VKRENDEREROPTIONS); - Cvar_Register (&vk_khr_dedicated_allocation,VKRENDEREROPTIONS); - Cvar_Register (&vk_khr_push_descriptor, VKRENDEREROPTIONS); - Cvar_Register (&vk_amd_rasterization_order, VKRENDEREROPTIONS); + Cvar_Register (&vk_khr_dedicated_allocation, VKRENDEREROPTIONS); + Cvar_Register (&vk_khr_push_descriptor, VKRENDEREROPTIONS); + Cvar_Register (&vk_amd_rasterization_order, VKRENDEREROPTIONS); +#ifdef VK_KHR_fragment_shading_rate + Cvar_Register (&vK_khr_fragment_shading_rate, VKRENDEREROPTIONS); +#endif #ifdef VK_EXT_astc_decode_mode Cvar_Register (&vk_ext_astc_decode_mode, VKRENDEREROPTIONS); #endif @@ -4708,6 +4714,9 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre {&vk.khr_dedicated_allocation, VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, &vk_khr_dedicated_allocation, true, NULL, NULL}, {&vk.khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, &vk_khr_push_descriptor, true, NULL, NULL}, {&vk.amd_rasterization_order, 
VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME, &vk_amd_rasterization_order, false, NULL, NULL}, +#ifdef VK_KHR_fragment_shading_rate + {&vk.khr_fragment_shading_rate, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, &vK_khr_fragment_shading_rate, true, NULL, NULL}, +#endif #ifdef VK_EXT_astc_decode_mode {&vk.ext_astc_decode_mode, VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, &vk_ext_astc_decode_mode, true, NULL, NULL}, #endif diff --git a/engine/vk/vkrenderer.h b/engine/vk/vkrenderer.h index f8dba7c36..c455780cf 100644 --- a/engine/vk/vkrenderer.h +++ b/engine/vk/vkrenderer.h @@ -28,7 +28,7 @@ #endif #define VK_NO_PROTOTYPES -#include <../vulkan/vulkan.h> +#include <vulkan/vulkan.h> #if defined(_MSC_VER) && !defined(UINT64_MAX) #define UINT64_MAX _UI64_MAX @@ -281,6 +281,9 @@ extern struct vulkaninfo_s qboolean khr_push_descriptor; //more efficient descriptor streaming qboolean amd_rasterization_order; //allows primitives to draw in any order qboolean ext_astc_decode_mode; //small perf boost +#ifdef VK_KHR_fragment_shading_rate + qboolean khr_fragment_shading_rate; //small perf boost. probably more useful for battery. +#endif VkInstance instance; VkDevice device;