1
0
Fork 0
forked from fte/fteqw

Add halfrate shading to the vulkan renderer, where supported by headers+drivers. Fonts are exempt.

This commit is contained in:
Shpoike 2023-01-19 04:44:24 +00:00
parent 5786b43a7d
commit d9a334c694
7 changed files with 66 additions and 8 deletions

View file

@ -117,6 +117,7 @@ cvar_t gl_shadeq1_name = CVARD ("gl_shadeq1_name", "*", "Rename all surfac
extern cvar_t r_vertexlight;
extern cvar_t r_forceprogramify;
extern cvar_t r_glsl_precache;
extern cvar_t r_halfrate;
extern cvar_t dpcompat_nopremulpics;
#ifdef PSKMODELS
cvar_t dpcompat_psa_ungroup = CVAR ("dpcompat_psa_ungroup", "0");
@ -1020,6 +1021,7 @@ void Renderer_Init(void)
Cvar_Register (&r_forceprogramify, GLRENDEREROPTIONS);
Cvar_Register (&r_glsl_precache, GLRENDEREROPTIONS);
Cvar_Register (&r_halfrate, GRAPHICALNICETIES);
#ifdef HAVE_LEGACY
Cvar_Register (&dpcompat_nopremulpics, GLRENDEREROPTIONS);
#endif

View file

@ -473,6 +473,7 @@ void Font_Init(void)
fontplanes.shader = R_RegisterShader("ftefont", SUF_2D,
"{\n"
"fullrate\n" //don't hurt readability of text.
"if $nofixed\n"
"program default2d\n"
"endif\n"

View file

@ -46,6 +46,7 @@ cvar_t r_forceprogramify = CVARAFD("r_forceprogramify", "0", "dpcompat_makeshitu
cvar_t dpcompat_nopremulpics = CVARFD("dpcompat_nopremulpics", "0", CVAR_SHADERSYSTEM, "By default FTE uses premultiplied alpha for hud/2d images, while DP does not (which results in halos with low-res content). Unfortunately DDS files would need to be recompressed, resulting in visible issues.");
#endif
cvar_t r_glsl_precache = CVARFD("r_glsl_precache", "0", CVAR_SHADERSYSTEM, "Force all relevant glsl permutations to load upfront.");
cvar_t r_halfrate = CVARFD("r_halfrate", "0", CVAR_SHADERSYSTEM, "Use half-rate shading (where supported by gpu).");
extern cvar_t r_glsl_offsetmapping_reliefmapping;
extern cvar_t r_drawflat;
@ -964,9 +965,17 @@ static void Shader_NoMipMaps (parsestate_t *ps, const char **ptr)
/*
Shader_Affine: handler for the "affine" shader keyword.
Tags every pass slot of the shader being parsed with SBITS_AFFINE.

ps:  parse state; ps->s is the shader being built.
ptr: remaining script text; this keyword takes no arguments, so it is not read.
*/
static void Shader_Affine (parsestate_t *ps, const char **ptr)
{
	shader_t *shader = ps->s;
	int i;

	//SBITS_* values are per-pass state bits and must only be stored in passes[].shaderbits.
	//SBITS_AFFINE is 0x00200000, which is the same value as SHADER_HASREFRACTDEPTH (1 << 21),
	//so ORing it into shader->flags would wrongly request a refraction depth texture.

	//walk the whole array rather than numpasses, so the bit is set regardless of how many passes end up used.
	for (i = 0; i < countof(shader->passes); i++)
		shader->passes[i].shaderbits |= SBITS_AFFINE;
}
/*
Shader_FullRate: handler for the "fullrate" shader keyword.
Sets SBITS_MISC_FULLRATE on every pass slot of the shader being parsed.

ps:  parse state; ps->s is the shader being built.
ptr: remaining script text; not read here.
*/
static void Shader_FullRate (parsestate_t *ps, const char **ptr)
{
	shader_t *s = ps->s;
	int slot = 0;

	//covers the full passes array, not just the passes defined so far.
	while (slot < countof(s->passes))
	{
		s->passes[slot].shaderbits |= SBITS_MISC_FULLRATE;
		slot++;
	}
}
static void Shader_NoPicMip (parsestate_t *ps, const char **ptr)
{
@ -2872,6 +2881,7 @@ static shaderkey_t shaderkeys[] =
{"deferredlight", Shader_Deferredlight, "fte"}, //(sort = prelight)
// {"lpp_light", Shader_Deferredlight, "fte"}, //(sort = prelight)
{"affine", Shader_Affine, "fte"}, //some hardware is horribly slow, and can benefit from certain hints.
{"fullrate", Shader_FullRate, "fte"}, //blocks half-rate shading on this surface.
{"bemode", Shader_BEMode, "fte"},
@ -5854,6 +5864,14 @@ done:;
}
}
}
if (!r_halfrate.ival)
{
for (i = 0; i < s->numpasses; i++)
{
s->passes[i].shaderbits |= SBITS_MISC_FULLRATE;
}
}
}
/*
static void Shader_UpdateRegistration (void)

View file

@ -169,6 +169,7 @@ enum
SBITS_TESSELLATION = 0x00100000,
SBITS_AFFINE = 0x00200000,
SBITS_MISC_FULLRATE = 0x00400000, //don't use half-rate shading (for text/ui)
//provided for the backend to hack about with
SBITS_LINES = 0x80000000
@ -653,7 +654,6 @@ struct shader_s
SHADER_HASLIGHTMAP = 1 << 16,
SHADER_HASTOPBOTTOM = 1 << 17,
SHADER_HASREFLECTCUBE = 1 << 18, //shader has a T_GEN_REFLECTCUBE pass (otherwise we can skip surf envmaps for better batching)
// SHADER_STATICDATA = 1 << 18, //set if true: no deforms, no tcgen, rgbgen=identitylighting, alphagen=identity, tmu0=st + tmu1=lm(if available) for every pass, no norms
SHADER_HASREFLECT = 1 << 19, //says that we need to generate a reflection image first
SHADER_HASREFRACT = 1 << 20, //says that we need to generate a refraction image first
SHADER_HASREFRACTDEPTH = 1 << 21, //refraction generation needs to generate a depth texture too.

View file

@ -2155,6 +2155,9 @@ static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned i
VkGraphicsPipelineCreateInfo pipeCreateInfo = {VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
VkPipelineShaderStageCreateInfo shaderStages[2] = {{0}};
VkPipelineRasterizationStateRasterizationOrderAMD ro = {VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD}; //long enough names for you?
#ifdef VK_KHR_fragment_shading_rate
VkPipelineFragmentShadingRateStateCreateInfoKHR shadingrate = {VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR};
#endif
struct specdata_s
{
int alphamode;
@ -2511,6 +2514,28 @@ static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned i
// pipeCreateInfo.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;
#ifdef VK_KHR_fragment_shading_rate
if (vk.khr_fragment_shading_rate)
{
//three ways to specify rates... we need to set which one wins here.
shadingrate.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//pipeline vs primitive
shadingrate.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//previous vs attachment
if (blendflags & SBITS_MISC_FULLRATE)
{
shadingrate.fragmentSize.width = 1;
shadingrate.fragmentSize.height = 1;
}
else
{ //actually this is more quarter-rate. oh well.
shadingrate.fragmentSize.width = 2;
shadingrate.fragmentSize.height = 2;
}
shadingrate.pNext = pipeCreateInfo.pNext;
pipeCreateInfo.pNext = &shadingrate;
}
#endif
err = vkCreateGraphicsPipelines(vk.device, vk.pipelinecache, 1, &pipeCreateInfo, vkallocationcb, &pipe->pipeline);
DebugSetName(VK_OBJECT_TYPE_PIPELINE, (uint64_t)pipe->pipeline, p->name);
@ -2530,7 +2555,7 @@ static void BE_BindPipeline(program_t *p, unsigned int shaderflags, unsigned int
blendflags &= 0
| SBITS_SRCBLEND_BITS | SBITS_DSTBLEND_BITS | SBITS_MASK_BITS | SBITS_ATEST_BITS
| SBITS_MISC_DEPTHWRITE | SBITS_MISC_NODEPTHTEST | SBITS_DEPTHFUNC_BITS
| SBITS_LINES
| SBITS_LINES | SBITS_MISC_FULLRATE
;
shaderflags &= 0
| SHADER_CULL_FRONT | SHADER_CULL_BACK

View file

@ -22,6 +22,9 @@ static cvar_t vk_khr_get_memory_requirements2 = CVARFD("vk_khr_get_memory_requir
static cvar_t vk_khr_dedicated_allocation = CVARFD("vk_khr_dedicated_allocation", "", CVAR_VIDEOLATCH, "Flag vulkan memory allocations as dedicated, where applicable.");
static cvar_t vk_khr_push_descriptor = CVARFD("vk_khr_push_descriptor", "", CVAR_VIDEOLATCH, "Enables better descriptor streaming.");
static cvar_t vk_amd_rasterization_order = CVARFD("vk_amd_rasterization_order", "", CVAR_VIDEOLATCH, "Enables the use of relaxed rasterization ordering, for a small speedup at the minor risk of a little zfighting.");
#ifdef VK_KHR_fragment_shading_rate
//user-visible cvar name uses the lowercase "vk_" prefix, matching the other vk_* extension cvars.
//the C identifier keeps its original "vK_" spelling because the registration and extension-table code refer to it by that name.
static cvar_t vK_khr_fragment_shading_rate = CVARFD("vk_khr_fragment_shading_rate","", CVAR_VIDEOLATCH, "Enables the use of variable shading rates.");
#endif
#ifdef VK_EXT_astc_decode_mode
static cvar_t vk_ext_astc_decode_mode = CVARFD("vk_ext_astc_decode_mode", "", CVAR_VIDEOLATCH, "Enables reducing texture cache sizes for LDR ASTC-compressed textures.");
#endif
@ -45,6 +48,9 @@ void VK_RegisterVulkanCvars(void)
Cvar_Register (&vk_khr_dedicated_allocation, VKRENDEREROPTIONS);
Cvar_Register (&vk_khr_push_descriptor, VKRENDEREROPTIONS);
Cvar_Register (&vk_amd_rasterization_order, VKRENDEREROPTIONS);
#ifdef VK_KHR_fragment_shading_rate
Cvar_Register (&vK_khr_fragment_shading_rate, VKRENDEREROPTIONS);
#endif
#ifdef VK_EXT_astc_decode_mode
Cvar_Register (&vk_ext_astc_decode_mode, VKRENDEREROPTIONS);
#endif
@ -4708,6 +4714,9 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
{&vk.khr_dedicated_allocation, VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, &vk_khr_dedicated_allocation, true, NULL, NULL},
{&vk.khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, &vk_khr_push_descriptor, true, NULL, NULL},
{&vk.amd_rasterization_order, VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME, &vk_amd_rasterization_order, false, NULL, NULL},
#ifdef VK_KHR_fragment_shading_rate
{&vk.khr_fragment_shading_rate, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, &vK_khr_fragment_shading_rate, true, NULL, NULL},
#endif
#ifdef VK_EXT_astc_decode_mode
{&vk.ext_astc_decode_mode, VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, &vk_ext_astc_decode_mode, true, NULL, NULL},
#endif

View file

@ -28,7 +28,7 @@
#endif
#define VK_NO_PROTOTYPES
#include <../vulkan/vulkan.h>
#include <vulkan/vulkan.h>
#if defined(_MSC_VER) && !defined(UINT64_MAX)
#define UINT64_MAX _UI64_MAX
@ -281,6 +281,9 @@ extern struct vulkaninfo_s
qboolean khr_push_descriptor; //more efficient descriptor streaming
qboolean amd_rasterization_order; //allows primitives to draw in any order
qboolean ext_astc_decode_mode; //small perf boost
#ifdef VK_KHR_fragment_shading_rate
qboolean khr_fragment_shading_rate; //small perf boost. probably more useful for battery.
#endif
VkInstance instance;
VkDevice device;