implemented depth fade

improved PSO change tracking
embedded more common uber pixel shader permutations
This commit is contained in:
myT 2023-09-17 01:56:35 +02:00
parent aa51fae5a6
commit 00840e10d8
13 changed files with 339 additions and 109 deletions

View file

@ -3723,3 +3723,43 @@ void Com_ParseHexColor( float* c, const char* text, qbool hasAlpha )
c[3] = 1.0f;
}
}
// Returns the bit pattern of a 32-bit float as an unsigned integer.
// The value is punned through a union instead of a pointer cast: reading a
// float object through a uint32_t* violates strict aliasing, whereas GCC
// documents union-based punning as supported.
static uint32_t asuint( float x )
{
	union { float f; uint32_t u; } pun;
	pun.f = x;
	return pun.u;
}

// Returns the 32-bit float whose bit pattern is x (inverse of asuint).
static float asfloat( uint32_t x )
{
	union { float f; uint32_t u; } pun;
	pun.u = x;
	return pun.f;
}

// IEEE-754 16-bit floating-point format (without infinity)
// "Accuracy and performance of the lattice Boltzmann method with 64-bit, 32-bit, and customized 16-bit number formats"
// x86 intrinsic: _cvtsh_ss
//
// Decodes a 16-bit half (1 sign bit, 5 exponent bits, 10 mantissa bits) into a 32-bit float.
// Exponent 31 is decoded like any other non-zero exponent, i.e. there is no Inf/NaN encoding.
// Each case is evaluated in its own branch so that the denormal shift is only
// computed when the mantissa is non-zero; with a zero mantissa the shift count
// (150 - v) would be 150, which is undefined behaviour for a 32-bit operand.
float f16tof32( uint16_t x )
{
	const uint32_t h = x;
	const uint32_t e = (h & 0x7C00u) >> 10; // exponent
	const uint32_t m = (h & 0x03FFu) << 13; // mantissa, already aligned to the float32 mantissa
	uint32_t bits = (h & 0x8000u) << 16;    // sign

	if(e != 0)
	{
		// normalized: re-bias the exponent from 15 to 127
		bits |= ((e + 112) << 23) | m;
	}
	else if(m != 0)
	{
		// denormalized: the float32 exponent of (float)m locates the leading 1 of the mantissa
		const uint32_t v = asuint((float)m) >> 23; // here v is in [140, 149], so (150 - v) is in [1, 10]
		bits |= ((v - 37) << 23) | ((m << (150 - v)) & 0x007FE000u);
	}
	// e == 0 && m == 0: signed zero, nothing to add

	return asfloat(bits);
}

// IEEE-754 16-bit floating-point format (without infinity)
// "Accuracy and performance of the lattice Boltzmann method with 64-bit, 32-bit, and customized 16-bit number formats"
// x86 intrinsic: _cvtss_sh
//
// Encodes a 32-bit float as a 16-bit half (1 sign bit, 5 exponent bits, 10 mantissa bits).
// - inputs too small for a half denormal become signed zero
// - inputs whose float32 exponent field exceeds 143 after rounding (this includes Inf/NaN)
//   saturate to 0x7FFF plus the sign bit
// The denormal shift is only evaluated for exponents in [102, 112], where the
// shift count (125 - e) is in [13, 23]; outside that range the unsigned
// subtraction would wrap or exceed 31, which is undefined behaviour.
// NOTE(review): decoders that follow standard IEEE-754 half semantics treat
// exponent 31 as Inf/NaN - confirm callers never pass magnitudes of 65520 or more.
uint16_t f32tof16( float x )
{
	const uint32_t b = asuint(x) + 0x00001000; // rounding: add the bit just below the kept mantissa
	const uint32_t e = (b & 0x7F800000u) >> 23; // exponent
	const uint32_t m = b & 0x007FFFFFu;         // mantissa
	uint32_t bits = (b & 0x80000000u) >> 16;    // sign

	if(e > 112)
	{
		// normalized: re-bias the exponent from 127 to 15
		bits |= (((e - 112) << 10) & 0x7C00u) | (m >> 13);
	}
	else if(e > 101)
	{
		// denormalized: 0x007FF000 = 0x00800000 - 0x00001000 = implicit leading 1 - initial rounding
		bits |= (((0x007FF000u + m) >> (125 - e)) + 1) >> 1;
	}

	if(e > 143)
	{
		// saturate
		bits |= 0x7FFFu;
	}

	return (uint16_t)bits;
}

View file

@ -1298,6 +1298,10 @@ printHelpResult_t Com_PrintHelp( const char* name, printf_t print, qbool printNo
#endif
float f16tof32( uint16_t x );
uint16_t f32tof16( float x );
// the smallest power of 2 accepted is 1
template<typename T>
static T IsPowerOfTwo( T x )

View file

@ -48,23 +48,24 @@ struct WorldVertexRC
struct WorldPixelRC
{
// general
uint32_t stageIndices[8];
uint32_t stageIndices[8]; // sampler: 16 - texture: 16
float greyscale;
float pad0;
float pad1;
float pad2;
// r_shaderTrace - dynamically enabled
uint32_t shaderTrace;
uint32_t shaderIndex;
uint32_t frameIndex;
uint32_t centerPixel; // x | (y << 16)
uint32_t shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
uint16_t centerPixelX;
uint16_t centerPixelY;
// r_depthFade - statically enabled
uint16_t hFadeDistance;
uint16_t hFadeOffset;
uint32_t depthFadeColorTex; // texture index: 12 - color bias: 4 - color scale: 4
// r_dither - statically enabled
float frameSeed;
float noiseScale;
float invGamma;
float invBrightness;
uint16_t hFrameSeed;
uint16_t hNoiseScale;
uint16_t hInvGamma;
uint16_t hInvBrightness;
};
#pragma pack(pop)
@ -310,6 +311,7 @@ struct BatchType
{
Standard,
DynamicLight,
DepthFade,
Count
};
};
@ -558,6 +560,7 @@ struct PSODesc
cullType_t cullType;
bool polygonOffset;
bool clampDepth;
bool depthFade;
};
#pragma pack(pop)
@ -740,6 +743,14 @@ inline void CmdSetViewportAndScissor(const viewParms_t& vp)
CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight);
}
// Tells whether the given shader should be drawn with depth fade:
// the r_depthFade cvar must be non-zero and the shader's depth fade type
// must lie strictly between DFT_NONE and DFT_TBD.
inline bool IsDepthFadeEnabled(const shader_t& shader)
{
	if(r_depthFade->integer == 0)
	{
		return false;
	}

	if(!(shader.dfType > DFT_NONE))
	{
		return false;
	}

	return shader.dfType < DFT_TBD;
}
const image_t* GetBundleImage(const textureBundle_t& bundle);
uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD = 0);
uint32_t GetSamplerIndex(const image_t* image);

View file

@ -159,7 +159,6 @@ static bool IsCommutativeBlendState(unsigned int stateBits)
return false;
}
static cullType_t GetMirrorredCullType(cullType_t cullType)
{
switch(cullType)
@ -409,8 +408,11 @@ void GRP::ProcessShader(shader_t& shader)
if(shader.isOpaque)
{
Q_assert(IsDepthFadeEnabled(shader) == false);
// @TODO: fix up cache.stageStateBits[0] based on depth state from follow-up states
CachedPSO cache = {};
cache.desc.depthFade = false;
cache.desc.polygonOffset = !!shader.polygonOffset;
cache.desc.clampDepth = clampDepth;
cache.stageStateBits[0] = shader.stages[0]->stateBits & (~GLS_POLYMODE_LINE);
@ -432,6 +434,7 @@ void GRP::ProcessShader(shader_t& shader)
{
CachedPSO cache = {};
cache.desc.depthFade = IsDepthFadeEnabled(shader);
cache.desc.polygonOffset = !!shader.polygonOffset;
cache.desc.clampDepth = clampDepth;
cache.stageCount = 0;
@ -617,6 +620,9 @@ void GRP::DrawGUI()
ImGui::EndTable();
}
ImGui::Text("PSO count: %d", (int)grp.psoCount);
ImGui::Text("PSO changes: %d", (int)grp.world.psoChangeCount);
}
ImGui::End();
}
@ -720,8 +726,11 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
for(uint32_t i = 0; i < uberPixelShaderCacheSize; ++i)
{
const UberPixelShaderState& state = uberPixelShaderStates[i];
const int dither = (state.globalState & UBERPS_DITHER_BIT) != 0 ? 1 : 0;
const bool depthFade = (state.globalState & UBERPS_DEPTHFADE_BIT) != 0;
if(cache.stageCount != (uint32_t)state.stageCount ||
r_dither->integer != (state.globalState & 1))
r_dither->integer != dither ||
cache.desc.depthFade != depthFade)
{
continue;
}
@ -760,6 +769,12 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
macros[macroCount].value = "1";
macroCount++;
}
if(cache.desc.depthFade)
{
macros[macroCount].name = "DEPTH_FADE";
macros[macroCount].value = "1";
macroCount++;
}
for(int s = 0; s < cache.stageCount; ++s)
{
macros[macroCount].name = va("STAGE%d_BITS", s);
@ -776,13 +791,31 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
pixelShaderByteCode = uberPixelShaderByteCodes[uberPixelShaderIndex];
}
// important missing entries can be copy-pasted into UBER_SHADER_PS_LIST
#if 0
Sys_DebugPrintf("PS: ");
if(uberPixelShaderIndex < 0)
{
unsigned int flags = 0;
if(r_dither->integer)
{
flags |= UBERPS_DITHER_BIT;
}
if(cache.desc.depthFade)
{
flags |= UBERPS_DEPTHFADE_BIT;
}
Sys_DebugPrintf("\tshader: %s\n", name);
ri.Printf(PRINT_ALL, "^2 shader: %s\n", name);
Sys_DebugPrintf("\tPS(%d_%X", (int)cache.stageCount, flags);
ri.Printf(PRINT_ALL, " PS(%d_%X", (int)cache.stageCount, flags);
for(int s = 0; s < cache.stageCount; ++s)
{
Sys_DebugPrintf(va("%X ", (int)cache.stageStateBits[s] & pixelShaderStateBits));
Sys_DebugPrintf("_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
ri.Printf(PRINT_ALL, "_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
}
Sys_DebugPrintf(") \\\n");
ri.Printf(PRINT_ALL, ") \\\n");
}
Sys_DebugPrintf("\n");
#endif
uint32_t a = 0;
@ -797,6 +830,12 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::TexCoord, DataType::Float32, 2, 0);
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Color, DataType::UNorm8, 4, 0);
}
if(cache.desc.depthFade)
{
desc.depthStencil.DisableDepth();
}
else
{
desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float;
desc.depthStencil.depthComparison =
(cache.stageStateBits[0] & GLS_DEPTHFUNC_EQUAL) != 0 ?
@ -804,6 +843,7 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
ComparisonFunction::GreaterEqual;
desc.depthStencil.enableDepthTest = (cache.stageStateBits[0] & GLS_DEPTHTEST_DISABLE) == 0;
desc.depthStencil.enableDepthWrites = (cache.stageStateBits[0] & GLS_DEPTHMASK_TRUE) != 0;
}
desc.rasterizer.cullMode = cache.desc.cullType;
desc.rasterizer.polygonOffset = cache.desc.polygonOffset;
desc.rasterizer.clampDepth = cache.desc.clampDepth;

View file

@ -549,6 +549,7 @@ void World::DrawPrePass(const drawSceneViewCommand_t& cmd)
CmdBindRenderTargets(0, NULL, &depthTexture);
CmdBindRootSignature(zppRootSignature);
CmdBindPipeline(zppPipeline);
psoChangeCount++;
CmdBindDescriptorTable(zppRootSignature, zppDescriptorTable);
ZPPVertexRC vertexRC;
@ -564,6 +565,19 @@ void World::DrawPrePass(const drawSceneViewCommand_t& cmd)
void World::BeginBatch(const shader_t* shader, bool hasStaticGeo, BatchType::Id type)
{
if(type == BatchType::DepthFade && batchType != BatchType::DepthFade)
{
TextureBarrier barrier(depthTexture, ResourceStates::PixelShaderAccessBit);
CmdBarrier(1, &barrier);
CmdBindRenderTargets(1, &grp.renderTarget, NULL);
}
else if(type != BatchType::DepthFade && batchType == BatchType::DepthFade)
{
TextureBarrier barrier(depthTexture, ResourceStates::DepthWriteBit);
CmdBarrier(1, &barrier);
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
}
tess.numVertexes = 0;
tess.numIndexes = 0;
tess.depthFade = DFT_NONE;
@ -733,14 +747,17 @@ void World::EndBatch()
WorldPixelRC pixelRC = {};
pixelRC.greyscale = tess.greyscale;
pixelRC.frameSeed = grp.frameSeed;
pixelRC.noiseScale = r_ditherStrength->value;
pixelRC.invBrightness = 1.0f / r_brightness->value;
pixelRC.invGamma = 1.0f / r_gamma->value;
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader;
pixelRC.shaderIndex = (uint32_t)shader->index;
pixelRC.frameIndex = RHI::GetFrameIndex();
pixelRC.centerPixel = (glConfig.vidWidth / 2) | ((glConfig.vidHeight / 2) << 16);
pixelRC.hFrameSeed = f32tof16(grp.frameSeed);
pixelRC.hNoiseScale = f32tof16(r_ditherStrength->value);
pixelRC.hInvGamma = f32tof16(1.0f / r_gamma->value);
pixelRC.hInvBrightness = f32tof16(1.0f / r_brightness->value);
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader | (RHI::GetFrameIndex() << 1) | ((uint32_t)shader->index << 3);
pixelRC.centerPixelX = glConfig.vidWidth / 2;
pixelRC.centerPixelY = glConfig.vidHeight / 2;
pixelRC.hFadeDistance = f32tof16(tess.shader->dfInvDist);
pixelRC.hFadeOffset = f32tof16(tess.shader->dfBias);
pixelRC.depthFadeColorTex = (uint32_t)r_depthFadeScaleAndBias[tess.shader->dfType] | (depthTextureIndex << 8);
for(int s = 0; s < pipeline.numStages; ++s)
{
const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle);
@ -803,7 +820,7 @@ void World::EndSkyBatch()
void World::RestartBatch()
{
EndBatch();
BeginBatch(tess.shader, batchHasStaticGeo, BatchType::Standard);
BeginBatch(tess.shader, batchHasStaticGeo, batchType);
}
void World::DrawGUI()
@ -1065,6 +1082,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
CmdBindDescriptorTable(grp.uberRootSignature, grp.descriptorTable);
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
batchPSO = RHI_MAKE_NULL_HANDLE();
batchType = BatchType::Standard;
const drawSurf_t* drawSurfs = cmd.drawSurfs;
const int surfCount = cmd.numDrawSurfs;
@ -1107,6 +1125,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
CmdBindDescriptorTable(grp.uberRootSignature, grp.descriptorTable);
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
batchPSO = RHI_MAKE_NULL_HANDLE();
batchType = BatchType::Standard;
boundVertexBuffers = BufferFamily::Invalid;
boundIndexBuffer = BufferFamily::Invalid;
@ -1142,6 +1161,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
continue;
}
const BatchType::Id type = IsDepthFadeEnabled(*shader) ? BatchType::DepthFade : BatchType::Standard;
if(staticChanged || shaderChanged || entityChanged)
{
oldShader = shader;
@ -1149,7 +1169,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
oldHasStaticGeo = hasStaticGeo;
EndSkyBatch();
EndBatch();
BeginBatch(shader, hasStaticGeo, BatchType::Standard);
BeginBatch(shader, hasStaticGeo, type);
tess.greyscale = drawSurf->greyscale;
batchShadingRate = lowShadingRate;
}
@ -1166,7 +1186,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
if(tess.numIndexes + chunk.indexCount > SHADER_MAX_INDEXES)
{
EndBatch();
BeginBatch(tess.shader, batchHasStaticGeo, BatchType::Standard);
BeginBatch(tess.shader, batchHasStaticGeo, type);
batchShadingRate = lowShadingRate;
}
@ -1271,7 +1291,12 @@ void World::DrawFog()
SCOPED_RENDER_PASS("Fog", 0.25f, 0.125f, 0.0f);
// fog 0 is invalid, it must be skipped
int insideIndex = -1;
if(tr.world->numfogs > 1)
{
CmdBindPipeline(fogOutsidePipeline);
psoChangeCount++;
CmdBindRootSignature(fogRootSignature);
CmdBindDescriptorTable(fogRootSignature, fogDescriptorTable);
@ -1283,7 +1308,6 @@ void World::DrawFog()
const TextureBarrier depthReadBarrier(depthTexture, ResourceStates::PixelShaderAccessBit);
CmdBarrier(1, &depthReadBarrier);
int insideIndex = -1;
for(int f = 1; f < tr.world->numfogs; ++f)
{
const fog_t& fog = tr.world->fogs[f];
@ -1305,6 +1329,7 @@ void World::DrawFog()
break;
}
}
}
for(int f = 1; f < tr.world->numfogs; ++f)
{
@ -1333,6 +1358,7 @@ void World::DrawFog()
if(insideIndex > 0)
{
CmdBindPipeline(fogInsidePipeline);
psoChangeCount++;
CmdBindRootSignature(fogRootSignature);
CmdBindDescriptorTable(fogRootSignature, fogDescriptorTable);

View file

@ -65,3 +65,23 @@ float4 Dither(float4 color, float3 position, float seed, float noiseScale, float
return color + float4(noise, noise, noise, 0.0);
}
// from NVIDIA's 2007 "Soft Particles" whitepaper by Tristan Lorach
// Applies a power curve to d that is mirrored around 0.5:
// inputs at or below 0.5 yield 0.5 * (2d)^power,
// inputs above 0.5 yield 1 - 0.5 * (2(1 - d))^power.
// The base of the power is clamped to [0, 1] before exponentiation.
float Contrast(float d, float power)
{
	if(d > 0.5)
	{
		float upperBase = saturate(2.0 * (1.0 - d));
		return 1.0 - 0.5 * pow(upperBase, power);
	}

	float lowerBase = saturate(2.0 * d);
	return 0.5 * pow(lowerBase, power);
}
// Returns 1.0 when bit number bitIndex of bits is set and 0.0 otherwise.
float GetBitAsFloat(uint bits, uint bitIndex)
{
	uint mask = 1u << bitIndex;
	if(bits & mask)
	{
		return 1.0;
	}

	return 0.0;
}
// Decodes two 16-bit floats packed into a single uint:
// x comes from f16tof32(data), y from f16tof32(data >> 16).
float2 UnpackHalf2(uint data)
{
	float2 unpacked;
	unpacked.x = f16tof32(data);
	unpacked.y = f16tof32(data >> 16);
	return unpacked;
}

View file

@ -99,6 +99,8 @@ struct VOut
STAGE_ATTRIBS(7)
#endif
float clipDist : SV_ClipDistance0;
float2 proj2232 : PROJ;
float depthVS : DEPTHVS;
};
#undef STAGE_ATTRIBS
@ -150,6 +152,8 @@ VOut vs(VIn input)
STAGE_ATTRIBS(7)
#endif
output.clipDist = dot(positionVS, clipPlane);
output.proj2232 = float2(-projectionMatrix[2][2], projectionMatrix[2][3]);
output.depthVS = -positionVS.z;
return output;
}
@ -166,26 +170,21 @@ VOut vs(VIn input)
cbuffer RootConstants
{
// general
uint4 stageIndices0; // low 16 = texture, high 16 = sampler
uint4 stageIndices0; // sampler: 16 - texture: 16
uint4 stageIndices1;
float greyscale;
float pad0;
float pad1;
float pad2;
// shader trace
uint shaderTrace;
uint shaderIndex;
uint frameIndex;
uint centerPixel; // x | (y << 16)
uint shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
uint centerPixel; // y: 16 - x: 16
// depth fade
uint halfDistOffset; // low: distance - high: offset
uint depthFadeColorTex; // texture index: 12 - color bias: 4 - color scale: 4
#if DITHER
// dither
float frameSeed;
float noiseScale;
float invGamma;
float invBrightness;
#endif
uint halfSeedNoise; // low: frame seed - high: noise scale
uint halfInvGammaBright; // low: inv gamma - high: inv brightness
};
Texture2D textures2D[4096] : register(t0);
@ -335,16 +334,33 @@ float4 ps(VOut input) : SV_Target
dst = MakeGreyscale(dst, greyscale);
#if DITHER
dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma);
float2 seedNoise = UnpackHalf2(halfSeedNoise);
float2 invGammaBright = UnpackHalf2(halfInvGammaBright);
dst = Dither(dst, input.position.xyz, seedNoise.x, seedNoise.y, invGammaBright.y, invGammaBright.x);
#endif
if(shaderTrace)
#if DEPTH_FADE
#define BIT(Index) GetBitAsFloat(depthFadeColorTex, Index)
float2 distOffset = UnpackHalf2(halfDistOffset);
float4 fadeColorScale = float4(BIT(0), BIT(1), BIT(2), BIT(3));
float4 fadeColorBias = float4(BIT(4), BIT(5), BIT(6), BIT(7));
float zwDepth = textures2D[depthFadeColorTex >> 8].Load(int3(input.position.xy, 0)).x;// stored depth, z/w
float depthS = LinearDepth(zwDepth, input.proj2232.x, input.proj2232.y); // stored depth, linear
float depthP = input.depthVS - distOffset.y; // fragment depth
float fadeScale = Contrast((depthS - depthP) * distOffset.x, 2.0);
dst = lerp(dst * fadeColorScale + fadeColorBias, dst, fadeScale);
#undef BIT
#endif
if(shaderTrace & 1)
{
// we only store the shader index of 1 pixel
uint2 fragmentCoords = uint2(input.position.xy);
uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
if(all(fragmentCoords == centerCoords))
{
uint frameIndex = (shaderTrace >> 1) & 3;
uint shaderIndex = shaderTrace >> 3;
shaderIndexBuffer.Store(frameIndex * 4, shaderIndex);
}
}

View file

@ -32,7 +32,6 @@ to do:
- use Application Verifier to catch issues
- tone mapping: look at https://github.com/h3r2tic/tony-mc-mapface
- ubershader PS: run-time alpha test evaluation to reduce PSO count?
- r_depthFade
- when creating the root signature, validate that neither of the tables have any gap
- use root signature 1.1 to use the hints that help the drivers optimize out static resources
- is it possible to force Resource Binding Tier 2 somehow? are we supposed to run on old HW to test? :(
@ -2729,7 +2728,7 @@ namespace RHI
glInfo.maxTextureSize = MAX_TEXTURE_SIZE;
glInfo.maxAnisotropy = 16;
glInfo.depthFadeSupport = qfalse;
glInfo.depthFadeSupport = qtrue;
rhi.initialized = true;

View file

@ -424,11 +424,10 @@ static const cvarTableItem_t r_cvars[] =
&r_lightmapGreyscale, "r_lightmapGreyscale", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_FLOAT, "0", "1", "how desaturated the lightmap looks",
"Lightmap desaturation", CVARCAT_GRAPHICS, "Desaturates the lightmap data", ""
},
// @TODO:
//{
//&r_depthFade, "r_depthFade", "1", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_depthFade,
//"", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "", ""
//},
{
&r_depthFade, "r_depthFade", "1", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_depthFade,
"Depth fade", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "Prevents transparent surfaces from creating sharp edges when \"cutting\" through opaque geometry", ""
},
{
&r_dither, "r_dither", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_dither,
"Dither", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "Adds noise to fight color banding artifacts", ""

View file

@ -341,16 +341,15 @@ typedef struct {
typedef enum {
DFT_NONE, // disabled
DFT_BLEND, // std alpha blend -> fade color = (R G B 0)
DFT_ADD, // additive -> fade color = (0 0 0 A)
DFT_MULT, // multiplicative -> fade color = (1 1 1 A)
DFT_PMA, // pre-mult alpha -> fade color = (0 0 0 0)
DFT_BLEND, // standard alpha blend
DFT_ADD, // additive
DFT_MULT, // multiplicative
DFT_PMA, // pre-multiplied alpha
DFT_TBD, // to be determined, i.e. fix up later
DFT_COUNT
} depthFadeType_t;
extern const float r_depthFadeScale[DFT_COUNT][4];
extern const float r_depthFadeBias [DFT_COUNT][4];
extern const uint8_t r_depthFadeScaleAndBias[DFT_COUNT];
struct pipeline_t {
int firstStage;

View file

@ -23,24 +23,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
// tr_shader.c -- this file deals with the parsing and definition of shaders
const float r_depthFadeScale[DFT_COUNT][4] =
{
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_NONE
{ 1.0f, 1.0f, 1.0f, 0.0f }, // DFT_BLEND
{ 0.0f, 0.0f, 0.0f, 1.0f }, // DFT_ADD
{ 0.0f, 0.0f, 0.0f, 1.0f }, // DFT_MULT
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_PMA
{ 0.0f, 0.0f, 0.0f, 0.0f } // DFT_TBD
};
const float r_depthFadeBias[DFT_COUNT][4] =
const uint8_t r_depthFadeScaleAndBias[DFT_COUNT] =
{
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_NONE
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_BLEND
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_ADD
{ 1.0f, 1.0f, 1.0f, 0.0f }, // DFT_MULT
{ 0.0f, 0.0f, 0.0f, 0.0f }, // DFT_PMA
{ 0.0f, 0.0f, 0.0f, 0.0f } // DFT_TBD
0x00, // DFT_NONE R G B A R G B A
0x07, // DFT_BLEND - scale = (1, 1, 1, 0) - bias = (0, 0, 0, 0)
0x08, // DFT_ADD - scale = (0, 0, 0, 1) - bias = (0, 0, 0, 0)
0x78, // DFT_MULT - scale = (0, 0, 0, 1) - bias = (1, 1, 1, 0)
0x00, // DFT_PMA - scale = (0, 0, 0, 0) - bias = (0, 0, 0, 0)
0x00 // DFT_TBD
};
static char* s_shaderText = 0;

View file

@ -6,6 +6,8 @@
PS(1_0_23) \
PS(1_0_25) \
PS(1_0_40000000) \
PS(1_0_40000022) \
PS(1_0_40000065) \
PS(1_0_41) \
PS(1_0_62) \
PS(1_0_65) \
@ -16,14 +18,26 @@
PS(1_1_23) \
PS(1_1_25) \
PS(1_1_40000000) \
PS(1_1_40000022) \
PS(1_1_40000065) \
PS(1_1_41) \
PS(1_1_62) \
PS(1_1_65) \
PS(1_1_83) \
PS(1_2_22) \
PS(1_2_25) \
PS(1_2_62) \
PS(1_2_65) \
PS(1_3_22) \
PS(1_3_25) \
PS(1_3_62) \
PS(1_3_65) \
PS(2_0_0_13) \
PS(2_0_0_20000000) \
PS(2_0_0_22) \
PS(2_0_0_23) \
PS(2_0_0_53) \
PS(2_0_0_62) \
PS(2_0_0_65) \
PS(2_0_0_78) \
PS(2_0_0_83) \
@ -33,61 +47,128 @@
PS(2_1_0_13) \
PS(2_1_0_20000000) \
PS(2_1_0_22) \
PS(2_1_0_23) \
PS(2_1_0_53) \
PS(2_1_0_62) \
PS(2_1_0_65) \
PS(2_1_0_78) \
PS(2_1_0_83) \
PS(2_1_13_13) \
PS(2_1_22_22) \
PS(2_1_40000000_13) \
PS(2_2_13_13) \
PS(2_2_22_22) \
PS(2_3_13_13) \
PS(2_3_22_22) \
PS(3_0_0_0_22) \
PS(3_0_0_0_65) \
PS(3_0_0_13_22) \
PS(3_0_0_13_61) \
PS(3_0_0_22_13) \
PS(3_0_0_22_22) \
PS(3_0_0_22_62) \
PS(3_0_0_22_65) \
PS(3_0_0_23_23) \
PS(3_0_0_40000065_13) \
PS(3_0_0_56_13) \
PS(3_0_0_62_13) \
PS(3_0_0_64_13) \
PS(3_0_0_65_13) \
PS(3_0_0_65_22) \
PS(3_0_0_65_83) \
PS(3_0_0_83_13) \
PS(3_0_22_22_22) \
PS(3_1_0_0_22) \
PS(3_1_0_0_65) \
PS(3_1_0_13_22) \
PS(3_1_0_13_61) \
PS(3_1_0_22_13) \
PS(3_1_0_22_22) \
PS(3_1_0_22_62) \
PS(3_1_0_22_65) \
PS(3_1_0_23_23) \
PS(3_1_0_40000065_13) \
PS(3_1_0_56_13) \
PS(3_1_0_62_13) \
PS(3_1_0_64_13) \
PS(3_1_0_65_13) \
PS(3_1_0_65_22) \
PS(3_1_0_65_83) \
PS(3_1_0_83_13) \
PS(3_1_22_22_22) \
PS(3_2_22_22_22) \
PS(3_3_22_22_22) \
PS(4_0_0_0_40000062_83) \
PS(4_0_0_13_22_22) \
PS(4_0_0_13_65_65) \
PS(4_0_0_22_22_13) \
PS(4_0_0_22_65_13) \
PS(4_0_0_22_65_83) \
PS(4_0_0_51_52_13) \
PS(4_0_0_53_22_13) \
PS(4_0_0_56_13_22) \
PS(4_0_0_56_56_13) \
PS(4_0_0_62_13_22) \
PS(4_0_0_65_13_22) \
PS(4_0_0_65_22_62) \
PS(4_0_0_65_65_22) \
PS(4_0_0_65_65_83) \
PS(4_0_22_22_22_22) \
PS(4_0_25_25_25_25) \
PS(4_0_40000000_65_13_13) \
PS(4_1_0_0_40000062_83) \
PS(4_1_0_13_22_22) \
PS(4_1_0_13_65_65) \
PS(4_1_0_22_22_13) \
PS(4_1_0_22_65_13) \
PS(4_1_0_22_65_83) \
PS(4_1_0_51_52_13) \
PS(4_1_0_53_22_13) \
PS(4_1_0_56_13_22) \
PS(4_1_0_56_56_13) \
PS(4_1_0_62_13_22) \
PS(4_1_0_65_13_22) \
PS(4_1_0_65_22_62) \
PS(4_1_0_65_65_22) \
PS(4_1_0_65_65_83) \
PS(4_1_22_22_22_22) \
PS(4_1_25_25_25_25) \
PS(4_1_40000000_65_13_13) \
PS(4_2_25_25_25_25) \
PS(4_3_25_25_25_25) \
PS(5_0_0_13_22_22_22) \
PS(5_0_0_22_22_13_22) \
PS(5_0_0_22_22_22_40000065) \
PS(5_0_0_22_22_65_83) \
PS(5_0_0_22_23_65_83) \
PS(5_0_0_22_65_83_22) \
PS(5_0_0_65_13_22_22) \
PS(5_0_0_65_13_22_25) \
PS(5_0_22_22_22_22_22) \
PS(5_1_0_13_22_22_22) \
PS(5_1_0_22_22_13_22) \
PS(5_1_0_22_22_22_40000065) \
PS(5_1_0_22_22_65_83) \
PS(5_1_22_22_22_22_22)
PS(5_1_0_22_23_65_83) \
PS(5_1_0_22_65_83_22) \
PS(5_1_0_65_13_22_22) \
PS(5_1_0_65_13_22_25) \
PS(5_1_22_22_22_22_22) \
PS(5_2_22_22_22_22_22) \
PS(5_3_22_22_22_22_22) \
PS(6_0_0_22_22_22_65_13) \
PS(6_1_0_22_22_22_65_13)
struct UberPixelShaderState
{
int stageStates[8] = {};
int stageCount = 0;
int globalState = 0;
int globalState = 0; // bit mask of UBERPS_*_BIT macros
};
#define UBERPS_DITHER_BIT 1
#define UBERPS_DEPTHFADE_BIT 2
static bool ParseUberPixelShaderState(UberPixelShaderState& state, const char* stateString)
{
const char* scanStrings[8] =

View file

@ -193,10 +193,14 @@ void CompileUberPS(const char* stateString)
extras[extraCount++] = va("-Vn g_ps_%s", stateString);
extras[extraCount++] = "-D USE_INCLUDES=1";
extras[extraCount++] = "-D PIXEL_SHADER=1";
if(state.globalState & 1)
if(state.globalState & UBERPS_DITHER_BIT)
{
extras[extraCount++] = "-D DITHER=1";
}
if(state.globalState & UBERPS_DEPTHFADE_BIT)
{
extras[extraCount++] = "-D DEPTH_FADE=1";
}
extras[extraCount++] = va("-D STAGE_COUNT=%d", state.stageCount);
for(int s = 0; s < state.stageCount; ++s)
{