mirror of
https://bitbucket.org/CPMADevs/cnq3
synced 2024-11-10 06:31:48 +00:00
implemented depth fade
improved PSO change tracking
embedded more common uber pixel shader permutations
This commit is contained in:
parent
aa51fae5a6
commit
00840e10d8
13 changed files with 339 additions and 109 deletions
|
@ -3723,3 +3723,43 @@ void Com_ParseHexColor( float* c, const char* text, qbool hasAlpha )
|
|||
c[3] = 1.0f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns the raw bit pattern of a 32-bit float.
// memcpy is used instead of a pointer cast so the read doesn't break strict aliasing rules.
// NOTE(review): memcpy needs <string.h> in scope - confirm this translation unit already includes it.
static uint32_t asuint( float x )
{
	uint32_t bits;
	memcpy(&bits, &x, sizeof(bits));
	return bits;
}


// Builds a 32-bit float from a raw bit pattern (inverse of asuint).
static float asfloat( uint32_t x )
{
	float value;
	memcpy(&value, &x, sizeof(value));
	return value;
}


// IEEE-754 16-bit floating-point format (without infinity)
// "Accuracy and performance of the lattice Boltzmann method with 64-bit, 32-bit, and customized 16-bit number formats"
// x86 intrinsic: _cvtsh_ss
//
// Decodes a 16-bit float into a 32-bit float.
// There is no special case for exponent 31: such inputs decode as ordinary finite values.
// Each case is a separate branch so that no shift is ever evaluated with an out-of-range count.
float f16tof32( uint16_t x )
{
	const uint32_t s = (uint32_t)(x & 0x8000) << 16; // sign, moved to bit 31
	const uint32_t e = (uint32_t)(x & 0x7C00) >> 10; // exponent
	const uint32_t m = (uint32_t)(x & 0x03FF) << 13; // mantissa, aligned with the 32-bit mantissa

	if(e != 0)
	{
		// normalized: re-bias the exponent from 15 to 127
		return asfloat(s | ((e + 112) << 23) | m);
	}

	if(m == 0)
	{
		// signed zero
		return asfloat(s);
	}

	// denormalized: m is non-zero here, so v is in [140;149] and the shift count in [1;10]
	const uint32_t v = asuint((float)m) >> 23; // log2 bit hack to count leading zeros in denormalized format
	return asfloat(s | ((v - 37) << 23) | ((m << (150 - v)) & 0x007FE000));
}


// IEEE-754 16-bit floating-point format (without infinity)
// "Accuracy and performance of the lattice Boltzmann method with 64-bit, 32-bit, and customized 16-bit number formats"
// x86 intrinsic: _cvtss_sh
//
// Encodes a 32-bit float as a 16-bit float.
// Inputs too large for the format saturate to 0x7FFF (plus sign), inputs too small become signed zero.
// Each case is a separate branch so that no shift is ever evaluated with an out-of-range count.
uint16_t f32tof16( float x )
{
	const uint32_t b = asuint(x) + 0x00001000; // rounding: add half of the last kept mantissa bit before truncation
	const uint32_t s = (b & 0x80000000) >> 16; // sign, moved to bit 15
	const uint32_t e = (b & 0x7F800000) >> 23; // exponent
	const uint32_t m = b & 0x007FFFFF; // mantissa

	if(e > 143)
	{
		// saturate
		return (uint16_t)(s | 0x7FFF);
	}

	if(e > 112)
	{
		// normalized: re-bias the exponent from 127 to 15
		return (uint16_t)(s | (((e - 112) << 10) & 0x7C00) | (m >> 13));
	}

	if(e > 101)
	{
		// denormalized: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding
		// e is in [102;112] here, so the shift count is in [13;23]
		return (uint16_t)(s | ((((0x007FF000 + m) >> (125 - e)) + 1) >> 1));
	}

	// underflow: only the sign survives
	return (uint16_t)s;
}
|
||||
|
|
|
@ -1298,6 +1298,10 @@ printHelpResult_t Com_PrintHelp( const char* name, printf_t print, qbool printNo
|
|||
#endif
|
||||
|
||||
|
||||
float f16tof32( uint16_t x );
|
||||
uint16_t f32tof16( float x );
|
||||
|
||||
|
||||
// the smallest power of 2 accepted is 1
|
||||
template<typename T>
|
||||
static T IsPowerOfTwo( T x )
|
||||
|
|
|
@ -48,23 +48,24 @@ struct WorldVertexRC
|
|||
// Pixel shader root constants of the world render pass.
// Every member is declared exactly once; the struct is 60 bytes with natural alignment.
// Pairs of 16-bit members are meant to be read as a single 32-bit value (low half first).
struct WorldPixelRC
{
	// general
	uint32_t stageIndices[8]; // sampler: 16 - texture: 16
	float greyscale;

	// r_shaderTrace - dynamically enabled
	uint32_t shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
	uint16_t centerPixelX;
	uint16_t centerPixelY;

	// r_depthFade - statically enabled
	uint16_t hFadeDistance; // 16-bit float
	uint16_t hFadeOffset; // 16-bit float
	uint32_t depthFadeColorTex; // texture index: 12 - color bias: 4 - color scale: 4

	// r_dither - statically enabled
	uint16_t hFrameSeed; // 16-bit float
	uint16_t hNoiseScale; // 16-bit float
	uint16_t hInvGamma; // 16-bit float
	uint16_t hInvBrightness; // 16-bit float
};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
@ -310,6 +311,7 @@ struct BatchType
|
|||
{
|
||||
Standard,
|
||||
DynamicLight,
|
||||
DepthFade,
|
||||
Count
|
||||
};
|
||||
};
|
||||
|
@ -558,6 +560,7 @@ struct PSODesc
|
|||
cullType_t cullType;
|
||||
bool polygonOffset;
|
||||
bool clampDepth;
|
||||
bool depthFade;
|
||||
};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
@ -740,6 +743,14 @@ inline void CmdSetViewportAndScissor(const viewParms_t& vp)
|
|||
CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight);
|
||||
}
|
||||
|
||||
inline bool IsDepthFadeEnabled(const shader_t& shader)
|
||||
{
|
||||
return
|
||||
r_depthFade->integer != 0 &&
|
||||
shader.dfType > DFT_NONE &&
|
||||
shader.dfType < DFT_TBD;
|
||||
}
|
||||
|
||||
const image_t* GetBundleImage(const textureBundle_t& bundle);
|
||||
uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD = 0);
|
||||
uint32_t GetSamplerIndex(const image_t* image);
|
||||
|
|
|
@ -159,7 +159,6 @@ static bool IsCommutativeBlendState(unsigned int stateBits)
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
static cullType_t GetMirrorredCullType(cullType_t cullType)
|
||||
{
|
||||
switch(cullType)
|
||||
|
@ -409,8 +408,11 @@ void GRP::ProcessShader(shader_t& shader)
|
|||
|
||||
if(shader.isOpaque)
|
||||
{
|
||||
Q_assert(IsDepthFadeEnabled(shader) == false);
|
||||
|
||||
// @TODO: fix up cache.stageStateBits[0] based on depth state from follow-up states
|
||||
CachedPSO cache = {};
|
||||
cache.desc.depthFade = false;
|
||||
cache.desc.polygonOffset = !!shader.polygonOffset;
|
||||
cache.desc.clampDepth = clampDepth;
|
||||
cache.stageStateBits[0] = shader.stages[0]->stateBits & (~GLS_POLYMODE_LINE);
|
||||
|
@ -432,6 +434,7 @@ void GRP::ProcessShader(shader_t& shader)
|
|||
{
|
||||
CachedPSO cache = {};
|
||||
|
||||
cache.desc.depthFade = IsDepthFadeEnabled(shader);
|
||||
cache.desc.polygonOffset = !!shader.polygonOffset;
|
||||
cache.desc.clampDepth = clampDepth;
|
||||
cache.stageCount = 0;
|
||||
|
@ -617,6 +620,9 @@ void GRP::DrawGUI()
|
|||
|
||||
ImGui::EndTable();
|
||||
}
|
||||
|
||||
ImGui::Text("PSO count: %d", (int)grp.psoCount);
|
||||
ImGui::Text("PSO changes: %d", (int)grp.world.psoChangeCount);
|
||||
}
|
||||
ImGui::End();
|
||||
}
|
||||
|
@ -720,8 +726,11 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
|
|||
for(uint32_t i = 0; i < uberPixelShaderCacheSize; ++i)
|
||||
{
|
||||
const UberPixelShaderState& state = uberPixelShaderStates[i];
|
||||
const int dither = (state.globalState & UBERPS_DITHER_BIT) != 0 ? 1 : 0;
|
||||
const bool depthFade = (state.globalState & UBERPS_DEPTHFADE_BIT) != 0;
|
||||
if(cache.stageCount != (uint32_t)state.stageCount ||
|
||||
r_dither->integer != (state.globalState & 1))
|
||||
r_dither->integer != dither ||
|
||||
cache.desc.depthFade != depthFade)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -760,6 +769,12 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
|
|||
macros[macroCount].value = "1";
|
||||
macroCount++;
|
||||
}
|
||||
if(cache.desc.depthFade)
|
||||
{
|
||||
macros[macroCount].name = "DEPTH_FADE";
|
||||
macros[macroCount].value = "1";
|
||||
macroCount++;
|
||||
}
|
||||
for(int s = 0; s < cache.stageCount; ++s)
|
||||
{
|
||||
macros[macroCount].name = va("STAGE%d_BITS", s);
|
||||
|
@ -776,13 +791,31 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
|
|||
pixelShaderByteCode = uberPixelShaderByteCodes[uberPixelShaderIndex];
|
||||
}
|
||||
|
||||
// important missing entries can be copy-pasted into UBER_SHADER_PS_LIST
|
||||
#if 0
|
||||
Sys_DebugPrintf("PS: ");
|
||||
for(int s = 0; s < cache.stageCount; ++s)
|
||||
if(uberPixelShaderIndex < 0)
|
||||
{
|
||||
Sys_DebugPrintf(va("%X ", (int)cache.stageStateBits[s] & pixelShaderStateBits));
|
||||
unsigned int flags = 0;
|
||||
if(r_dither->integer)
|
||||
{
|
||||
flags |= UBERPS_DITHER_BIT;
|
||||
}
|
||||
if(cache.desc.depthFade)
|
||||
{
|
||||
flags |= UBERPS_DEPTHFADE_BIT;
|
||||
}
|
||||
Sys_DebugPrintf("\tshader: %s\n", name);
|
||||
ri.Printf(PRINT_ALL, "^2 shader: %s\n", name);
|
||||
Sys_DebugPrintf("\tPS(%d_%X", (int)cache.stageCount, flags);
|
||||
ri.Printf(PRINT_ALL, " PS(%d_%X", (int)cache.stageCount, flags);
|
||||
for(int s = 0; s < cache.stageCount; ++s)
|
||||
{
|
||||
Sys_DebugPrintf("_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
|
||||
ri.Printf(PRINT_ALL, "_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
|
||||
}
|
||||
Sys_DebugPrintf(") \\\n");
|
||||
ri.Printf(PRINT_ALL, ") \\\n");
|
||||
}
|
||||
Sys_DebugPrintf("\n");
|
||||
#endif
|
||||
|
||||
uint32_t a = 0;
|
||||
|
@ -797,13 +830,20 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
|
|||
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::TexCoord, DataType::Float32, 2, 0);
|
||||
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Color, DataType::UNorm8, 4, 0);
|
||||
}
|
||||
desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float;
|
||||
desc.depthStencil.depthComparison =
|
||||
(cache.stageStateBits[0] & GLS_DEPTHFUNC_EQUAL) != 0 ?
|
||||
ComparisonFunction::Equal :
|
||||
ComparisonFunction::GreaterEqual;
|
||||
desc.depthStencil.enableDepthTest = (cache.stageStateBits[0] & GLS_DEPTHTEST_DISABLE) == 0;
|
||||
desc.depthStencil.enableDepthWrites = (cache.stageStateBits[0] & GLS_DEPTHMASK_TRUE) != 0;
|
||||
if(cache.desc.depthFade)
|
||||
{
|
||||
desc.depthStencil.DisableDepth();
|
||||
}
|
||||
else
|
||||
{
|
||||
desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float;
|
||||
desc.depthStencil.depthComparison =
|
||||
(cache.stageStateBits[0] & GLS_DEPTHFUNC_EQUAL) != 0 ?
|
||||
ComparisonFunction::Equal :
|
||||
ComparisonFunction::GreaterEqual;
|
||||
desc.depthStencil.enableDepthTest = (cache.stageStateBits[0] & GLS_DEPTHTEST_DISABLE) == 0;
|
||||
desc.depthStencil.enableDepthWrites = (cache.stageStateBits[0] & GLS_DEPTHMASK_TRUE) != 0;
|
||||
}
|
||||
desc.rasterizer.cullMode = cache.desc.cullType;
|
||||
desc.rasterizer.polygonOffset = cache.desc.polygonOffset;
|
||||
desc.rasterizer.clampDepth = cache.desc.clampDepth;
|
||||
|
|
|
@ -549,6 +549,7 @@ void World::DrawPrePass(const drawSceneViewCommand_t& cmd)
|
|||
CmdBindRenderTargets(0, NULL, &depthTexture);
|
||||
CmdBindRootSignature(zppRootSignature);
|
||||
CmdBindPipeline(zppPipeline);
|
||||
psoChangeCount++;
|
||||
CmdBindDescriptorTable(zppRootSignature, zppDescriptorTable);
|
||||
|
||||
ZPPVertexRC vertexRC;
|
||||
|
@ -564,6 +565,19 @@ void World::DrawPrePass(const drawSceneViewCommand_t& cmd)
|
|||
|
||||
void World::BeginBatch(const shader_t* shader, bool hasStaticGeo, BatchType::Id type)
|
||||
{
|
||||
if(type == BatchType::DepthFade && batchType != BatchType::DepthFade)
|
||||
{
|
||||
TextureBarrier barrier(depthTexture, ResourceStates::PixelShaderAccessBit);
|
||||
CmdBarrier(1, &barrier);
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, NULL);
|
||||
}
|
||||
else if(type != BatchType::DepthFade && batchType == BatchType::DepthFade)
|
||||
{
|
||||
TextureBarrier barrier(depthTexture, ResourceStates::DepthWriteBit);
|
||||
CmdBarrier(1, &barrier);
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
|
||||
}
|
||||
|
||||
tess.numVertexes = 0;
|
||||
tess.numIndexes = 0;
|
||||
tess.depthFade = DFT_NONE;
|
||||
|
@ -733,14 +747,17 @@ void World::EndBatch()
|
|||
|
||||
WorldPixelRC pixelRC = {};
|
||||
pixelRC.greyscale = tess.greyscale;
|
||||
pixelRC.frameSeed = grp.frameSeed;
|
||||
pixelRC.noiseScale = r_ditherStrength->value;
|
||||
pixelRC.invBrightness = 1.0f / r_brightness->value;
|
||||
pixelRC.invGamma = 1.0f / r_gamma->value;
|
||||
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader;
|
||||
pixelRC.shaderIndex = (uint32_t)shader->index;
|
||||
pixelRC.frameIndex = RHI::GetFrameIndex();
|
||||
pixelRC.centerPixel = (glConfig.vidWidth / 2) | ((glConfig.vidHeight / 2) << 16);
|
||||
pixelRC.hFrameSeed = f32tof16(grp.frameSeed);
|
||||
pixelRC.hNoiseScale = f32tof16(r_ditherStrength->value);
|
||||
pixelRC.hInvGamma = f32tof16(1.0f / r_gamma->value);
|
||||
pixelRC.hInvBrightness = f32tof16(1.0f / r_brightness->value);
|
||||
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader | (RHI::GetFrameIndex() << 1) | ((uint32_t)shader->index << 3);
|
||||
pixelRC.centerPixelX = glConfig.vidWidth / 2;
|
||||
pixelRC.centerPixelY = glConfig.vidHeight / 2;
|
||||
pixelRC.hFadeDistance = f32tof16(tess.shader->dfInvDist);
|
||||
pixelRC.hFadeOffset = f32tof16(tess.shader->dfBias);
|
||||
pixelRC.depthFadeColorTex = (uint32_t)r_depthFadeScaleAndBias[tess.shader->dfType] | (depthTextureIndex << 8);
|
||||
|
||||
for(int s = 0; s < pipeline.numStages; ++s)
|
||||
{
|
||||
const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle);
|
||||
|
@ -803,7 +820,7 @@ void World::EndSkyBatch()
|
|||
void World::RestartBatch()
|
||||
{
|
||||
EndBatch();
|
||||
BeginBatch(tess.shader, batchHasStaticGeo, BatchType::Standard);
|
||||
BeginBatch(tess.shader, batchHasStaticGeo, batchType);
|
||||
}
|
||||
|
||||
void World::DrawGUI()
|
||||
|
@ -1065,6 +1082,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
|
|||
CmdBindDescriptorTable(grp.uberRootSignature, grp.descriptorTable);
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
|
||||
batchPSO = RHI_MAKE_NULL_HANDLE();
|
||||
batchType = BatchType::Standard;
|
||||
|
||||
const drawSurf_t* drawSurfs = cmd.drawSurfs;
|
||||
const int surfCount = cmd.numDrawSurfs;
|
||||
|
@ -1107,6 +1125,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
|
|||
CmdBindDescriptorTable(grp.uberRootSignature, grp.descriptorTable);
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, &depthTexture);
|
||||
batchPSO = RHI_MAKE_NULL_HANDLE();
|
||||
batchType = BatchType::Standard;
|
||||
boundVertexBuffers = BufferFamily::Invalid;
|
||||
boundIndexBuffer = BufferFamily::Invalid;
|
||||
|
||||
|
@ -1142,6 +1161,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
|
|||
continue;
|
||||
}
|
||||
|
||||
const BatchType::Id type = IsDepthFadeEnabled(*shader) ? BatchType::DepthFade : BatchType::Standard;
|
||||
if(staticChanged || shaderChanged || entityChanged)
|
||||
{
|
||||
oldShader = shader;
|
||||
|
@ -1149,7 +1169,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
|
|||
oldHasStaticGeo = hasStaticGeo;
|
||||
EndSkyBatch();
|
||||
EndBatch();
|
||||
BeginBatch(shader, hasStaticGeo, BatchType::Standard);
|
||||
BeginBatch(shader, hasStaticGeo, type);
|
||||
tess.greyscale = drawSurf->greyscale;
|
||||
batchShadingRate = lowShadingRate;
|
||||
}
|
||||
|
@ -1166,7 +1186,7 @@ void World::DrawSceneView(const drawSceneViewCommand_t& cmd)
|
|||
if(tess.numIndexes + chunk.indexCount > SHADER_MAX_INDEXES)
|
||||
{
|
||||
EndBatch();
|
||||
BeginBatch(tess.shader, batchHasStaticGeo, BatchType::Standard);
|
||||
BeginBatch(tess.shader, batchHasStaticGeo, type);
|
||||
batchShadingRate = lowShadingRate;
|
||||
}
|
||||
|
||||
|
@ -1271,39 +1291,44 @@ void World::DrawFog()
|
|||
|
||||
SCOPED_RENDER_PASS("Fog", 0.25f, 0.125f, 0.0f);
|
||||
|
||||
CmdBindPipeline(fogOutsidePipeline);
|
||||
CmdBindRootSignature(fogRootSignature);
|
||||
CmdBindDescriptorTable(fogRootSignature, fogDescriptorTable);
|
||||
|
||||
const uint32_t stride = sizeof(vec3_t);
|
||||
CmdBindVertexBuffers(1, &boxVertexBuffer, &stride, NULL);
|
||||
CmdBindIndexBuffer(boxIndexBuffer, IndexType::UInt32, 0);
|
||||
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, NULL);
|
||||
const TextureBarrier depthReadBarrier(depthTexture, ResourceStates::PixelShaderAccessBit);
|
||||
CmdBarrier(1, &depthReadBarrier);
|
||||
|
||||
// fog 0 is invalid, it must be skipped
|
||||
int insideIndex = -1;
|
||||
for(int f = 1; f < tr.world->numfogs; ++f)
|
||||
if(tr.world->numfogs > 1)
|
||||
{
|
||||
const fog_t& fog = tr.world->fogs[f];
|
||||
CmdBindPipeline(fogOutsidePipeline);
|
||||
psoChangeCount++;
|
||||
CmdBindRootSignature(fogRootSignature);
|
||||
CmdBindDescriptorTable(fogRootSignature, fogDescriptorTable);
|
||||
|
||||
bool inside = true;
|
||||
for(int a = 0; a < 3; ++a)
|
||||
const uint32_t stride = sizeof(vec3_t);
|
||||
CmdBindVertexBuffers(1, &boxVertexBuffer, &stride, NULL);
|
||||
CmdBindIndexBuffer(boxIndexBuffer, IndexType::UInt32, 0);
|
||||
|
||||
CmdBindRenderTargets(1, &grp.renderTarget, NULL);
|
||||
const TextureBarrier depthReadBarrier(depthTexture, ResourceStates::PixelShaderAccessBit);
|
||||
CmdBarrier(1, &depthReadBarrier);
|
||||
|
||||
for(int f = 1; f < tr.world->numfogs; ++f)
|
||||
{
|
||||
if(backEnd.viewParms.orient.origin[a] <= fog.bounds[0][a] ||
|
||||
backEnd.viewParms.orient.origin[a] >= fog.bounds[1][a])
|
||||
const fog_t& fog = tr.world->fogs[f];
|
||||
|
||||
bool inside = true;
|
||||
for(int a = 0; a < 3; ++a)
|
||||
{
|
||||
inside = false;
|
||||
if(backEnd.viewParms.orient.origin[a] <= fog.bounds[0][a] ||
|
||||
backEnd.viewParms.orient.origin[a] >= fog.bounds[1][a])
|
||||
{
|
||||
inside = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(inside)
|
||||
{
|
||||
insideIndex = f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(inside)
|
||||
{
|
||||
insideIndex = f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for(int f = 1; f < tr.world->numfogs; ++f)
|
||||
|
@ -1333,6 +1358,7 @@ void World::DrawFog()
|
|||
if(insideIndex > 0)
|
||||
{
|
||||
CmdBindPipeline(fogInsidePipeline);
|
||||
psoChangeCount++;
|
||||
CmdBindRootSignature(fogRootSignature);
|
||||
CmdBindDescriptorTable(fogRootSignature, fogDescriptorTable);
|
||||
|
||||
|
|
|
@ -65,3 +65,23 @@ float4 Dither(float4 color, float3 position, float seed, float noiseScale, float
|
|||
|
||||
return color + float4(noise, noise, noise, 0.0);
|
||||
}
|
||||
|
||||
// from NVIDIA's 2007 "Soft Particles" whitepaper by Tristan Lorach
// Applies a power curve to each half of the input separately:
// values up to 0.5 use 0.5 * (2d)^power, values above 0.5 use the mirrored curve.
float Contrast(float d, float power)
{
	if(d > 0.5)
	{
		float mirrored = saturate(2.0 * (1.0 - d));

		return 1.0 - 0.5 * pow(mirrored, power);
	}

	float scaled = saturate(2.0 * d);

	return 0.5 * pow(scaled, power);
}
|
||||
|
||||
// Returns 1.0 when the bit at position bitIndex is set in bits, 0.0 otherwise.
float GetBitAsFloat(uint bits, uint bitIndex)
{
	uint mask = 1u << bitIndex;
	bool isSet = (bits & mask) != 0u;

	return isSet ? 1.0 : 0.0;
}
|
||||
|
||||
// Expands two 16-bit floats packed into one uint:
// x comes from the value as passed, y from the value shifted down by 16 bits.
float2 UnpackHalf2(uint data)
{
	float lowHalf = f16tof32(data);
	float highHalf = f16tof32(data >> 16);

	return float2(lowHalf, highHalf);
}
|
||||
|
|
|
@ -99,6 +99,8 @@ struct VOut
|
|||
STAGE_ATTRIBS(7)
|
||||
#endif
|
||||
float clipDist : SV_ClipDistance0;
|
||||
float2 proj2232 : PROJ;
|
||||
float depthVS : DEPTHVS;
|
||||
};
|
||||
|
||||
#undef STAGE_ATTRIBS
|
||||
|
@ -150,6 +152,8 @@ VOut vs(VIn input)
|
|||
STAGE_ATTRIBS(7)
|
||||
#endif
|
||||
output.clipDist = dot(positionVS, clipPlane);
|
||||
output.proj2232 = float2(-projectionMatrix[2][2], projectionMatrix[2][3]);
|
||||
output.depthVS = -positionVS.z;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
@ -166,26 +170,21 @@ VOut vs(VIn input)
|
|||
// Root constants of the uber shader; every member is declared exactly once.
// The half* members each hold two 16-bit floats to be expanded with UnpackHalf2.
cbuffer RootConstants
{
	// general
	uint4 stageIndices0; // sampler: 16 - texture: 16
	uint4 stageIndices1;
	float greyscale;

	// shader trace
	uint shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
	uint centerPixel; // y: 16 - x: 16

	// depth fade
	uint halfDistOffset; // low: distance - high: offset
	uint depthFadeColorTex; // texture index: 12 - color bias: 4 - color scale: 4

	// dither
	uint halfSeedNoise; // low: frame seed - high: noise scale
	uint halfInvGammaBright; // low: inv gamma - high: inv brightness
};
|
||||
|
||||
Texture2D textures2D[4096] : register(t0);
|
||||
|
@ -335,16 +334,33 @@ float4 ps(VOut input) : SV_Target
|
|||
dst = MakeGreyscale(dst, greyscale);
|
||||
|
||||
#if DITHER
|
||||
dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma);
|
||||
float2 seedNoise = UnpackHalf2(halfSeedNoise);
|
||||
float2 invGammaBright = UnpackHalf2(halfInvGammaBright);
|
||||
dst = Dither(dst, input.position.xyz, seedNoise.x, seedNoise.y, invGammaBright.y, invGammaBright.x);
|
||||
#endif
|
||||
|
||||
if(shaderTrace)
|
||||
#if DEPTH_FADE
|
||||
#define BIT(Index) GetBitAsFloat(depthFadeColorTex, Index)
|
||||
float2 distOffset = UnpackHalf2(halfDistOffset);
|
||||
float4 fadeColorScale = float4(BIT(0), BIT(1), BIT(2), BIT(3));
|
||||
float4 fadeColorBias = float4(BIT(4), BIT(5), BIT(6), BIT(7));
|
||||
float zwDepth = textures2D[depthFadeColorTex >> 8].Load(int3(input.position.xy, 0)).x;// stored depth, z/w
|
||||
float depthS = LinearDepth(zwDepth, input.proj2232.x, input.proj2232.y); // stored depth, linear
|
||||
float depthP = input.depthVS - distOffset.y; // fragment depth
|
||||
float fadeScale = Contrast((depthS - depthP) * distOffset.x, 2.0);
|
||||
dst = lerp(dst * fadeColorScale + fadeColorBias, dst, fadeScale);
|
||||
#undef BIT
|
||||
#endif
|
||||
|
||||
if(shaderTrace & 1)
|
||||
{
|
||||
// we only store the shader index of 1 pixel
|
||||
uint2 fragmentCoords = uint2(input.position.xy);
|
||||
uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
|
||||
if(all(fragmentCoords == centerCoords))
|
||||
{
|
||||
uint frameIndex = (shaderTrace >> 1) & 3;
|
||||
uint shaderIndex = shaderTrace >> 3;
|
||||
shaderIndexBuffer.Store(frameIndex * 4, shaderIndex);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,6 @@ to do:
|
|||
- use Application Verifier to catch issues
|
||||
- tone mapping: look at https://github.com/h3r2tic/tony-mc-mapface
|
||||
- ubershader PS: run-time alpha test evaluation to reduce PSO count?
|
||||
- r_depthFade
|
||||
- when creating the root signature, validate that neither of the tables have any gap
|
||||
- use root signature 1.1 to use the hints that help the drivers optimize out static resources
|
||||
- is it possible to force Resource Binding Tier 2 somehow? are we supposed to run on old HW to test? :(
|
||||
|
@ -2729,7 +2728,7 @@ namespace RHI
|
|||
|
||||
glInfo.maxTextureSize = MAX_TEXTURE_SIZE;
|
||||
glInfo.maxAnisotropy = 16;
|
||||
glInfo.depthFadeSupport = qfalse;
|
||||
glInfo.depthFadeSupport = qtrue;
|
||||
|
||||
rhi.initialized = true;
|
||||
|
||||
|
|
|
@ -424,11 +424,10 @@ static const cvarTableItem_t r_cvars[] =
|
|||
&r_lightmapGreyscale, "r_lightmapGreyscale", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_FLOAT, "0", "1", "how desaturated the lightmap looks",
|
||||
"Lightmap desaturation", CVARCAT_GRAPHICS, "Desaturates the lightmap data", ""
|
||||
},
|
||||
// @TODO:
|
||||
//{
|
||||
//&r_depthFade, "r_depthFade", "1", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_depthFade,
|
||||
//"", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "", ""
|
||||
//},
|
||||
{
|
||||
&r_depthFade, "r_depthFade", "1", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_depthFade,
|
||||
"Depth fade", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "Prevents transparent surfaces from creating sharp edges when \"cutting\" through opaque geometry", ""
|
||||
},
|
||||
{
|
||||
&r_dither, "r_dither", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, help_r_dither,
|
||||
"Dither", CVARCAT_GRAPHICS | CVARCAT_PERFORMANCE, "Adds noise to fight color banding artifacts", ""
|
||||
|
|
|
@ -341,16 +341,15 @@ typedef struct {
|
|||
|
||||
// Depth fade modes of a shader; each enumerator is listed exactly once.
// DFT_COUNT is the number of enumerators before it and sizes the per-type tables.
typedef enum {
	DFT_NONE,	// disabled
	DFT_BLEND,	// standard alpha blend
	DFT_ADD,	// additive
	DFT_MULT,	// multiplicative
	DFT_PMA,	// pre-multiplied alpha
	DFT_TBD,	// to be determined, i.e. fix up later
	DFT_COUNT
} depthFadeType_t;
|
||||
|
||||
extern const float r_depthFadeScale[DFT_COUNT][4];
|
||||
extern const float r_depthFadeBias [DFT_COUNT][4];
|
||||
extern const uint8_t r_depthFadeScaleAndBias[DFT_COUNT];
|
||||
|
||||
struct pipeline_t {
|
||||
int firstStage;
|
||||
|
|
|
@ -23,24 +23,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|||
|
||||
// tr_shader.c -- this file deals with the parsing and definition of shaders
|
||||
|
||||
// Packed fade color scale and bias for each depth fade type, one byte per type.
// bits 0-3: color scale (R G B A) - bits 4-7: color bias (R G B A)
const uint8_t r_depthFadeScaleAndBias[DFT_COUNT] =
{
	0x00, // DFT_NONE
	0x07, // DFT_BLEND - scale = (1, 1, 1, 0) - bias = (0, 0, 0, 0)
	0x08, // DFT_ADD   - scale = (0, 0, 0, 1) - bias = (0, 0, 0, 0)
	0x78, // DFT_MULT  - scale = (0, 0, 0, 1) - bias = (1, 1, 1, 0)
	0x00, // DFT_PMA   - scale = (0, 0, 0, 0) - bias = (0, 0, 0, 0)
	0x00  // DFT_TBD
};
|
||||
|
||||
static char* s_shaderText = 0;
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
PS(1_0_23) \
|
||||
PS(1_0_25) \
|
||||
PS(1_0_40000000) \
|
||||
PS(1_0_40000022) \
|
||||
PS(1_0_40000065) \
|
||||
PS(1_0_41) \
|
||||
PS(1_0_62) \
|
||||
PS(1_0_65) \
|
||||
|
@ -16,14 +18,26 @@
|
|||
PS(1_1_23) \
|
||||
PS(1_1_25) \
|
||||
PS(1_1_40000000) \
|
||||
PS(1_1_40000022) \
|
||||
PS(1_1_40000065) \
|
||||
PS(1_1_41) \
|
||||
PS(1_1_62) \
|
||||
PS(1_1_65) \
|
||||
PS(1_1_83) \
|
||||
PS(1_2_22) \
|
||||
PS(1_2_25) \
|
||||
PS(1_2_62) \
|
||||
PS(1_2_65) \
|
||||
PS(1_3_22) \
|
||||
PS(1_3_25) \
|
||||
PS(1_3_62) \
|
||||
PS(1_3_65) \
|
||||
PS(2_0_0_13) \
|
||||
PS(2_0_0_20000000) \
|
||||
PS(2_0_0_22) \
|
||||
PS(2_0_0_23) \
|
||||
PS(2_0_0_53) \
|
||||
PS(2_0_0_62) \
|
||||
PS(2_0_0_65) \
|
||||
PS(2_0_0_78) \
|
||||
PS(2_0_0_83) \
|
||||
|
@ -33,61 +47,128 @@
|
|||
PS(2_1_0_13) \
|
||||
PS(2_1_0_20000000) \
|
||||
PS(2_1_0_22) \
|
||||
PS(2_1_0_23) \
|
||||
PS(2_1_0_53) \
|
||||
PS(2_1_0_62) \
|
||||
PS(2_1_0_65) \
|
||||
PS(2_1_0_78) \
|
||||
PS(2_1_0_83) \
|
||||
PS(2_1_13_13) \
|
||||
PS(2_1_22_22) \
|
||||
PS(2_1_40000000_13) \
|
||||
PS(2_2_13_13) \
|
||||
PS(2_2_22_22) \
|
||||
PS(2_3_13_13) \
|
||||
PS(2_3_22_22) \
|
||||
PS(3_0_0_0_22) \
|
||||
PS(3_0_0_0_65) \
|
||||
PS(3_0_0_13_22) \
|
||||
PS(3_0_0_13_61) \
|
||||
PS(3_0_0_22_13) \
|
||||
PS(3_0_0_22_22) \
|
||||
PS(3_0_0_22_62) \
|
||||
PS(3_0_0_22_65) \
|
||||
PS(3_0_0_23_23) \
|
||||
PS(3_0_0_40000065_13) \
|
||||
PS(3_0_0_56_13) \
|
||||
PS(3_0_0_62_13) \
|
||||
PS(3_0_0_64_13) \
|
||||
PS(3_0_0_65_13) \
|
||||
PS(3_0_0_65_22) \
|
||||
PS(3_0_0_65_83) \
|
||||
PS(3_0_0_83_13) \
|
||||
PS(3_0_22_22_22) \
|
||||
PS(3_1_0_0_22) \
|
||||
PS(3_1_0_0_65) \
|
||||
PS(3_1_0_13_22) \
|
||||
PS(3_1_0_13_61) \
|
||||
PS(3_1_0_22_13) \
|
||||
PS(3_1_0_22_22) \
|
||||
PS(3_1_0_22_62) \
|
||||
PS(3_1_0_22_65) \
|
||||
PS(3_1_0_23_23) \
|
||||
PS(3_1_0_40000065_13) \
|
||||
PS(3_1_0_56_13) \
|
||||
PS(3_1_0_62_13) \
|
||||
PS(3_1_0_64_13) \
|
||||
PS(3_1_0_65_13) \
|
||||
PS(3_1_0_65_22) \
|
||||
PS(3_1_0_65_83) \
|
||||
PS(3_1_0_83_13) \
|
||||
PS(3_1_22_22_22) \
|
||||
PS(3_2_22_22_22) \
|
||||
PS(3_3_22_22_22) \
|
||||
PS(4_0_0_0_40000062_83) \
|
||||
PS(4_0_0_13_22_22) \
|
||||
PS(4_0_0_13_65_65) \
|
||||
PS(4_0_0_22_22_13) \
|
||||
PS(4_0_0_22_65_13) \
|
||||
PS(4_0_0_22_65_83) \
|
||||
PS(4_0_0_51_52_13) \
|
||||
PS(4_0_0_53_22_13) \
|
||||
PS(4_0_0_56_13_22) \
|
||||
PS(4_0_0_56_56_13) \
|
||||
PS(4_0_0_62_13_22) \
|
||||
PS(4_0_0_65_13_22) \
|
||||
PS(4_0_0_65_22_62) \
|
||||
PS(4_0_0_65_65_22) \
|
||||
PS(4_0_0_65_65_83) \
|
||||
PS(4_0_22_22_22_22) \
|
||||
PS(4_0_25_25_25_25) \
|
||||
PS(4_0_40000000_65_13_13) \
|
||||
PS(4_1_0_0_40000062_83) \
|
||||
PS(4_1_0_13_22_22) \
|
||||
PS(4_1_0_13_65_65) \
|
||||
PS(4_1_0_22_22_13) \
|
||||
PS(4_1_0_22_65_13) \
|
||||
PS(4_1_0_22_65_83) \
|
||||
PS(4_1_0_51_52_13) \
|
||||
PS(4_1_0_53_22_13) \
|
||||
PS(4_1_0_56_13_22) \
|
||||
PS(4_1_0_56_56_13) \
|
||||
PS(4_1_0_62_13_22) \
|
||||
PS(4_1_0_65_13_22) \
|
||||
PS(4_1_0_65_22_62) \
|
||||
PS(4_1_0_65_65_22) \
|
||||
PS(4_1_0_65_65_83) \
|
||||
PS(4_1_22_22_22_22) \
|
||||
PS(4_1_25_25_25_25) \
|
||||
PS(4_1_40000000_65_13_13) \
|
||||
PS(4_2_25_25_25_25) \
|
||||
PS(4_3_25_25_25_25) \
|
||||
PS(5_0_0_13_22_22_22) \
|
||||
PS(5_0_0_22_22_13_22) \
|
||||
PS(5_0_0_22_22_22_40000065) \
|
||||
PS(5_0_0_22_22_65_83) \
|
||||
PS(5_0_0_22_23_65_83) \
|
||||
PS(5_0_0_22_65_83_22) \
|
||||
PS(5_0_0_65_13_22_22) \
|
||||
PS(5_0_0_65_13_22_25) \
|
||||
PS(5_0_22_22_22_22_22) \
|
||||
PS(5_1_0_13_22_22_22) \
|
||||
PS(5_1_0_22_22_13_22) \
|
||||
PS(5_1_0_22_22_22_40000065) \
|
||||
PS(5_1_0_22_22_65_83) \
|
||||
PS(5_1_22_22_22_22_22)
|
||||
PS(5_1_0_22_23_65_83) \
|
||||
PS(5_1_0_22_65_83_22) \
|
||||
PS(5_1_0_65_13_22_22) \
|
||||
PS(5_1_0_65_13_22_25) \
|
||||
PS(5_1_22_22_22_22_22) \
|
||||
PS(5_2_22_22_22_22_22) \
|
||||
PS(5_3_22_22_22_22_22) \
|
||||
PS(6_0_0_22_22_22_65_13) \
|
||||
PS(6_1_0_22_22_22_65_13)
|
||||
|
||||
// Describes one uber pixel shader permutation; all members default to zero.
struct UberPixelShaderState
{
	int stageStates[8] = {};
	int stageCount = 0;
	int globalState = 0; // bit mask of UBERPS_*_BIT macros
};
|
||||
|
||||
#define UBERPS_DITHER_BIT 1
|
||||
#define UBERPS_DEPTHFADE_BIT 2
|
||||
|
||||
static bool ParseUberPixelShaderState(UberPixelShaderState& state, const char* stateString)
|
||||
{
|
||||
const char* scanStrings[8] =
|
||||
|
|
|
@ -193,10 +193,14 @@ void CompileUberPS(const char* stateString)
|
|||
extras[extraCount++] = va("-Vn g_ps_%s", stateString);
|
||||
extras[extraCount++] = "-D USE_INCLUDES=1";
|
||||
extras[extraCount++] = "-D PIXEL_SHADER=1";
|
||||
if(state.globalState & 1)
|
||||
if(state.globalState & UBERPS_DITHER_BIT)
|
||||
{
|
||||
extras[extraCount++] = "-D DITHER=1";
|
||||
}
|
||||
if(state.globalState & UBERPS_DEPTHFADE_BIT)
|
||||
{
|
||||
extras[extraCount++] = "-D DEPTH_FADE=1";
|
||||
}
|
||||
extras[extraCount++] = va("-D STAGE_COUNT=%d", state.stageCount);
|
||||
for(int s = 0; s < state.stageCount; ++s)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue