massively improved shader compilation time

This commit is contained in:
myT 2024-12-05 23:20:51 +01:00
parent b076ff8e8a
commit 3d879533bb
4 changed files with 618 additions and 187 deletions

View file

@ -31,24 +31,10 @@ along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
#include "../renderer/grp_uber_shaders.h"
char repoPath[MAX_PATH];
char outputPath[MAX_PATH];
char bin2headerPath[MAX_PATH];
char dxcPath[MAX_PATH];
const char* targetVS = "vs_6_0";
const char* targetPS = "ps_6_0";
const char* targetCS = "cs_6_0";
#define PS(Data) #Data,
const char* uberShaderPixelStates[] =
{
UBER_SHADER_PS_LIST(PS)
};
#undef PS
#define PROFILE 0
// dxc options:
// -Zi embeds debug info
// -Qembed_debug embeds debug info in shader container
// -Vn header variable name
@ -57,9 +43,37 @@ const char* uberShaderPixelStates[] =
// -Wno-warning disables the warning
#define COST_UBER_PIXEL_SHADER (2002)
#define COST_SMAA_SHADER (2001)
char repoPath[MAX_PATH];
char shaderFolderPath[MAX_PATH];
char outputPath[MAX_PATH];
char bin2headerPath[MAX_PATH];
char dxcPath[MAX_PATH];
#if PROFILE
FILE* perfFile;
#endif
const char* targetVS = "vs_6_0";
const char* targetPS = "ps_6_0";
const char* targetCS = "cs_6_0";
#define PS(Data) #Data,
const char* uberShaderPixelStates[] =
{
UBER_SHADER_PS_LIST(PS)
};
#undef PS
LONG commandIndex = -1;
LONG uberPixelShaderIndex = -1;
LONG smaaShaderIndex = -1;
const char* va(_Printf_format_string_ const char* format, ...)
{
static char string[64][32000];
static char string[64][1 << 16];
static int index = 0;
char* buf = string[index++ & 63];
va_list argptr;
@ -71,15 +85,352 @@ const char* va(_Printf_format_string_ const char* format, ...)
return buf;
}
struct ShaderArgs
const char* Canonicalize(const char* path)
{
const char* headerPath;
const char* shaderPath;
const char* entryPoint;
const char* targetProfile;
static char canonPath[MAX_PATH];
PathCanonicalizeA(canonPath, path);
return canonPath;
}
// Blocks until exactly 'expectedCount' non-directory entries match the
// wildcard 'searchPath' and none of them is 0 bytes long.
//
// searchPath:    pattern handed to FindFirstFileA (e.g. "dir\\smaa*.h")
// expectedCount: number of matching files required to stop waiting
//
// Polls every 50 ms. If the condition is still not met after 2 seconds,
// an error is printed and the process exits.
void WaitForFiles(const char* searchPath, int expectedCount)
{
	const DWORD startTimeMS = timeGetTime();
	for(;;)
	{
		int count = 0;
		bool nonEmpty = true;

		WIN32_FIND_DATAA fd;
		const HANDLE search = FindFirstFileA(searchPath, &fd);
		// A failed search (typically: nothing matches yet) is treated as
		// "0 files found" so that the timeout and the sleep below still apply.
		// Retrying immediately would spin forever at 100% CPU.
		if(search != INVALID_HANDLE_VALUE)
		{
			do
			{
				if(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
				{
					continue;
				}

				count++;
				if(fd.nFileSizeLow == 0 && fd.nFileSizeHigh == 0)
				{
					// the file exists but nothing has been written to it yet
					nonEmpty = false;
				}
			}
			while(FindNextFileA(search, &fd) != 0);
			FindClose(search);
		}

		// unsigned subtraction stays correct if timeGetTime wraps around
		const DWORD elapsedMS = timeGetTime() - startTimeMS;

		if(count == expectedCount && nonEmpty)
		{
#if PROFILE
			printf("The wait for %d '%s' files took %d ms\n", expectedCount, searchPath, (int)elapsedMS);
#endif
			return;
		}

		if(elapsedMS > 2000)
		{
			fprintf(stderr, "ERROR: The wait for the %d '%s' files timed out\n", expectedCount, searchPath);
			exit(__LINE__);
		}

		Sleep(50);
	}
}
// Scope for the command category enum.
// The category decides what ExecuteCommand does once a command succeeded
// and whether ExecuteCommands counts it as a compiled shader.
struct CommandType
{
	enum Id
	{
		Generic = 0,     // anything that isn't a shader compilation
		Shader,          // shader compilation without any follow-up step
		UberPixelShader, // headers get merged once all of them are done
		SMAAShader,      // headers get merged once all of them are done
		Count            // number of categories, not a category itself
	};
};
// A single command line queued for execution through system().
struct Command
{
// null-terminated command line, stored in the owning CommandData's buffer
const char* command;
// category of the command (see CommandType)
CommandType::Id type;
// length of 'command' in characters, excluding the null terminator
int length;
// sort key used by CommandData::SortByAscendingCost
int cost;
};
// Fixed-capacity command list with its own string storage.
//
// BufferCapacity:  total number of bytes available for command strings
// CommandCapacity: maximum number of commands
//
// Nothing is ever freed individually: Clear() resets everything at once.
// Running out of either capacity is fatal (prints an error and exits).
template<int BufferCapacity, int CommandCapacity>
struct CommandData
{
	// Reserves 'byteCount' bytes of string storage and returns a pointer to them.
	// Exits the process when the request is negative or doesn't fit.
	char* Allocate(int byteCount)
	{
		// written as a subtraction so the test itself can't overflow
		if(byteCount < 0 || byteCount > BufferCapacity - allocated)
		{
			// print first: a firing assert would otherwise hide the message
			fprintf(stderr, "ERROR: Failed to allocate command data\n");
			assert(!"Out of memory");
			exit(__LINE__);
		}

		char* const ptr = buffer + allocated;
		allocated += byteCount;

		return ptr;
	}

	// Appends a command, copying 'originalCommand' (null-terminated) into
	// the internal buffer, so the caller's string doesn't need to outlive the call.
	void AddCommand(CommandType::Id type, const char* originalCommand, int cost)
	{
		if(commandCount >= CommandCapacity)
		{
			fprintf(stderr, "ERROR: Failed to allocate a command\n");
			assert(!"Out of memory");
			exit(__LINE__);
		}

		const int length = (int)strlen(originalCommand);
		const int lengthNT = length + 1; // includes the null terminator
		char* const newCommand = Allocate(lengthNT);
		memcpy(newCommand, originalCommand, lengthNT);

		Command& command = commands[commandCount++];
		command.command = newCommand;
		command.type = type;
		command.length = length;
		command.cost = cost;
	}

	// Drops all commands and releases all string storage.
	void Clear()
	{
		allocated = 0;
		commandCount = 0;
	}

	// Returns the length of the longest queued command (0 when empty),
	// not counting the null terminator.
	int GetMaxCommandLength()
	{
		int longest = 0;
		for(int i = 0; i < commandCount; i++)
		{
			if(commands[i].length > longest)
			{
				longest = commands[i].length;
			}
		}

		return longest;
	}

	// qsort callback ordering commands by ascending cost
	static int __cdecl CommandComparison(const void* aPtr, const void* bPtr)
	{
		const Command* const a = (const Command*)aPtr;
		const Command* const b = (const Command*)bPtr;

		// three-way comparison without the overflow risk of a subtraction
		return (a->cost > b->cost) - (a->cost < b->cost);
	}

	// Sorts the commands so that the most expensive ones end up last.
	void SortByAscendingCost()
	{
		qsort(commands, commandCount, sizeof(Command), &CommandComparison);
	}

	char buffer[BufferCapacity];       // storage for all command strings
	Command commands[CommandCapacity];
	int allocated = 0;                 // number of bytes of 'buffer' in use
	int commandCount = 0;              // number of valid entries in 'commands'
};
// all the commands waiting to be run by ExecuteCommands
// (1 MB of command text, up to 1024 commands)
CommandData< 1 << 20, 1024> commands;
// the commands of the group currently being recorded, i.e. what was added
// between BeginCommandGroup and EndCommandGroup (64 KB of text, up to 64 commands)
CommandData<64 << 10, 64> groupCommands;
// true between BeginCommandGroup and EndCommandGroup
bool commandGroupActive = false;
// incremented by ExecuteCommands for every queued command that isn't CommandType::Generic
int compiledShaderCount = 0;
// thread count used by ExecuteCommands when it is asked for fewer than 1 thread
int maxThreadCount = 4; // very conservative default
// Starts recording a command group: until EndCommandGroup is called,
// AddCommand stores commands in 'groupCommands' instead of 'commands'.
// Groups can't be nested.
void BeginCommandGroup()
{
	assert(!commandGroupActive);

	// start from a clean slate, then flip the switch
	groupCommands.Clear();
	commandGroupActive = true;
}
void EndCommandGroup(int cost = 1)
{
assert(commandGroupActive);
commandGroupActive = false;
if(groupCommands.commandCount == 1)
{
const Command& command = groupCommands.commands[0];
commands.AddCommand(command.type, command.command, cost);
groupCommands.Clear();
return;
}
const char* const separator = " && ";
const int separatorLength = strlen(separator);
int totalLength = 0;
for(int i = 0; i < groupCommands.commandCount; i++)
{
totalLength += groupCommands.commands[i].length;
}
totalLength += (groupCommands.commandCount - 1) * separatorLength;
totalLength += 1;
char* const newCommand = commands.Allocate(totalLength);
char* d = newCommand;
for(int i = 0; i < groupCommands.commandCount; i++)
{
const Command& command = groupCommands.commands[i];
const int l = command.length;
memcpy(d, command.command, l);
d += l;
if(i < groupCommands.commandCount - 1)
{
memcpy(d, separator, separatorLength);
d += separatorLength;
}
}
*d = '\0';
const Command& command = groupCommands.commands[0];
commands.AddCommand(command.type, newCommand, cost);
groupCommands.Clear();
}
// Queues a command line.
// While a command group is being recorded, the command becomes part of
// that group. Otherwise, it is queued directly for ExecuteCommands.
void AddCommand(CommandType::Id type, const char* cmd, int cost = 1)
{
	if(!commandGroupActive)
	{
		commands.AddCommand(type, cmd, cost);
		return;
	}

	groupCommands.AddCommand(type, cmd, cost);
}
void ExecuteCommand(int index)
{
#if PROFILE
LARGE_INTEGER startTime;
QueryPerformanceCounter(&startTime);
#endif
const Command& command = commands.commands[index];
const int result = system(command.command);
if(result != 0)
{
printf("FAILED: %s\nError code: %d, errno = %d, length = %d\n",
command.command, result, errno, command.length);
}
#if PROFILE
else
{
LARGE_INTEGER endTime, freq;
QueryPerformanceCounter(&endTime);
QueryPerformanceFrequency(&freq);
const int durationMS = (int)((1000 * (endTime.QuadPart - startTime.QuadPart)) / freq.QuadPart);
fprintf(perfFile, "%4d ms: %s\n", durationMS, command.command);
}
#endif
if(command.type == CommandType::UberPixelShader && result == 0)
{
const LONG oldIndex = InterlockedExchangeAdd(&uberPixelShaderIndex, -1);
if(oldIndex == 1)
{
const char* const dirPath = Canonicalize(va("%s\\code\\renderer\\compshaders\\grp", repoPath));
const char* const cmd = va(
"type %s\\uber_shader_ps_*.h > %s\\complete_uber_ps.h && "
"del %s\\uber_shader_ps_*.h",
dirPath, dirPath, dirPath);
WaitForFiles(va("%s\\uber_shader_ps_*.h", dirPath), _countof(uberShaderPixelStates));
system(cmd);
}
}
else if(command.type == CommandType::SMAAShader && result == 0)
{
const LONG oldIndex = InterlockedExchangeAdd(&smaaShaderIndex, -1);
if(oldIndex == 1)
{
const char* const dirPath = Canonicalize(va("%s\\code\\renderer\\compshaders\\grp", repoPath));
const char* const cmd = va(
"type %s\\smaa*.h > %s\\complete_smaa.h && "
"del %s\\smaa*.h",
dirPath, dirPath, dirPath);
WaitForFiles(va("%s\\smaa*.h", dirPath), 24);
system(cmd);
}
}
}
// Worker thread entry point.
// Keeps claiming the command at the shared 'commandIndex' (walking the
// command list from its end towards its start) and running it until
// no command is left. Always returns 0.
DWORD WINAPI CommandThread(_In_ LPVOID)
{
	// the value returned is the index from before the decrement,
	// so every index is handed out to exactly one thread
	LONG claimed = InterlockedExchangeAdd(&commandIndex, -1);
	while(claimed >= 0)
	{
		ExecuteCommand(claimed);
		claimed = InterlockedExchangeAdd(&commandIndex, -1);
	}

	return 0;
}
void ExecuteCommands(int threadCount)
{
#if PROFILE
threadCount = 1;
#else
if(threadCount < 1)
{
threadCount = maxThreadCount;
}
#endif
assert(commands.commandCount > 0);
HANDLE threads[256];
assert(threadCount <= _countof(threads));
commands.SortByAscendingCost();
commandIndex = commands.commandCount - 1;
uberPixelShaderIndex = _countof(uberShaderPixelStates) - 1;
smaaShaderIndex = (4 * 3 * 2) - 1; // 4 variants, 3 passes, (1 VS + 1 PS)
if(threadCount == 1)
{
for(int i = 0; i < commands.commandCount; i++)
{
ExecuteCommand(i);
}
}
else
{
for(int t = 0; t < threadCount; t++)
{
threads[t] = CreateThread(NULL, 0, &CommandThread, NULL, 0, NULL);
}
WaitForMultipleObjects(threadCount, threads, TRUE, INFINITE);
}
for(int i = 0; i < commands.commandCount; i++)
{
if(commands.commands[i].type != CommandType::Generic)
{
compiledShaderCount++;
}
}
commands.Clear();
groupCommands.Clear();
}
const char* OutputPath(const char* path)
{
return va("%s\\%s", outputPath, path);
@ -91,9 +442,17 @@ const char* HeaderVariable(const char* name)
return va("\"static %s\"", name);
}
void CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extras = NULL)
struct ShaderArgs
{
static char temp[4096];
const char* headerPath;
const char* shaderPath;
const char* entryPoint;
const char* targetProfile;
};
void CompileShader(int cost, const ShaderArgs& args, int extraCount, const char** extras)
{
static char cmd[4096];
const char* headerPath = va("%s\\%s", outputPath, args.headerPath);
@ -101,20 +460,30 @@ void CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extr
// -Gis: Force IEEE strictness
// -Zi: Embed debug info
// -Qembed_debug: Embed debug info in shader container
strcpy(temp, va("%s -HV 2021 -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug",
strcpy(cmd, va("%s -HV 2021 -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug",
dxcPath, headerPath, args.entryPoint, args.targetProfile));
for(int i = 0; i < extraCount; ++i)
{
strcat(temp, " ");
strcat(temp, extras[i]);
strcat(cmd, " ");
strcat(cmd, extras[i]);
}
strcat(temp, " ");
strcat(temp, args.shaderPath);
strcat(cmd, " ");
strcat(cmd, shaderFolderPath);
strcat(cmd, "\\");
strcat(cmd, args.shaderPath);
printf("%s\n", temp);
system(temp);
CommandType::Id type = CommandType::Shader;
if(cost == COST_UBER_PIXEL_SHADER)
{
type = CommandType::UberPixelShader;
}
else if(cost == COST_SMAA_SHADER)
{
type = CommandType::SMAAShader;
}
AddCommand(type, cmd, cost);
}
struct SMAAArgs
@ -142,7 +511,7 @@ void CompileSMAAShader(const SMAAArgs& smaaArgs)
args.headerPath = smaaArgs.headerPath;
args.shaderPath = smaaArgs.shaderPath;
args.targetProfile = smaaArgs.vertexShader ? targetVS : targetPS;
CompileShader(args, _countof(extras), extras);
CompileShader(COST_SMAA_SHADER, args, _countof(extras), extras);
}
void ProcessSMAAShadersForPreset(const char* presetName, const char* presetMacro)
@ -170,11 +539,11 @@ void CompileSMAAShaders()
ProcessSMAAShadersForPreset("ultra", "-D SMAA_PRESET_ULTRA=1");
}
void CompileGraphics(const char* headerPath, const char* shaderPath, const char* varName,
void CompileGraphics(int cost, const char* headerPath, const char* shaderPath, const char* varName,
int vsOptionCount = 0, int psOptionCount = 0, ...)
{
const char* vsHeaderRelPath = va("%s.vs.h", shaderPath);
const char* psHeaderRelPath = va("%s.ps.h", shaderPath);
const char* vsHeaderRelPath = va("%s_%s.vs.h", shaderPath, varName);
const char* psHeaderRelPath = va("%s_%s.ps.h", shaderPath, varName);
const char* vsHeaderPath = OutputPath(vsHeaderRelPath);
const char* psHeaderPath = OutputPath(psHeaderRelPath);
@ -206,23 +575,27 @@ void CompileGraphics(const char* headerPath, const char* shaderPath, const char*
}
va_end(argPtr);
BeginCommandGroup();
ShaderArgs args;
args.entryPoint = "vs";
args.headerPath = vsHeaderRelPath;
args.shaderPath = shaderPath;
args.targetProfile = targetVS;
CompileShader(args, vsExtraCount, vsExtras);
CompileShader(1, args, vsExtraCount, vsExtras);
args.entryPoint = "ps";
args.headerPath = psHeaderRelPath;
args.shaderPath = shaderPath;
args.targetProfile = targetPS;
CompileShader(args, psExtraCount, psExtras);
CompileShader(1, args, psExtraCount, psExtras);
const char* outHeaderPath = OutputPath(headerPath);
system(va("type %s %s > %s", vsHeaderPath, psHeaderPath, outHeaderPath));
system(va("del %s", vsHeaderPath));
system(va("del %s", psHeaderPath));
AddCommand(CommandType::Generic, va("type %s %s > %s", vsHeaderPath, psHeaderPath, outHeaderPath));
AddCommand(CommandType::Generic, va("del %s", vsHeaderPath));
AddCommand(CommandType::Generic, va("del %s", psHeaderPath));
EndCommandGroup(cost);
}
void CompileVertexShader(const char* headerPath, const char* shaderPath, const char* varName)
@ -238,10 +611,10 @@ void CompileVertexShader(const char* headerPath, const char* shaderPath, const c
args.headerPath = headerPath;
args.shaderPath = shaderPath;
args.targetProfile = targetVS;
CompileShader(args, _countof(extras), extras);
CompileShader(1, args, _countof(extras), extras);
}
void CompilePixelShader(const char* headerPath, const char* shaderPath, const char* varName, int psOptionCount = 0, ...)
void CompilePixelShader(int cost, const char* headerPath, const char* shaderPath, const char* varName, int psOptionCount = 0, ...)
{
int psExtraCount = 4;
const char* psExtras[64] =
@ -265,10 +638,10 @@ void CompilePixelShader(const char* headerPath, const char* shaderPath, const ch
args.headerPath = headerPath;
args.shaderPath = shaderPath;
args.targetProfile = targetPS;
CompileShader(args, psExtraCount, psExtras);
CompileShader(cost, args, psExtraCount, psExtras);
}
void CompileCompute(const char* headerPath, const char* shaderPath, const char* varName, int csOptionCount = 0, ...)
void CompileCompute(int cost, const char* headerPath, const char* shaderPath, const char* varName, int csOptionCount = 0, ...)
{
int csExtraCount = 4;
const char* csExtras[64] =
@ -292,7 +665,7 @@ void CompileCompute(const char* headerPath, const char* shaderPath, const char*
args.headerPath = headerPath;
args.shaderPath = shaderPath;
args.targetProfile = targetCS;
CompileShader(args, csExtraCount, csExtras);
CompileShader(cost, args, csExtraCount, csExtras);
}
void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCount)
@ -309,7 +682,7 @@ void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCoun
args.headerPath = headerPath;
args.shaderPath = shaderPath;
args.targetProfile = targetVS;
CompileShader(args, _countof(extras), extras);
CompileShader(1, args, _countof(extras), extras);
}
void CompileUberPS(const char* stateString)
@ -317,8 +690,8 @@ void CompileUberPS(const char* stateString)
UberPixelShaderState state;
if(!ParseUberPixelShaderState(state, stateString))
{
fprintf(stderr, "ParseUberPixelShaderState failed!\n");
exit(666);
fprintf(stderr, "ERROR: ParseUberPixelShaderState failed!\n");
exit(__LINE__);
}
const char* extras[16];
@ -345,174 +718,201 @@ void CompileUberPS(const char* stateString)
args.headerPath = va("uber_shader_ps_%s.h", stateString);
args.shaderPath = "uber_shader.hlsl";
args.targetProfile = targetPS;
CompileShader(args, extraCount, extras);
}
const char* Canonicalize(const char* path)
{
static char canonPath[MAX_PATH];
PathCanonicalizeA(canonPath, path);
return canonPath;
CompileShader(COST_UBER_PIXEL_SHADER, args, extraCount, extras);
}
void InitDirectory(const char* dirName)
{
const char* rendererPath = va("%s\\code\\renderer", repoPath);
const char* cd = Canonicalize(va("%s\\shaders\\%s", rendererPath, dirName));
SetCurrentDirectoryA(cd);
const char* out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName));
strcpy(shaderFolderPath, dirName);
const char* const rendererPath = va("%s\\code\\renderer", repoPath);
const char* const out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName));
strcpy(outputPath, out);
CreateDirectoryA(outputPath, NULL);
system(va("del %s\\*.h", outputPath));
system(va("del %s\\*.temp", outputPath));
system(va("del %s\\*.h && del %s\\*.temp", outputPath, outputPath));
}
void ProcessGRP()
struct Timer
{
explicit Timer(const char* name_)
{
startTimeMS = timeGetTime();
name = name_;
}
~Timer()
{
const DWORD durationMS = timeGetTime() - startTimeMS;
printf("\n%s shader build: %.1f seconds\n", name, (float)durationMS / 1000.0f);
}
const char* name;
DWORD startTimeMS;
};
void ProcessBuildGRP()
{
InitDirectory("grp");
targetVS = "vs_6_0";
targetPS = "ps_6_0";
targetCS = "cs_6_0";
CompileGraphics("post_gamma.h", "post_gamma.hlsl", "post");
CompileGraphics("post_inverse_gamma.h", "post_inverse_gamma.hlsl", "post_inverse");
CompileGraphics("imgui.h", "imgui.hlsl", "imgui");
CompileGraphics("nuklear.h", "nuklear.hlsl", "nuklear");
CompileGraphics("ui.h", "ui.hlsl", "ui");
CompileGraphics("depth_pre_pass.h", "depth_pre_pass.hlsl", "zpp");
CompileGraphics("dynamic_light.h", "dynamic_light.hlsl", "dl");
CompileGraphics("blit.h", "blit.hlsl", "blit");
CompileVertexShader("fog.h", "fog_inside.hlsl", "fog");
CompilePixelShader("fog_inside.h", "fog_inside.hlsl", "fog_inside");
CompilePixelShader("fog_outside.h", "fog_outside.hlsl", "fog_outside");
CompileCompute("mip_1.h", "mip_1.hlsl", "mip_1");
CompileCompute("mip_2.h", "mip_2.hlsl", "mip_2");
CompileCompute("mip_3.h", "mip_3.hlsl", "mip_3");
CompileGraphics("im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1");
CompileGraphics("im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1");
CompileGraphics("im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1");
CompilePixelShader(1, "fog_inside.h", "fog_inside.hlsl", "fog_inside");
CompilePixelShader(1, "fog_outside.h", "fog_outside.hlsl", "fog_outside");
CompileCompute(1, "mip_1.h", "mip_1.hlsl", "mip_1");
CompileCompute(1, "mip_2.h", "mip_2.hlsl", "mip_2");
CompileCompute(1, "mip_3.h", "mip_3.hlsl", "mip_3");
CompileSMAAShaders();
system(va("type %s\\smaa*.h > %s\\complete_smaa.h", outputPath, outputPath));
// type combines all files into one
system(va("type ..\\common\\state_bits.h.hlsli ..\\common\\blend.hlsli shared.hlsli uber_shader.hlsl > %s\\uber_shader.temp", outputPath));
system(va("%s --output %s\\uber_shader.h --hname uber_shader_string %s\\uber_shader.temp", bin2headerPath, outputPath, outputPath));
system(va("del %s\\uber_shader.temp", outputPath));
for(int i = 0; i < 8; ++i)
{
CompileUberVS(va("uber_shader_vs_%i.h", i + 1), "uber_shader.hlsl", i + 1);
}
system(va("type %s\\uber_shader_vs_*.h > %s\\complete_uber_vs.h", outputPath, outputPath));
system(va("del %s\\uber_shader_vs_*.h", outputPath));
for(int i = 0; i < _countof(uberShaderPixelStates); ++i)
{
CompileUberPS(uberShaderPixelStates[i]);
}
system(va("type %s\\uber_shader_ps_*.h > %s\\complete_uber_ps.h", outputPath, outputPath));
system(va("del %s\\uber_shader_ps_*.h", outputPath));
CompileGraphics(1, "post_gamma.h", "post_gamma.hlsl", "post");
CompileGraphics(1, "post_inverse_gamma.h", "post_inverse_gamma.hlsl", "post_inverse");
CompileGraphics(1, "imgui.h", "imgui.hlsl", "imgui");
CompileGraphics(1, "nuklear.h", "nuklear.hlsl", "nuklear");
CompileGraphics(1, "ui.h", "ui.hlsl", "ui");
CompileGraphics(1, "depth_pre_pass.h", "depth_pre_pass.hlsl", "zpp");
CompileGraphics(1, "dynamic_light.h", "dynamic_light.hlsl", "dl");
CompileGraphics(1, "blit.h", "blit.hlsl", "blit");
CompileGraphics(1, "im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1");
CompileGraphics(1, "im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1");
CompileGraphics(1, "im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1");
}
void ProcessCRP()
void ProcessBuildCRP()
{
InitDirectory("crp");
targetVS = "vs_6_6";
targetPS = "ps_6_6";
targetCS = "cs_6_6";
CompileVertexShader("fullscreen.h", "fullscreen.hlsl", "fullscreen");
CompilePixelShader("blit.h", "blit.hlsl", "blit");
CompileGraphics("ui.h", "ui.hlsl", "ui");
CompileGraphics("imgui.h", "imgui.hlsl", "imgui");
CompileGraphics("nuklear.h", "nuklear.hlsl", "nuklear");
CompileCompute("mip_1.h", "mip_1.hlsl", "mip_1");
CompileCompute("mip_2.h", "mip_2.hlsl", "mip_2");
CompileCompute("mip_3.h", "mip_3.hlsl", "mip_3");
CompileGraphics("prepass.h", "prepass.hlsl", "prepass");
CompileGraphics("prepass_bary.h", "prepass.hlsl", "prepass_bary", 1, 1, "-D BARYCENTRICS=1", "-D BARYCENTRICS=1");
CompileGraphics("opaque.h", "opaque.hlsl", "opaque");
CompileGraphics("transp_draw.h", "transp_draw.hlsl", "transp_draw");
CompilePixelShader("transp_resolve.h", "transp_resolve.hlsl", "transp_resolve");
CompilePixelShader("transp_resolve_vol.h", "transp_resolve.hlsl", "transp_resolve_vol", 1, "-D VOLUMETRIC_LIGHT=1");
CompilePixelShader("tone_map.h", "tone_map.hlsl", "tone_map");
CompilePixelShader("tone_map_inverse.h", "tone_map_inverse.hlsl", "tone_map_inverse");
CompilePixelShader("accumdof_accum.h", "accumdof_accum.hlsl", "accum");
CompilePixelShader("accumdof_norm.h", "accumdof_norm.hlsl", "norm");
CompilePixelShader("accumdof_debug.h", "accumdof_debug.hlsl", "debug");
CompileCompute("gatherdof_split.h", "gatherdof_split.hlsl", "split");
CompileCompute("gatherdof_coc_tile_gen.h", "gatherdof_coc_tile_gen.hlsl", "coc_tile_gen");
CompileCompute("gatherdof_coc_tile_max.h", "gatherdof_coc_tile_max.hlsl", "coc_tile_max");
CompileCompute("gatherdof_blur.h", "gatherdof_blur.hlsl", "blur");
CompileCompute("gatherdof_fill.h", "gatherdof_fill.hlsl", "fill");
CompilePixelShader("gatherdof_combine.h", "gatherdof_combine.hlsl", "combine");
CompilePixelShader("gatherdof_debug.h", "gatherdof_debug.hlsl", "debug");
CompilePixelShader("magnifier.h", "magnifier.hlsl", "magnifier");
CompilePixelShader("dl_draw.h", "dl_draw.hlsl", "dl_draw");
CompilePixelShader("dl_denoising.h", "dl_denoising.hlsl", "dl_denoising");
CompileGraphics("add_light.h", "add_light.hlsl", "add_light");
CompilePixelShader("gbufferviz_depth.h", "gbufferviz_depth.hlsl", "gbufferviz_depth");
CompilePixelShader("gbufferviz_normal.h", "gbufferviz_normal.hlsl", "gbufferviz_normal");
CompilePixelShader("gbufferviz_position.h", "gbufferviz_position.hlsl", "gbufferviz_position");
CompilePixelShader("gbufferviz_motion.h", "gbufferviz_motion.hlsl", "gbufferviz_motion");
CompileGraphics("wireframe_normals.h", "wireframe_normals.hlsl", "wireframe_normals");
CompilePixelShader("skybox_motion.h", "skybox_motion.hlsl", "skybox_motion");
CompileCompute("mblur_tile_gen.h", "mblur_tile_gen.hlsl", "tile_gen");
CompileCompute("mblur_tile_max.h", "mblur_tile_max.hlsl", "tile_max");
CompilePixelShader("mblur_blur.h", "mblur_blur.hlsl", "blur");
CompilePixelShader("mblur_pack.h", "mblur_pack.hlsl", "pack");
CompilePixelShader("sun_overlay.h", "sun_overlay.hlsl", "sun_overlay");
CompilePixelShader("sun_visibility.h", "sun_visibility.hlsl", "sun_visibility");
CompilePixelShader("sun_blur.h", "sun_blur.hlsl", "sun_blur");
const char* vlComputeShaders[] =
struct Shader
{
"vl_extinction_injection_fog",
"vl_extinction_injection_nanovdb",
//"vl_extinction_injection_particles",
"vl_frustum_anisotropy_average",
"vl_frustum_depth_test",
"vl_frustum_injection_fog",
"vl_frustum_injection_nanovdb",
"vl_frustum_injection_particles",
"vl_frustum_inscatter_ambient",
"vl_frustum_inscatter_point_light",
"vl_frustum_inscatter_sunlight",
"vl_frustum_raymarch",
"vl_frustum_sunlight_visibility",
"vl_particles_clear",
"vl_particles_hit",
"vl_particles_list",
"vl_particles_tiles",
"vl_shadow_point_light",
"vl_shadow_sun"
const char* fileName;
int cost;
};
CompileVertexShader("fullscreen.h", "fullscreen.hlsl", "fullscreen");
CompilePixelShader(1, "blit.h", "blit.hlsl", "blit");
CompileGraphics(1, "ui.h", "ui.hlsl", "ui");
CompileGraphics(1, "imgui.h", "imgui.hlsl", "imgui");
CompileGraphics(1, "nuklear.h", "nuklear.hlsl", "nuklear");
CompileCompute(1, "mip_1.h", "mip_1.hlsl", "mip_1");
CompileCompute(1, "mip_2.h", "mip_2.hlsl", "mip_2");
CompileCompute(1, "mip_3.h", "mip_3.hlsl", "mip_3");
CompileGraphics(161, "prepass.h", "prepass.hlsl", "prepass");
CompileGraphics(176, "prepass_bary.h", "prepass.hlsl", "prepass_bary", 1, 1, "-D BARYCENTRICS=1", "-D BARYCENTRICS=1");
CompileGraphics(143, "opaque.h", "opaque.hlsl", "opaque");
CompileGraphics(154, "transp_draw.h", "transp_draw.hlsl", "transp_draw");
CompilePixelShader(96, "transp_resolve.h", "transp_resolve.hlsl", "transp_resolve");
CompilePixelShader(122, "transp_resolve_vol.h", "transp_resolve.hlsl", "transp_resolve_vol", 1, "-D VOLUMETRIC_LIGHT=1");
CompilePixelShader(1, "tone_map.h", "tone_map.hlsl", "tone_map");
CompilePixelShader(1, "tone_map_inverse.h", "tone_map_inverse.hlsl", "tone_map_inverse");
CompilePixelShader(1, "accumdof_accum.h", "accumdof_accum.hlsl", "accum");
CompilePixelShader(1, "accumdof_norm.h", "accumdof_norm.hlsl", "norm");
CompilePixelShader(1, "accumdof_debug.h", "accumdof_debug.hlsl", "debug");
CompileCompute(1, "gatherdof_split.h", "gatherdof_split.hlsl", "split");
CompileCompute(1, "gatherdof_coc_tile_gen.h", "gatherdof_coc_tile_gen.hlsl", "coc_tile_gen");
CompileCompute(1, "gatherdof_coc_tile_max.h", "gatherdof_coc_tile_max.hlsl", "coc_tile_max");
CompileCompute(1, "gatherdof_blur.h", "gatherdof_blur.hlsl", "blur");
CompileCompute(1, "gatherdof_fill.h", "gatherdof_fill.hlsl", "fill");
CompilePixelShader(1, "gatherdof_combine.h", "gatherdof_combine.hlsl", "combine");
CompilePixelShader(1, "gatherdof_debug.h", "gatherdof_debug.hlsl", "debug");
CompilePixelShader(1, "magnifier.h", "magnifier.hlsl", "magnifier");
CompilePixelShader(1, "dl_draw.h", "dl_draw.hlsl", "dl_draw");
CompilePixelShader(1, "dl_denoising.h", "dl_denoising.hlsl", "dl_denoising");
CompileGraphics(130, "add_light.h", "add_light.hlsl", "add_light");
CompilePixelShader(1, "gbufferviz_depth.h", "gbufferviz_depth.hlsl", "gbufferviz_depth");
CompilePixelShader(1, "gbufferviz_normal.h", "gbufferviz_normal.hlsl", "gbufferviz_normal");
CompilePixelShader(1, "gbufferviz_position.h", "gbufferviz_position.hlsl", "gbufferviz_position");
CompilePixelShader(1, "gbufferviz_motion.h", "gbufferviz_motion.hlsl", "gbufferviz_motion");
CompileGraphics(120, "wireframe_normals.h", "wireframe_normals.hlsl", "wireframe_normals");
CompilePixelShader(1, "skybox_motion.h", "skybox_motion.hlsl", "skybox_motion");
CompileCompute(1, "mblur_tile_gen.h", "mblur_tile_gen.hlsl", "tile_gen");
CompileCompute(1, "mblur_tile_max.h", "mblur_tile_max.hlsl", "tile_max");
CompilePixelShader(100, "mblur_blur.h", "mblur_blur.hlsl", "blur");
CompilePixelShader(1, "mblur_pack.h", "mblur_pack.hlsl", "pack");
CompilePixelShader(1, "sun_overlay.h", "sun_overlay.hlsl", "sun_overlay");
CompilePixelShader(1, "sun_visibility.h", "sun_visibility.hlsl", "sun_visibility");
CompilePixelShader(100, "sun_blur.h", "sun_blur.hlsl", "sun_blur");
CompileCompute(1, "vl_frustum_temporal_float4.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float4", 1, "-D TYPE_FLOAT4=1");
CompileCompute(1, "vl_frustum_temporal_float.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float", 1, "-D TYPE_FLOAT=1");
CompileCompute(668, "vl_frustum_injection_nanovdb_lq.h", "vl_frustum_injection_nanovdb.hlsl", "vl_frustum_injection_nanovdb_lq", 1, "-D PREVIEW_MODE=1");
CompileCompute(1, "vl_frustum_light_propagation_nx.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_nx", 1, "-D DIRECTION_NX=1");
CompileCompute(1, "vl_frustum_light_propagation_ny.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_ny", 1, "-D DIRECTION_NY=1");
CompileCompute(1, "vl_frustum_light_propagation_px.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_px", 1, "-D DIRECTION_PX=1");
CompileCompute(1, "vl_frustum_light_propagation_py.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_py", 1, "-D DIRECTION_PY=1");
CompileGraphics(169, "vl_debug_ambient.h", "vl_debug_ambient.hlsl", "vl_debug_ambient");
CompileGraphics(140, "vl_debug_extinction.h", "vl_debug_extinction.hlsl", "vl_debug_extinction");
CompileGraphics(140, "vl_debug_shadow_sun.h", "vl_debug_shadow_sun.hlsl", "vl_debug_shadow_sun");
CompileCompute(1, "depth_pyramid.h", "depth_pyramid.hlsl", "depth_pyramid");
CompileCompute(1, "particles_clear.h", "particles_clear.hlsl", "particles_clear");
CompileCompute(1, "particles_setup.h", "particles_setup.hlsl", "particles_setup");
CompileCompute(1, "particles_emit.h", "particles_emit.hlsl", "particles_emit");
CompileCompute(1, "particles_simulate.h", "particles_simulate.hlsl", "particles_simulate");
CompileGraphics(130, "im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1");
CompileGraphics(130, "im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1");
CompileGraphics(130, "im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1");
#define Expensive(Name, Cost) { Name, Cost }
#define Cheap(Name) { Name, 1 }
const Shader vlComputeShaders[] =
{
Cheap("vl_extinction_injection_fog"),
Expensive("vl_extinction_injection_nanovdb", 465),
//Cheap("vl_extinction_injection_particles"),
Cheap("vl_frustum_anisotropy_average"),
Cheap("vl_frustum_depth_test"),
Cheap("vl_frustum_injection_fog"),
Expensive("vl_frustum_injection_nanovdb", 2254),
Expensive("vl_frustum_injection_particles", 172),
Cheap("vl_frustum_inscatter_ambient"),
Cheap("vl_frustum_inscatter_point_light"),
Expensive("vl_frustum_inscatter_sunlight", 113),
Cheap("vl_frustum_raymarch"),
Cheap("vl_frustum_sunlight_visibility"),
Cheap("vl_particles_clear"),
Cheap("vl_particles_hit"),
Cheap("vl_particles_list"),
Cheap("vl_particles_tiles"),
Cheap("vl_shadow_point_light"),
Cheap("vl_shadow_sun")
};
#undef Expensive
#undef Cheap
for(int i = 0; i < _countof(vlComputeShaders); i++)
{
const char* const s = vlComputeShaders[i];
CompileCompute(va("%s.h", s), va("%s.hlsl", s), s);
const char* const s = vlComputeShaders[i].fileName;
const int cost = vlComputeShaders[i].cost;
CompileCompute(cost, va("%s.h", s), va("%s.hlsl", s), s);
}
CompileCompute("vl_frustum_temporal_float4.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float4", 1, "-D TYPE_FLOAT4=1");
CompileCompute("vl_frustum_temporal_float.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float", 1, "-D TYPE_FLOAT=1");
CompileCompute("vl_frustum_injection_nanovdb_lq.h", "vl_frustum_injection_nanovdb.hlsl", "vl_frustum_injection_nanovdb_lq", 1, "-D PREVIEW_MODE=1");
CompileCompute("vl_frustum_light_propagation_nx.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_nx", 1, "-D DIRECTION_NX=1");
CompileCompute("vl_frustum_light_propagation_ny.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_ny", 1, "-D DIRECTION_NY=1");
CompileCompute("vl_frustum_light_propagation_px.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_px", 1, "-D DIRECTION_PX=1");
CompileCompute("vl_frustum_light_propagation_py.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_py", 1, "-D DIRECTION_PY=1");
CompileGraphics("vl_debug_ambient.h", "vl_debug_ambient.hlsl", "vl_debug_ambient");
CompileGraphics("vl_debug_extinction.h", "vl_debug_extinction.hlsl", "vl_debug_extinction");
CompileGraphics("vl_debug_shadow_sun.h", "vl_debug_shadow_sun.hlsl", "vl_debug_shadow_sun");
CompileCompute("depth_pyramid.h", "depth_pyramid.hlsl", "depth_pyramid");
CompileCompute("particles_clear.h", "particles_clear.hlsl", "particles_clear");
CompileCompute("particles_setup.h", "particles_setup.hlsl", "particles_setup");
CompileCompute("particles_emit.h", "particles_emit.hlsl", "particles_emit");
CompileCompute("particles_simulate.h", "particles_simulate.hlsl", "particles_simulate");
CompileGraphics("im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1");
CompileGraphics("im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1");
CompileGraphics("im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1");
}
// Sets up the post-build (cleanup) commands for the "grp" shader set.
//
// Side effects on the shared path state:
// - shaderFolderPath is set to "grp"
// - outputPath is set to the canonicalized <repoPath>\code\renderer\compshaders\grp
// - the process' current directory is set to <repoPath>\code\renderer\shaders\grp
//
// Two command groups are then registered through AddCommand.
// NOTE(review): the commands are presumably only queued here and run by the
// ExecuteCommands call that main issues right after this function - confirm.
//
// Exits the process (exit code = source line number, same convention as main)
// when the current directory can't be changed.
void ProcessCleanupGRP()
{
	const char* const dirName = "grp";
	strcpy(shaderFolderPath, dirName);

	const char* const rendererPath = va("%s\\code\\renderer", repoPath);

	// Canonicalize returns a shared static buffer, so copy the result right away
	const char* const out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName));
	strcpy(outputPath, out);

	// the first "type" command below uses paths relative to this directory,
	// so a failure here must not be ignored
	const char* const cd = va("%s\\shaders\\%s", rendererPath, dirName);
	if(!SetCurrentDirectoryA(cd))
	{
		fprintf(stderr, "ERROR: Failed to set current directory to '%s'\n", cd);
		exit(__LINE__);
	}

	// group 1: concatenate the uber shader sources into a temporary file,
	// convert that file into the uber_shader.h header (variable name: uber_shader_string),
	// then delete the temporary file
	BeginCommandGroup();
	AddCommand(CommandType::Generic, va("type ..\\common\\state_bits.h.hlsli ..\\common\\blend.hlsli shared.hlsli uber_shader.hlsl > %s\\uber_shader.temp", outputPath));
	AddCommand(CommandType::Generic, va("%s --output %s\\uber_shader.h --hname uber_shader_string %s\\uber_shader.temp", bin2headerPath, outputPath, outputPath));
	AddCommand(CommandType::Generic, va("del %s\\uber_shader.temp", outputPath));
	EndCommandGroup();

	// group 2: merge every uber_shader_vs_*.h header into complete_uber_vs.h,
	// then delete the individual headers
	BeginCommandGroup();
	AddCommand(CommandType::Generic, va("type %s\\uber_shader_vs_*.h > %s\\complete_uber_vs.h", outputPath, outputPath));
	AddCommand(CommandType::Generic, va("del %s\\uber_shader_vs_*.h", outputPath));
	EndCommandGroup();
}
// Program entry point: resolves the tool paths, prints the dxc version,
// runs the GRP/CRP build steps followed by the GRP cleanup step,
// then prints the thread count and the compiled shader count.
int main(int /*argc*/, const char** argv)
@ -531,7 +931,7 @@ int main(int /*argc*/, const char** argv)
// the repository root is two directory levels above dirPath
strcpy(repoPath, Canonicalize(va("%s\\..\\..", dirPath)));
strcpy(bin2headerPath, Canonicalize(va("%s\\tools\\bin2header.exe", repoPath)));
// when the DXCPATH environment variable is set, its value is used as the dxc path
char* path = getenv("DXCPATH");
char* const path = getenv("DXCPATH");
if(path != NULL)
{
strcpy(dxcPath, path);
@ -542,8 +942,39 @@ int main(int /*argc*/, const char** argv)
}
// runs dxc with --version
system(va("%s --version", dxcPath));
ProcessGRP();
ProcessCRP();
// thread count: half of the reported processor count, capped at 16
// NOTE(review): this evaluates to 0 when a single processor is reported -
// confirm that ExecuteCommands copes with a max. thread count of 0
SYSTEM_INFO systeminfo;
GetSystemInfo(&systeminfo);
maxThreadCount = min((int)systeminfo.dwNumberOfProcessors / 2, 16);
const char* const rendererPath = va("%s\\code\\renderer", repoPath);
const char* const shaderPath = Canonicalize(va("%s\\shaders", rendererPath));
// failing to enter the shaders directory is fatal: the exit code is this source line number
if(SetCurrentDirectoryA(shaderPath) == FALSE)
{
fprintf(stderr, "ERROR: Failed to set current directory to '%s'\n", shaderPath);
exit(__LINE__);
}
// the return value is not checked here
// (presumably because the directory may already exist - confirm)
const char* const compShaderPath = Canonicalize(va("%s\\compshaders", rendererPath));
CreateDirectoryA(compShaderPath, NULL);
#if PROFILE
// NOTE(review): the fopen result is not checked before perfFile is used/closed
perfFile = fopen(va("%s\\shader_build.txt", repoPath), "w");
#endif
// presumably a scoped timer covering everything below - confirm against the Timer definition
Timer timer("Total");
ProcessBuildGRP();
ProcessBuildCRP();
ExecuteCommands(-1); // use max. thread count
// the cleanup step gets its own ExecuteCommands call with an argument of 1
// (presumably a single thread - confirm)
ProcessCleanupGRP();
ExecuteCommands(1);
#if PROFILE
fclose(perfFile);
#endif
printf("\n");
printf("Thread count: %d\n", maxThreadCount);
printf("Compiled shader count: %d\n", compiledShaderCount);
return 0;
}

View file

@ -643,7 +643,7 @@ solution "cnq3"
AddSourcesAndHeaders("shadercomp")
postbuildcommands { "$(TargetPath)" }
ApplyProjectSettings(true)
links { "Shlwapi" }
links { "Shlwapi", "Winmm" }
project "renderer"

View file

@ -72,7 +72,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Shlwapi.lib;Winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\.build\debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions> %(AdditionalOptions)</AdditionalOptions>
</Link>
@ -106,7 +106,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Shlwapi.lib;Winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\.build\release;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions> %(AdditionalOptions)</AdditionalOptions>
</Link>

View file

@ -73,7 +73,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Shlwapi.lib;Winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\.build\debug;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions> %(AdditionalOptions)</AdditionalOptions>
</Link>
@ -108,7 +108,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>Shlwapi.lib;Winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\.build\release;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions> %(AdditionalOptions)</AdditionalOptions>
</Link>