From 3d879533bb20ba5af9c485dc08f279ab55a982aa Mon Sep 17 00:00:00 2001 From: myT <> Date: Thu, 5 Dec 2024 23:20:51 +0100 Subject: [PATCH] massively improved shader compilation time --- code/shadercomp/shadercomp.cpp | 795 +++++++++++++++----- makefiles/premake5.lua | 2 +- makefiles/windows_vs2019/shadercomp.vcxproj | 4 +- makefiles/windows_vs2022/shadercomp.vcxproj | 4 +- 4 files changed, 618 insertions(+), 187 deletions(-) diff --git a/code/shadercomp/shadercomp.cpp b/code/shadercomp/shadercomp.cpp index b3b31bb..9a89995 100644 --- a/code/shadercomp/shadercomp.cpp +++ b/code/shadercomp/shadercomp.cpp @@ -31,24 +31,10 @@ along with Challenge Quake 3. If not, see . #include "../renderer/grp_uber_shaders.h" -char repoPath[MAX_PATH]; -char outputPath[MAX_PATH]; -char bin2headerPath[MAX_PATH]; -char dxcPath[MAX_PATH]; - -const char* targetVS = "vs_6_0"; -const char* targetPS = "ps_6_0"; -const char* targetCS = "cs_6_0"; - - -#define PS(Data) #Data, -const char* uberShaderPixelStates[] = -{ - UBER_SHADER_PS_LIST(PS) -}; -#undef PS +#define PROFILE 0 +// dxc options: // -Zi embeds debug info // -Qembed_debug embeds debug info in shader container // -Vn header variable name @@ -57,9 +43,37 @@ const char* uberShaderPixelStates[] = // -Wno-warning disables the warning +#define COST_UBER_PIXEL_SHADER (2002) +#define COST_SMAA_SHADER (2001) + +char repoPath[MAX_PATH]; +char shaderFolderPath[MAX_PATH]; +char outputPath[MAX_PATH]; +char bin2headerPath[MAX_PATH]; +char dxcPath[MAX_PATH]; + +#if PROFILE +FILE* perfFile; +#endif + +const char* targetVS = "vs_6_0"; +const char* targetPS = "ps_6_0"; +const char* targetCS = "cs_6_0"; + +#define PS(Data) #Data, +const char* uberShaderPixelStates[] = +{ + UBER_SHADER_PS_LIST(PS) +}; +#undef PS + +LONG commandIndex = -1; +LONG uberPixelShaderIndex = -1; +LONG smaaShaderIndex = -1; + const char* va(_Printf_format_string_ const char* format, ...) 
{ - static char string[64][32000]; + static char string[64][1 << 16]; static int index = 0; char* buf = string[index++ & 63]; va_list argptr; @@ -71,15 +85,352 @@ const char* va(_Printf_format_string_ const char* format, ...) return buf; } - -struct ShaderArgs +const char* Canonicalize(const char* path) { - const char* headerPath; - const char* shaderPath; - const char* entryPoint; - const char* targetProfile; + static char canonPath[MAX_PATH]; + + PathCanonicalizeA(canonPath, path); + + return canonPath; +} + +void WaitForFiles(const char* searchPath, int expectedCount) +{ + const DWORD startTimeMS = timeGetTime(); + + for(;;) + { + WIN32_FIND_DATAA fd; + const HANDLE search = FindFirstFileA(searchPath, &fd); + if(search == INVALID_HANDLE_VALUE) + { + continue; + } + + int count = 0; + bool nonEmpty = true; + do + { + if(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + continue; + } + else + { + count++; + if(fd.nFileSizeLow == 0 && fd.nFileSizeHigh == 0) + { + nonEmpty = false; + } + } + } + while(FindNextFileA(search, &fd) != 0); + + FindClose(search); + if(count == expectedCount && nonEmpty) + { +#if PROFILE + printf("The wait for %d '%s' files took %d ms\n", expectedCount, searchPath, timeGetTime() - startTimeMS); +#endif + return; + } + + const LONG elapsedMS = timeGetTime() - startTimeMS; + if(elapsedMS > 2000) + { + fprintf(stderr, "ERROR: The wait for the %d '%s' files timed out\n", expectedCount, searchPath); + exit(__LINE__); + } + + Sleep(50); + } +} + +struct CommandType +{ + enum Id + { + Generic, + Shader, + UberPixelShader, + SMAAShader, + Count + }; }; +struct Command +{ + const char* command; + CommandType::Id type; + int length; + int cost; +}; + +template +struct CommandData +{ + char* Allocate(int byteCount) + { + if(allocated + byteCount > BufferCapacity) + { + assert(!"Out of memory"); + fprintf(stderr, "ERROR: Failed to allocate command data"); + exit(__LINE__); + } + + char* const ptr = buffer + allocated; + allocated += byteCount; + + 
return ptr; + } + + void AddCommand(CommandType::Id type, const char* originalCommand, int cost) + { + if(commandCount + 1 > CommandCapacity) + { + assert(!"Out of memory"); + fprintf(stderr, "ERROR: Failed to allocate a command"); + exit(__LINE__); + } + + const int length = strlen(originalCommand); + const int lengthNT = length + 1; + char* const newCommand = Allocate(lengthNT); + memcpy(newCommand, originalCommand, lengthNT); + + Command& command = commands[commandCount++]; + command.command = newCommand; + command.type = type; + command.length = length; + command.cost = cost; + } + + void Clear() + { + allocated = 0; + commandCount = 0; + } + + int GetMaxCommandLength() + { + int l = 0; + for(int i = 0; i < commandCount; i++) + { + l = max(l, commandLengths[i]); + } + + return l; + } + + static int __cdecl CommandComparison(const void* aPtr, const void* bPtr) + { + const Command* const a = (const Command*)aPtr; + const Command* const b = (const Command*)bPtr; + + return a->cost - b->cost; + } + + void SortByAscendingCost() + { + qsort(commands, commandCount, sizeof(Command), &CommandComparison); + } + + char buffer[BufferCapacity]; + Command commands[CommandCapacity]; + int allocated = 0; + int commandCount = 0; +}; + +CommandData< 1 << 20, 1024> commands; +CommandData<64 << 10, 64> groupCommands; +bool commandGroupActive = false; +int compiledShaderCount = 0; +int maxThreadCount = 4; // very conservative default + +void BeginCommandGroup() +{ + assert(!commandGroupActive); + commandGroupActive = true; + groupCommands.Clear(); +} + +void EndCommandGroup(int cost = 1) +{ + assert(commandGroupActive); + commandGroupActive = false; + + if(groupCommands.commandCount == 1) + { + const Command& command = groupCommands.commands[0]; + commands.AddCommand(command.type, command.command, cost); + groupCommands.Clear(); + return; + } + + const char* const separator = " && "; + const int separatorLength = strlen(separator); + + int totalLength = 0; + for(int i = 0; i < 
groupCommands.commandCount; i++) + { + totalLength += groupCommands.commands[i].length; + } + totalLength += (groupCommands.commandCount - 1) * separatorLength; + totalLength += 1; + + char* const newCommand = commands.Allocate(totalLength); + char* d = newCommand; + for(int i = 0; i < groupCommands.commandCount; i++) + { + const Command& command = groupCommands.commands[i]; + const int l = command.length; + memcpy(d, command.command, l); + d += l; + if(i < groupCommands.commandCount - 1) + { + memcpy(d, separator, separatorLength); + d += separatorLength; + } + } + *d = '\0'; + + const Command& command = groupCommands.commands[0]; + commands.AddCommand(command.type, newCommand, cost); + groupCommands.Clear(); +} + +void AddCommand(CommandType::Id type, const char* cmd, int cost = 1) +{ + if(commandGroupActive) + { + groupCommands.AddCommand(type, cmd, cost); + } + else + { + commands.AddCommand(type, cmd, cost); + } +} + +void ExecuteCommand(int index) +{ +#if PROFILE + LARGE_INTEGER startTime; + QueryPerformanceCounter(&startTime); +#endif + const Command& command = commands.commands[index]; + const int result = system(command.command); + if(result != 0) + { + printf("FAILED: %s\nError code: %d, errno = %d, length = %d\n", + command.command, result, errno, command.length); + } +#if PROFILE + else + { + LARGE_INTEGER endTime, freq; + QueryPerformanceCounter(&endTime); + QueryPerformanceFrequency(&freq); + const int durationMS = (int)((1000 * (endTime.QuadPart - startTime.QuadPart)) / freq.QuadPart); + fprintf(perfFile, "%4d ms: %s\n", durationMS, command.command); + } +#endif + + if(command.type == CommandType::UberPixelShader && result == 0) + { + const LONG oldIndex = InterlockedExchangeAdd(&uberPixelShaderIndex, -1); + if(oldIndex == 1) + { + const char* const dirPath = Canonicalize(va("%s\\code\\renderer\\compshaders\\grp", repoPath)); + const char* const cmd = va( + "type %s\\uber_shader_ps_*.h > %s\\complete_uber_ps.h && " + "del %s\\uber_shader_ps_*.h", + 
dirPath, dirPath, dirPath); + WaitForFiles(va("%s\\uber_shader_ps_*.h", dirPath), _countof(uberShaderPixelStates)); + system(cmd); + } + } + else if(command.type == CommandType::SMAAShader && result == 0) + { + const LONG oldIndex = InterlockedExchangeAdd(&smaaShaderIndex, -1); + if(oldIndex == 1) + { + const char* const dirPath = Canonicalize(va("%s\\code\\renderer\\compshaders\\grp", repoPath)); + const char* const cmd = va( + "type %s\\smaa*.h > %s\\complete_smaa.h && " + "del %s\\smaa*.h", + dirPath, dirPath, dirPath); + WaitForFiles(va("%s\\smaa*.h", dirPath), 24); + system(cmd); + } + } +} + +DWORD WINAPI CommandThread(_In_ LPVOID) +{ + for(;;) + { + const LONG index = InterlockedExchangeAdd(&commandIndex, -1); + if(index < 0) + { + break; + } + + ExecuteCommand(index); + } + + return 0; +} + +void ExecuteCommands(int threadCount) +{ +#if PROFILE + threadCount = 1; +#else + if(threadCount < 1) + { + threadCount = maxThreadCount; + } +#endif + + assert(commands.commandCount > 0); + + HANDLE threads[256]; + assert(threadCount <= _countof(threads)); + + commands.SortByAscendingCost(); + + commandIndex = commands.commandCount - 1; + uberPixelShaderIndex = _countof(uberShaderPixelStates) - 1; + smaaShaderIndex = (4 * 3 * 2) - 1; // 4 variants, 3 passes, (1 VS + 1 PS) + + if(threadCount == 1) + { + for(int i = 0; i < commands.commandCount; i++) + { + ExecuteCommand(i); + } + } + else + { + for(int t = 0; t < threadCount; t++) + { + threads[t] = CreateThread(NULL, 0, &CommandThread, NULL, 0, NULL); + } + WaitForMultipleObjects(threadCount, threads, TRUE, INFINITE); + } + + for(int i = 0; i < commands.commandCount; i++) + { + if(commands.commands[i].type != CommandType::Generic) + { + compiledShaderCount++; + } + } + + commands.Clear(); + groupCommands.Clear(); +} + const char* OutputPath(const char* path) { return va("%s\\%s", outputPath, path); @@ -91,9 +442,17 @@ const char* HeaderVariable(const char* name) return va("\"static %s\"", name); } -void 
CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extras = NULL) +struct ShaderArgs { - static char temp[4096]; + const char* headerPath; + const char* shaderPath; + const char* entryPoint; + const char* targetProfile; +}; + +void CompileShader(int cost, const ShaderArgs& args, int extraCount, const char** extras) +{ + static char cmd[4096]; const char* headerPath = va("%s\\%s", outputPath, args.headerPath); @@ -101,20 +460,30 @@ void CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extr // -Gis: Force IEEE strictness // -Zi: Embed debug info // -Qembed_debug: Embed debug info in shader container - strcpy(temp, va("%s -HV 2021 -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", + strcpy(cmd, va("%s -HV 2021 -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", dxcPath, headerPath, args.entryPoint, args.targetProfile)); for(int i = 0; i < extraCount; ++i) { - strcat(temp, " "); - strcat(temp, extras[i]); + strcat(cmd, " "); + strcat(cmd, extras[i]); } - strcat(temp, " "); - strcat(temp, args.shaderPath); + strcat(cmd, " "); + strcat(cmd, shaderFolderPath); + strcat(cmd, "\\"); + strcat(cmd, args.shaderPath); - printf("%s\n", temp); - system(temp); + CommandType::Id type = CommandType::Shader; + if(cost == COST_UBER_PIXEL_SHADER) + { + type = CommandType::UberPixelShader; + } + else if(cost == COST_SMAA_SHADER) + { + type = CommandType::SMAAShader; + } + AddCommand(type, cmd, cost); } struct SMAAArgs @@ -142,7 +511,7 @@ void CompileSMAAShader(const SMAAArgs& smaaArgs) args.headerPath = smaaArgs.headerPath; args.shaderPath = smaaArgs.shaderPath; args.targetProfile = smaaArgs.vertexShader ? 
targetVS : targetPS; - CompileShader(args, _countof(extras), extras); + CompileShader(COST_SMAA_SHADER, args, _countof(extras), extras); } void ProcessSMAAShadersForPreset(const char* presetName, const char* presetMacro) @@ -170,11 +539,11 @@ void CompileSMAAShaders() ProcessSMAAShadersForPreset("ultra", "-D SMAA_PRESET_ULTRA=1"); } -void CompileGraphics(const char* headerPath, const char* shaderPath, const char* varName, +void CompileGraphics(int cost, const char* headerPath, const char* shaderPath, const char* varName, int vsOptionCount = 0, int psOptionCount = 0, ...) { - const char* vsHeaderRelPath = va("%s.vs.h", shaderPath); - const char* psHeaderRelPath = va("%s.ps.h", shaderPath); + const char* vsHeaderRelPath = va("%s_%s.vs.h", shaderPath, varName); + const char* psHeaderRelPath = va("%s_%s.ps.h", shaderPath, varName); const char* vsHeaderPath = OutputPath(vsHeaderRelPath); const char* psHeaderPath = OutputPath(psHeaderRelPath); @@ -206,23 +575,27 @@ void CompileGraphics(const char* headerPath, const char* shaderPath, const char* } va_end(argPtr); + BeginCommandGroup(); + ShaderArgs args; args.entryPoint = "vs"; args.headerPath = vsHeaderRelPath; args.shaderPath = shaderPath; args.targetProfile = targetVS; - CompileShader(args, vsExtraCount, vsExtras); + CompileShader(1, args, vsExtraCount, vsExtras); args.entryPoint = "ps"; args.headerPath = psHeaderRelPath; args.shaderPath = shaderPath; args.targetProfile = targetPS; - CompileShader(args, psExtraCount, psExtras); + CompileShader(1, args, psExtraCount, psExtras); const char* outHeaderPath = OutputPath(headerPath); - system(va("type %s %s > %s", vsHeaderPath, psHeaderPath, outHeaderPath)); - system(va("del %s", vsHeaderPath)); - system(va("del %s", psHeaderPath)); + AddCommand(CommandType::Generic, va("type %s %s > %s", vsHeaderPath, psHeaderPath, outHeaderPath)); + AddCommand(CommandType::Generic, va("del %s", vsHeaderPath)); + AddCommand(CommandType::Generic, va("del %s", psHeaderPath)); + + 
EndCommandGroup(cost); } void CompileVertexShader(const char* headerPath, const char* shaderPath, const char* varName) @@ -238,10 +611,10 @@ void CompileVertexShader(const char* headerPath, const char* shaderPath, const c args.headerPath = headerPath; args.shaderPath = shaderPath; args.targetProfile = targetVS; - CompileShader(args, _countof(extras), extras); + CompileShader(1, args, _countof(extras), extras); } -void CompilePixelShader(const char* headerPath, const char* shaderPath, const char* varName, int psOptionCount = 0, ...) +void CompilePixelShader(int cost, const char* headerPath, const char* shaderPath, const char* varName, int psOptionCount = 0, ...) { int psExtraCount = 4; const char* psExtras[64] = @@ -265,10 +638,10 @@ void CompilePixelShader(const char* headerPath, const char* shaderPath, const ch args.headerPath = headerPath; args.shaderPath = shaderPath; args.targetProfile = targetPS; - CompileShader(args, psExtraCount, psExtras); + CompileShader(cost, args, psExtraCount, psExtras); } -void CompileCompute(const char* headerPath, const char* shaderPath, const char* varName, int csOptionCount = 0, ...) +void CompileCompute(int cost, const char* headerPath, const char* shaderPath, const char* varName, int csOptionCount = 0, ...) 
{ int csExtraCount = 4; const char* csExtras[64] = @@ -292,7 +665,7 @@ void CompileCompute(const char* headerPath, const char* shaderPath, const char* args.headerPath = headerPath; args.shaderPath = shaderPath; args.targetProfile = targetCS; - CompileShader(args, csExtraCount, csExtras); + CompileShader(cost, args, csExtraCount, csExtras); } void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCount) @@ -309,7 +682,7 @@ void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCoun args.headerPath = headerPath; args.shaderPath = shaderPath; args.targetProfile = targetVS; - CompileShader(args, _countof(extras), extras); + CompileShader(1, args, _countof(extras), extras); } void CompileUberPS(const char* stateString) @@ -317,8 +690,8 @@ void CompileUberPS(const char* stateString) UberPixelShaderState state; if(!ParseUberPixelShaderState(state, stateString)) { - fprintf(stderr, "ParseUberPixelShaderState failed!\n"); - exit(666); + fprintf(stderr, "ERROR: ParseUberPixelShaderState failed!\n"); + exit(__LINE__); } const char* extras[16]; @@ -345,174 +718,201 @@ void CompileUberPS(const char* stateString) args.headerPath = va("uber_shader_ps_%s.h", stateString); args.shaderPath = "uber_shader.hlsl"; args.targetProfile = targetPS; - CompileShader(args, extraCount, extras); -} - -const char* Canonicalize(const char* path) -{ - static char canonPath[MAX_PATH]; - - PathCanonicalizeA(canonPath, path); - - return canonPath; + CompileShader(COST_UBER_PIXEL_SHADER, args, extraCount, extras); } void InitDirectory(const char* dirName) { - const char* rendererPath = va("%s\\code\\renderer", repoPath); - const char* cd = Canonicalize(va("%s\\shaders\\%s", rendererPath, dirName)); - SetCurrentDirectoryA(cd); - const char* out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName)); + strcpy(shaderFolderPath, dirName); + const char* const rendererPath = va("%s\\code\\renderer", repoPath); + const char* const out = 
Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName)); strcpy(outputPath, out); CreateDirectoryA(outputPath, NULL); - system(va("del %s\\*.h", outputPath)); - system(va("del %s\\*.temp", outputPath)); + system(va("del %s\\*.h && del %s\\*.temp", outputPath, outputPath)); } -void ProcessGRP() +struct Timer +{ + explicit Timer(const char* name_) + { + startTimeMS = timeGetTime(); + name = name_; + } + + ~Timer() + { + const DWORD durationMS = timeGetTime() - startTimeMS; + printf("\n%s shader build: %.1f seconds\n", name, (float)durationMS / 1000.0f); + } + + const char* name; + DWORD startTimeMS; +}; + +void ProcessBuildGRP() { InitDirectory("grp"); targetVS = "vs_6_0"; targetPS = "ps_6_0"; targetCS = "cs_6_0"; - CompileGraphics("post_gamma.h", "post_gamma.hlsl", "post"); - CompileGraphics("post_inverse_gamma.h", "post_inverse_gamma.hlsl", "post_inverse"); - CompileGraphics("imgui.h", "imgui.hlsl", "imgui"); - CompileGraphics("nuklear.h", "nuklear.hlsl", "nuklear"); - CompileGraphics("ui.h", "ui.hlsl", "ui"); - CompileGraphics("depth_pre_pass.h", "depth_pre_pass.hlsl", "zpp"); - CompileGraphics("dynamic_light.h", "dynamic_light.hlsl", "dl"); - CompileGraphics("blit.h", "blit.hlsl", "blit"); CompileVertexShader("fog.h", "fog_inside.hlsl", "fog"); - CompilePixelShader("fog_inside.h", "fog_inside.hlsl", "fog_inside"); - CompilePixelShader("fog_outside.h", "fog_outside.hlsl", "fog_outside"); - CompileCompute("mip_1.h", "mip_1.hlsl", "mip_1"); - CompileCompute("mip_2.h", "mip_2.hlsl", "mip_2"); - CompileCompute("mip_3.h", "mip_3.hlsl", "mip_3"); - CompileGraphics("im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1"); - CompileGraphics("im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1"); - CompileGraphics("im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1"); - + CompilePixelShader(1, "fog_inside.h", "fog_inside.hlsl", "fog_inside"); + CompilePixelShader(1, "fog_outside.h", 
"fog_outside.hlsl", "fog_outside"); + CompileCompute(1, "mip_1.h", "mip_1.hlsl", "mip_1"); + CompileCompute(1, "mip_2.h", "mip_2.hlsl", "mip_2"); + CompileCompute(1, "mip_3.h", "mip_3.hlsl", "mip_3"); CompileSMAAShaders(); - system(va("type %s\\smaa*.h > %s\\complete_smaa.h", outputPath, outputPath)); - - // type combines all files into one - system(va("type ..\\common\\state_bits.h.hlsli ..\\common\\blend.hlsli shared.hlsli uber_shader.hlsl > %s\\uber_shader.temp", outputPath)); - system(va("%s --output %s\\uber_shader.h --hname uber_shader_string %s\\uber_shader.temp", bin2headerPath, outputPath, outputPath)); - system(va("del %s\\uber_shader.temp", outputPath)); - for(int i = 0; i < 8; ++i) { CompileUberVS(va("uber_shader_vs_%i.h", i + 1), "uber_shader.hlsl", i + 1); } - system(va("type %s\\uber_shader_vs_*.h > %s\\complete_uber_vs.h", outputPath, outputPath)); - system(va("del %s\\uber_shader_vs_*.h", outputPath)); - for(int i = 0; i < _countof(uberShaderPixelStates); ++i) { CompileUberPS(uberShaderPixelStates[i]); } - system(va("type %s\\uber_shader_ps_*.h > %s\\complete_uber_ps.h", outputPath, outputPath)); - system(va("del %s\\uber_shader_ps_*.h", outputPath)); + CompileGraphics(1, "post_gamma.h", "post_gamma.hlsl", "post"); + CompileGraphics(1, "post_inverse_gamma.h", "post_inverse_gamma.hlsl", "post_inverse"); + CompileGraphics(1, "imgui.h", "imgui.hlsl", "imgui"); + CompileGraphics(1, "nuklear.h", "nuklear.hlsl", "nuklear"); + CompileGraphics(1, "ui.h", "ui.hlsl", "ui"); + CompileGraphics(1, "depth_pre_pass.h", "depth_pre_pass.hlsl", "zpp"); + CompileGraphics(1, "dynamic_light.h", "dynamic_light.hlsl", "dl"); + CompileGraphics(1, "blit.h", "blit.hlsl", "blit"); + CompileGraphics(1, "im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1"); + CompileGraphics(1, "im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1"); + CompileGraphics(1, "im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", 
"-D TRIANGLES=1"); } -void ProcessCRP() +void ProcessBuildCRP() { InitDirectory("crp"); targetVS = "vs_6_6"; targetPS = "ps_6_6"; targetCS = "cs_6_6"; - CompileVertexShader("fullscreen.h", "fullscreen.hlsl", "fullscreen"); - CompilePixelShader("blit.h", "blit.hlsl", "blit"); - CompileGraphics("ui.h", "ui.hlsl", "ui"); - CompileGraphics("imgui.h", "imgui.hlsl", "imgui"); - CompileGraphics("nuklear.h", "nuklear.hlsl", "nuklear"); - CompileCompute("mip_1.h", "mip_1.hlsl", "mip_1"); - CompileCompute("mip_2.h", "mip_2.hlsl", "mip_2"); - CompileCompute("mip_3.h", "mip_3.hlsl", "mip_3"); - CompileGraphics("prepass.h", "prepass.hlsl", "prepass"); - CompileGraphics("prepass_bary.h", "prepass.hlsl", "prepass_bary", 1, 1, "-D BARYCENTRICS=1", "-D BARYCENTRICS=1"); - CompileGraphics("opaque.h", "opaque.hlsl", "opaque"); - CompileGraphics("transp_draw.h", "transp_draw.hlsl", "transp_draw"); - CompilePixelShader("transp_resolve.h", "transp_resolve.hlsl", "transp_resolve"); - CompilePixelShader("transp_resolve_vol.h", "transp_resolve.hlsl", "transp_resolve_vol", 1, "-D VOLUMETRIC_LIGHT=1"); - CompilePixelShader("tone_map.h", "tone_map.hlsl", "tone_map"); - CompilePixelShader("tone_map_inverse.h", "tone_map_inverse.hlsl", "tone_map_inverse"); - CompilePixelShader("accumdof_accum.h", "accumdof_accum.hlsl", "accum"); - CompilePixelShader("accumdof_norm.h", "accumdof_norm.hlsl", "norm"); - CompilePixelShader("accumdof_debug.h", "accumdof_debug.hlsl", "debug"); - CompileCompute("gatherdof_split.h", "gatherdof_split.hlsl", "split"); - CompileCompute("gatherdof_coc_tile_gen.h", "gatherdof_coc_tile_gen.hlsl", "coc_tile_gen"); - CompileCompute("gatherdof_coc_tile_max.h", "gatherdof_coc_tile_max.hlsl", "coc_tile_max"); - CompileCompute("gatherdof_blur.h", "gatherdof_blur.hlsl", "blur"); - CompileCompute("gatherdof_fill.h", "gatherdof_fill.hlsl", "fill"); - CompilePixelShader("gatherdof_combine.h", "gatherdof_combine.hlsl", "combine"); - CompilePixelShader("gatherdof_debug.h", 
"gatherdof_debug.hlsl", "debug"); - CompilePixelShader("magnifier.h", "magnifier.hlsl", "magnifier"); - CompilePixelShader("dl_draw.h", "dl_draw.hlsl", "dl_draw"); - CompilePixelShader("dl_denoising.h", "dl_denoising.hlsl", "dl_denoising"); - CompileGraphics("add_light.h", "add_light.hlsl", "add_light"); - CompilePixelShader("gbufferviz_depth.h", "gbufferviz_depth.hlsl", "gbufferviz_depth"); - CompilePixelShader("gbufferviz_normal.h", "gbufferviz_normal.hlsl", "gbufferviz_normal"); - CompilePixelShader("gbufferviz_position.h", "gbufferviz_position.hlsl", "gbufferviz_position"); - CompilePixelShader("gbufferviz_motion.h", "gbufferviz_motion.hlsl", "gbufferviz_motion"); - CompileGraphics("wireframe_normals.h", "wireframe_normals.hlsl", "wireframe_normals"); - CompilePixelShader("skybox_motion.h", "skybox_motion.hlsl", "skybox_motion"); - CompileCompute("mblur_tile_gen.h", "mblur_tile_gen.hlsl", "tile_gen"); - CompileCompute("mblur_tile_max.h", "mblur_tile_max.hlsl", "tile_max"); - CompilePixelShader("mblur_blur.h", "mblur_blur.hlsl", "blur"); - CompilePixelShader("mblur_pack.h", "mblur_pack.hlsl", "pack"); - CompilePixelShader("sun_overlay.h", "sun_overlay.hlsl", "sun_overlay"); - CompilePixelShader("sun_visibility.h", "sun_visibility.hlsl", "sun_visibility"); - CompilePixelShader("sun_blur.h", "sun_blur.hlsl", "sun_blur"); - const char* vlComputeShaders[] = + struct Shader { - "vl_extinction_injection_fog", - "vl_extinction_injection_nanovdb", - //"vl_extinction_injection_particles", - "vl_frustum_anisotropy_average", - "vl_frustum_depth_test", - "vl_frustum_injection_fog", - "vl_frustum_injection_nanovdb", - "vl_frustum_injection_particles", - "vl_frustum_inscatter_ambient", - "vl_frustum_inscatter_point_light", - "vl_frustum_inscatter_sunlight", - "vl_frustum_raymarch", - "vl_frustum_sunlight_visibility", - "vl_particles_clear", - "vl_particles_hit", - "vl_particles_list", - "vl_particles_tiles", - "vl_shadow_point_light", - "vl_shadow_sun" + const char* fileName; 
+ int cost; }; + + CompileVertexShader("fullscreen.h", "fullscreen.hlsl", "fullscreen"); + CompilePixelShader(1, "blit.h", "blit.hlsl", "blit"); + CompileGraphics(1, "ui.h", "ui.hlsl", "ui"); + CompileGraphics(1, "imgui.h", "imgui.hlsl", "imgui"); + CompileGraphics(1, "nuklear.h", "nuklear.hlsl", "nuklear"); + CompileCompute(1, "mip_1.h", "mip_1.hlsl", "mip_1"); + CompileCompute(1, "mip_2.h", "mip_2.hlsl", "mip_2"); + CompileCompute(1, "mip_3.h", "mip_3.hlsl", "mip_3"); + CompileGraphics(161, "prepass.h", "prepass.hlsl", "prepass"); + CompileGraphics(176, "prepass_bary.h", "prepass.hlsl", "prepass_bary", 1, 1, "-D BARYCENTRICS=1", "-D BARYCENTRICS=1"); + CompileGraphics(143, "opaque.h", "opaque.hlsl", "opaque"); + CompileGraphics(154, "transp_draw.h", "transp_draw.hlsl", "transp_draw"); + CompilePixelShader(96, "transp_resolve.h", "transp_resolve.hlsl", "transp_resolve"); + CompilePixelShader(122, "transp_resolve_vol.h", "transp_resolve.hlsl", "transp_resolve_vol", 1, "-D VOLUMETRIC_LIGHT=1"); + CompilePixelShader(1, "tone_map.h", "tone_map.hlsl", "tone_map"); + CompilePixelShader(1, "tone_map_inverse.h", "tone_map_inverse.hlsl", "tone_map_inverse"); + CompilePixelShader(1, "accumdof_accum.h", "accumdof_accum.hlsl", "accum"); + CompilePixelShader(1, "accumdof_norm.h", "accumdof_norm.hlsl", "norm"); + CompilePixelShader(1, "accumdof_debug.h", "accumdof_debug.hlsl", "debug"); + CompileCompute(1, "gatherdof_split.h", "gatherdof_split.hlsl", "split"); + CompileCompute(1, "gatherdof_coc_tile_gen.h", "gatherdof_coc_tile_gen.hlsl", "coc_tile_gen"); + CompileCompute(1, "gatherdof_coc_tile_max.h", "gatherdof_coc_tile_max.hlsl", "coc_tile_max"); + CompileCompute(1, "gatherdof_blur.h", "gatherdof_blur.hlsl", "blur"); + CompileCompute(1, "gatherdof_fill.h", "gatherdof_fill.hlsl", "fill"); + CompilePixelShader(1, "gatherdof_combine.h", "gatherdof_combine.hlsl", "combine"); + CompilePixelShader(1, "gatherdof_debug.h", "gatherdof_debug.hlsl", "debug"); + CompilePixelShader(1, 
"magnifier.h", "magnifier.hlsl", "magnifier"); + CompilePixelShader(1, "dl_draw.h", "dl_draw.hlsl", "dl_draw"); + CompilePixelShader(1, "dl_denoising.h", "dl_denoising.hlsl", "dl_denoising"); + CompileGraphics(130, "add_light.h", "add_light.hlsl", "add_light"); + CompilePixelShader(1, "gbufferviz_depth.h", "gbufferviz_depth.hlsl", "gbufferviz_depth"); + CompilePixelShader(1, "gbufferviz_normal.h", "gbufferviz_normal.hlsl", "gbufferviz_normal"); + CompilePixelShader(1, "gbufferviz_position.h", "gbufferviz_position.hlsl", "gbufferviz_position"); + CompilePixelShader(1, "gbufferviz_motion.h", "gbufferviz_motion.hlsl", "gbufferviz_motion"); + CompileGraphics(120, "wireframe_normals.h", "wireframe_normals.hlsl", "wireframe_normals"); + CompilePixelShader(1, "skybox_motion.h", "skybox_motion.hlsl", "skybox_motion"); + CompileCompute(1, "mblur_tile_gen.h", "mblur_tile_gen.hlsl", "tile_gen"); + CompileCompute(1, "mblur_tile_max.h", "mblur_tile_max.hlsl", "tile_max"); + CompilePixelShader(100, "mblur_blur.h", "mblur_blur.hlsl", "blur"); + CompilePixelShader(1, "mblur_pack.h", "mblur_pack.hlsl", "pack"); + CompilePixelShader(1, "sun_overlay.h", "sun_overlay.hlsl", "sun_overlay"); + CompilePixelShader(1, "sun_visibility.h", "sun_visibility.hlsl", "sun_visibility"); + CompilePixelShader(100, "sun_blur.h", "sun_blur.hlsl", "sun_blur"); + CompileCompute(1, "vl_frustum_temporal_float4.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float4", 1, "-D TYPE_FLOAT4=1"); + CompileCompute(1, "vl_frustum_temporal_float.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float", 1, "-D TYPE_FLOAT=1"); + CompileCompute(668, "vl_frustum_injection_nanovdb_lq.h", "vl_frustum_injection_nanovdb.hlsl", "vl_frustum_injection_nanovdb_lq", 1, "-D PREVIEW_MODE=1"); + CompileCompute(1, "vl_frustum_light_propagation_nx.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_nx", 1, "-D DIRECTION_NX=1"); + CompileCompute(1, "vl_frustum_light_propagation_ny.h", 
"vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_ny", 1, "-D DIRECTION_NY=1"); + CompileCompute(1, "vl_frustum_light_propagation_px.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_px", 1, "-D DIRECTION_PX=1"); + CompileCompute(1, "vl_frustum_light_propagation_py.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_py", 1, "-D DIRECTION_PY=1"); + CompileGraphics(169, "vl_debug_ambient.h", "vl_debug_ambient.hlsl", "vl_debug_ambient"); + CompileGraphics(140, "vl_debug_extinction.h", "vl_debug_extinction.hlsl", "vl_debug_extinction"); + CompileGraphics(140, "vl_debug_shadow_sun.h", "vl_debug_shadow_sun.hlsl", "vl_debug_shadow_sun"); + CompileCompute(1, "depth_pyramid.h", "depth_pyramid.hlsl", "depth_pyramid"); + CompileCompute(1, "particles_clear.h", "particles_clear.hlsl", "particles_clear"); + CompileCompute(1, "particles_setup.h", "particles_setup.hlsl", "particles_setup"); + CompileCompute(1, "particles_emit.h", "particles_emit.hlsl", "particles_emit"); + CompileCompute(1, "particles_simulate.h", "particles_simulate.hlsl", "particles_simulate"); + CompileGraphics(130, "im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1"); + CompileGraphics(130, "im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1"); + CompileGraphics(130, "im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1"); +#define Expensive(Name, Cost) { Name, Cost } +#define Cheap(Name) { Name, 1 } + const Shader vlComputeShaders[] = + { + Cheap("vl_extinction_injection_fog"), + Expensive("vl_extinction_injection_nanovdb", 465), + //Cheap("vl_extinction_injection_particles"), + Cheap("vl_frustum_anisotropy_average"), + Cheap("vl_frustum_depth_test"), + Cheap("vl_frustum_injection_fog"), + Expensive("vl_frustum_injection_nanovdb", 2254), + Expensive("vl_frustum_injection_particles", 172), + Cheap("vl_frustum_inscatter_ambient"), + 
Cheap("vl_frustum_inscatter_point_light"), + Expensive("vl_frustum_inscatter_sunlight", 113), + Cheap("vl_frustum_raymarch"), + Cheap("vl_frustum_sunlight_visibility"), + Cheap("vl_particles_clear"), + Cheap("vl_particles_hit"), + Cheap("vl_particles_list"), + Cheap("vl_particles_tiles"), + Cheap("vl_shadow_point_light"), + Cheap("vl_shadow_sun") + }; +#undef Expensive +#undef Cheap for(int i = 0; i < _countof(vlComputeShaders); i++) { - const char* const s = vlComputeShaders[i]; - CompileCompute(va("%s.h", s), va("%s.hlsl", s), s); + const char* const s = vlComputeShaders[i].fileName; + const int cost = vlComputeShaders[i].cost; + CompileCompute(cost, va("%s.h", s), va("%s.hlsl", s), s); } - CompileCompute("vl_frustum_temporal_float4.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float4", 1, "-D TYPE_FLOAT4=1"); - CompileCompute("vl_frustum_temporal_float.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float", 1, "-D TYPE_FLOAT=1"); - CompileCompute("vl_frustum_injection_nanovdb_lq.h", "vl_frustum_injection_nanovdb.hlsl", "vl_frustum_injection_nanovdb_lq", 1, "-D PREVIEW_MODE=1"); - CompileCompute("vl_frustum_light_propagation_nx.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_nx", 1, "-D DIRECTION_NX=1"); - CompileCompute("vl_frustum_light_propagation_ny.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_ny", 1, "-D DIRECTION_NY=1"); - CompileCompute("vl_frustum_light_propagation_px.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_px", 1, "-D DIRECTION_PX=1"); - CompileCompute("vl_frustum_light_propagation_py.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_py", 1, "-D DIRECTION_PY=1"); - CompileGraphics("vl_debug_ambient.h", "vl_debug_ambient.hlsl", "vl_debug_ambient"); - CompileGraphics("vl_debug_extinction.h", "vl_debug_extinction.hlsl", "vl_debug_extinction"); - CompileGraphics("vl_debug_shadow_sun.h", "vl_debug_shadow_sun.hlsl", "vl_debug_shadow_sun"); - 
CompileCompute("depth_pyramid.h", "depth_pyramid.hlsl", "depth_pyramid"); - CompileCompute("particles_clear.h", "particles_clear.hlsl", "particles_clear"); - CompileCompute("particles_setup.h", "particles_setup.hlsl", "particles_setup"); - CompileCompute("particles_emit.h", "particles_emit.hlsl", "particles_emit"); - CompileCompute("particles_simulate.h", "particles_simulate.hlsl", "particles_simulate"); - CompileGraphics("im3d_points.h", "im3d.hlsl", "im3d_points", 1, 1, "-D POINTS=1", "-D POINTS=1"); - CompileGraphics("im3d_lines.h", "im3d.hlsl", "im3d_lines", 1, 1, "-D LINES=1", "-D LINES=1"); - CompileGraphics("im3d_triangles.h", "im3d.hlsl", "im3d_triangles", 1, 1, "-D TRIANGLES=1", "-D TRIANGLES=1"); +} + +void ProcessCleanupGRP() /* Registers the GRP post-build commands. Sets shaderFolderPath to the grp folder name, outputPath to the canonicalized compshaders\grp folder under code\renderer, and the current directory to shaders\grp, then adds two command groups through AddCommand. NOTE(review): presumably the commands of a group run in the order they were added - confirm against ExecuteCommands. */ +{ + const char* const dirName = "grp"; + strcpy(shaderFolderPath, dirName); + const char* const rendererPath = va("%s\\code\\renderer", repoPath); + const char* const out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName)); + strcpy(outputPath, out); /* out points at the static buffer returned by Canonicalize, so the result is copied into outputPath before further use */ + const char* const cd = va("%s\\shaders\\%s", rendererPath, dirName); + SetCurrentDirectoryA(cd); /* return value is not checked here; the relative ..\common source paths in the first command depend on this directory */ + + BeginCommandGroup(); /* group 1: concatenate the uber shader sources into uber_shader.temp, run bin2header on it to produce uber_shader.h (--hname uber_shader_string), then delete the temp file */ + AddCommand(CommandType::Generic, va("type ..\\common\\state_bits.h.hlsli ..\\common\\blend.hlsli shared.hlsli uber_shader.hlsl > %s\\uber_shader.temp", outputPath)); + AddCommand(CommandType::Generic, va("%s --output %s\\uber_shader.h --hname uber_shader_string %s\\uber_shader.temp", bin2headerPath, outputPath, outputPath)); + AddCommand(CommandType::Generic, va("del %s\\uber_shader.temp", outputPath)); + EndCommandGroup(); + + BeginCommandGroup(); /* group 2: concatenate every uber_shader_vs_*.h found in outputPath into complete_uber_vs.h, then delete the individual headers */ + AddCommand(CommandType::Generic, va("type %s\\uber_shader_vs_*.h > %s\\complete_uber_vs.h", outputPath, outputPath)); + AddCommand(CommandType::Generic, va("del %s\\uber_shader_vs_*.h", outputPath)); + EndCommandGroup(); } int main(int /*argc*/, const char** argv) @@ -531,7 +931,7 @@ int main(int /*argc*/, const char** argv) strcpy(repoPath, Canonicalize(va("%s\\..\\..", dirPath)));
strcpy(bin2headerPath, Canonicalize(va("%s\\tools\\bin2header.exe", repoPath))); - char* path = getenv("DXCPATH"); + char* const path = getenv("DXCPATH"); if(path != NULL) { strcpy(dxcPath, path); @@ -542,8 +942,39 @@ int main(int /*argc*/, const char** argv) } system(va("%s --version", dxcPath)); - ProcessGRP(); - ProcessCRP(); + SYSTEM_INFO systeminfo; + GetSystemInfo(&systeminfo); + maxThreadCount = min((int)systeminfo.dwNumberOfProcessors / 2, 16); + + const char* const rendererPath = va("%s\\code\\renderer", repoPath); + const char* const shaderPath = Canonicalize(va("%s\\shaders", rendererPath)); + if(SetCurrentDirectoryA(shaderPath) == FALSE) + { + fprintf(stderr, "ERROR: Failed to set current directory to '%s'\n", shaderPath); + exit(__LINE__); + } + + const char* const compShaderPath = Canonicalize(va("%s\\compshaders", rendererPath)); + CreateDirectoryA(compShaderPath, NULL); + +#if PROFILE + perfFile = fopen(va("%s\\shader_build.txt", repoPath), "w"); +#endif + + Timer timer("Total"); + ProcessBuildGRP(); + ProcessBuildCRP(); + ExecuteCommands(-1); // use max. 
thread count + ProcessCleanupGRP(); + ExecuteCommands(1); + +#if PROFILE + fclose(perfFile); +#endif + + printf("\n"); + printf("Thread count: %d\n", maxThreadCount); + printf("Compiled shader count: %d\n", compiledShaderCount); return 0; } diff --git a/makefiles/premake5.lua b/makefiles/premake5.lua index 12fbe6b..23dce22 100644 --- a/makefiles/premake5.lua +++ b/makefiles/premake5.lua @@ -643,7 +643,7 @@ solution "cnq3" AddSourcesAndHeaders("shadercomp") postbuildcommands { "$(TargetPath)" } ApplyProjectSettings(true) - links { "Shlwapi" } + links { "Shlwapi", "Winmm" } project "renderer" diff --git a/makefiles/windows_vs2019/shadercomp.vcxproj b/makefiles/windows_vs2019/shadercomp.vcxproj index 6428efa..3989268 100644 --- a/makefiles/windows_vs2019/shadercomp.vcxproj +++ b/makefiles/windows_vs2019/shadercomp.vcxproj @@ -72,7 +72,7 @@ Console true - Shlwapi.lib;%(AdditionalDependencies) + Shlwapi.lib;Winmm.lib;%(AdditionalDependencies) ..\..\.build\debug;%(AdditionalLibraryDirectories) %(AdditionalOptions) @@ -106,7 +106,7 @@ true true true - Shlwapi.lib;%(AdditionalDependencies) + Shlwapi.lib;Winmm.lib;%(AdditionalDependencies) ..\..\.build\release;%(AdditionalLibraryDirectories) %(AdditionalOptions) diff --git a/makefiles/windows_vs2022/shadercomp.vcxproj b/makefiles/windows_vs2022/shadercomp.vcxproj index 3753b7c..855dda6 100644 --- a/makefiles/windows_vs2022/shadercomp.vcxproj +++ b/makefiles/windows_vs2022/shadercomp.vcxproj @@ -73,7 +73,7 @@ Console true - Shlwapi.lib;%(AdditionalDependencies) + Shlwapi.lib;Winmm.lib;%(AdditionalDependencies) ..\..\.build\debug;%(AdditionalLibraryDirectories) %(AdditionalOptions) @@ -108,7 +108,7 @@ true true true - Shlwapi.lib;%(AdditionalDependencies) + Shlwapi.lib;Winmm.lib;%(AdditionalDependencies) ..\..\.build\release;%(AdditionalLibraryDirectories) %(AdditionalOptions)