/*
===========================================================================
Copyright (C) 2022-2024 Gian 'myT' Schellenbaum

This file is part of Challenge Quake 3 (CNQ3).

Challenge Quake 3 is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Direct3D 12 Rendering Hardware Interface


#if defined(_DEBUG)
#define D3D_DEBUG
#endif
#define D3D_AGILITY_SDK
//#define D3D_GPU_BASED_VALIDATION
//#define RHI_DEBUG_FENCE
//#define RHI_ENABLE_NVAPI
//#define RHI_ENABLE_NVAPI_RT_VALIDATION
//#define RHI_ENABLE_AFTERMATH
#if defined(D3D_DEBUG) && defined(RHI_ENABLE_AFTERMATH)
#error You can't enable NVIDIA Aftermath when the Direct3D Debug Layer is active.
#endif


#include "rhi_local.h"
// NOTE(review): the bare #include directives below have no header name in this copy of the file
// (the angle-bracket part appears to have been stripped) - restore them from the original before building
#include
#include "d3d12/d3d12.h"
#include
#if defined(D3D_DEBUG)
#include
#endif
#include "d3d12/dxcapi.h"
#include // for DwmGetCompositionTimingInfo
#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
#include "D3D12MemAlloc.h"
#if defined(RHI_ENABLE_NVAPI)
#include
#pragma comment(lib, "nvapi64.lib")
#endif
#if defined(RHI_ENABLE_AFTERMATH)
#include
#pragma comment(lib, "GFSDK_Aftermath_Lib.x64.lib")
#endif
#include "../pix/pix3.h"
#include "../client/cl_imgui.h"


// @TODO: grab from ri.GetNextTargetTimeUS instead
extern int64_t com_nextTargetTimeUS;


// symbols exported from the executable when the debug layer or the Agility SDK is enabled
// NOTE(review): presumably these are the exports the D3D12 loader looks up to locate the Agility SDK - confirm
#if defined(D3D_DEBUG) || defined(D3D_AGILITY_SDK)
extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = D3D12_SDK_VERSION; }
extern "C" { __declspec(dllexport) extern const char* D3D12SDKPath = u8".\\cnq3\\"; }
#endif


RHIExport rhie;
RHIInfo rhiInfo;


// adapter vendor identifiers, VENDORID_INVALID being the "unknown vendor" placeholder
#define VENDORID_INVALID 0xDEAD
#define VENDORID_NVIDIA 0x10DE
#define VENDORID_INTEL 0x8086
#define VENDORID_AMD 0x1002

// X-macro list of DXGI format names without their DXGI_FORMAT_ prefix
// it is not expanded anywhere in the visible part of the file
#define DXGI_FORMAT_LIST(X) \
	X(UNKNOWN) \
	X(R32G32B32A32_TYPELESS) \
	X(R32G32B32A32_FLOAT) \
	X(R32G32B32A32_UINT) \
	X(R32G32B32A32_SINT) \
	X(R32G32B32_TYPELESS) \
	X(R32G32B32_FLOAT) \
	X(R32G32B32_UINT) \
	X(R32G32B32_SINT) \
	X(R16G16B16A16_TYPELESS) \
	X(R16G16B16A16_FLOAT) \
	X(R16G16B16A16_UNORM) \
	X(R16G16B16A16_UINT) \
	X(R16G16B16A16_SNORM) \
	X(R16G16B16A16_SINT) \
	X(R32G32_TYPELESS) \
	X(R32G32_FLOAT) \
	X(R32G32_UINT) \
	X(R32G32_SINT) \
	X(R32G8X24_TYPELESS) \
	X(D32_FLOAT_S8X24_UINT) \
	X(R32_FLOAT_X8X24_TYPELESS) \
	X(X32_TYPELESS_G8X24_UINT) \
	X(R10G10B10A2_TYPELESS) \
	X(R10G10B10A2_UNORM) \
	X(R10G10B10A2_UINT) \
	X(R11G11B10_FLOAT) \
	X(R8G8B8A8_TYPELESS) \
	X(R8G8B8A8_UNORM) \
	X(R8G8B8A8_UNORM_SRGB) \
	X(R8G8B8A8_UINT) \
	X(R8G8B8A8_SNORM) \
	X(R8G8B8A8_SINT) \
	X(R16G16_TYPELESS) \
	X(R16G16_FLOAT) \
	X(R16G16_UNORM) \
	X(R16G16_UINT) \
	X(R16G16_SNORM) \
	X(R16G16_SINT) \
	X(R32_TYPELESS) \
	X(D32_FLOAT) \
	X(R32_FLOAT) \
	X(R32_UINT) \
	X(R32_SINT) \
	X(R24G8_TYPELESS) \
	X(D24_UNORM_S8_UINT) \
	X(R24_UNORM_X8_TYPELESS) \
	X(X24_TYPELESS_G8_UINT) \
	X(R8G8_TYPELESS) \
	X(R8G8_UNORM) \
	X(R8G8_UINT) \
	X(R8G8_SNORM) \
	X(R8G8_SINT) \
	X(R16_TYPELESS) \
	X(R16_FLOAT) \
	X(D16_UNORM) \
	X(R16_UNORM) \
	X(R16_UINT) \
	X(R16_SNORM) \
	X(R16_SINT) \
	X(R8_TYPELESS) \
	X(R8_UNORM) \
	X(R8_UINT) \
	X(R8_SNORM) \
	X(R8_SINT) \
	X(A8_UNORM) \
	X(R1_UNORM) \
	X(R9G9B9E5_SHAREDEXP) \
	X(R8G8_B8G8_UNORM) \
	X(G8R8_G8B8_UNORM) \
	X(BC1_TYPELESS) \
	X(BC1_UNORM) \
	X(BC1_UNORM_SRGB) \
	X(BC2_TYPELESS) \
	X(BC2_UNORM) \
	X(BC2_UNORM_SRGB) \
	X(BC3_TYPELESS) \
	X(BC3_UNORM) \
	X(BC3_UNORM_SRGB) \
	X(BC4_TYPELESS) \
	X(BC4_UNORM) \
	X(BC4_SNORM) \
	X(BC5_TYPELESS) \
	X(BC5_UNORM) \
	X(BC5_SNORM) \
	X(B5G6R5_UNORM) \
	X(B5G5R5A1_UNORM) \
	X(B8G8R8A8_UNORM) \
	X(B8G8R8X8_UNORM) \
	X(R10G10B10_XR_BIAS_A2_UNORM) \
	X(B8G8R8A8_TYPELESS) \
	X(B8G8R8A8_UNORM_SRGB) \
	X(B8G8R8X8_TYPELESS) \
	X(B8G8R8X8_UNORM_SRGB) \
	X(BC6H_TYPELESS) \
	X(BC6H_UF16) \
	X(BC6H_SF16) \
	X(BC7_TYPELESS) \
	X(BC7_UNORM) \
	X(BC7_UNORM_SRGB) \
	X(AYUV) \
	X(Y410) \
	X(Y416) \
	X(NV12) \
	X(P010) \
	X(P016) \
	X(420_OPAQUE) \
	X(YUY2) \
	X(Y210) \
	X(Y216) \
	X(NV11) \
	X(AI44) \
	X(IA44) \
	X(P8) \
	X(A8P8) \
	X(B4G4R4A4_UNORM) \
	X(P208) \
	X(V208) \
	X(V408) \
	X(SAMPLER_FEEDBACK_MIN_MIP_OPAQUE) \
	X(SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE)


namespace RHI
{
	// D3D_FEATURE_LEVEL_12_0 is the minimum to ensure at least Resource Binding Tier 2:
	// - unlimited SRVs
	// - 14 CBVs
	// - 64 UAVs
	// - 2048 samplers
	static const D3D_FEATURE_LEVEL FeatureLevel = D3D_FEATURE_LEVEL_12_0;

	// the kinds of RHI resources
	// NOTE(review): presumably the type tag stored in RHI handles - confirm against the handle helpers
	struct ResourceType
	{
		enum Id
		{
			// @NOTE: a valid type never being 0 means we can discard 0 handles right away
			Invalid,
			Buffer,
			Texture,
			Sampler,
			RootSignature,
			DescriptorTable,
			Pipeline,
			DurationQuery,
			Shader,
			Count
		};
	};

	// X-macro list of Direct3D object kinds: enum name + the suffix SetDebugName appends to debug names
	// it is expanded twice below so that D3DResourceType::Id and D3DResourceNames stay in the same order
#define D3D_RESOURCE_LIST(R) \
		R(CommandQueue, "command queue") \
		R(CommandAllocator, "command allocator") \
		R(PipelineState, "pipeline state") \
		R(CommandList, "command list") \
		R(Fence, "fence") \
		R(RootSignature, "root signature") \
		R(DescriptorHeap, "descriptor heap") \
		R(Heap, "heap") \
		R(QueryHeap, "query heap") \
		R(Texture, "texture") \
		R(Buffer, "buffer") \
		R(Sampler, "samplers")

#define R(Enum, Name) Enum,
	struct D3DResourceType
	{
		enum Id
		{
			D3D_RESOURCE_LIST(R)
			Count
		};
	};
#undef R

	// indexed by D3DResourceType::Id, the extra "" entry sits at index D3DResourceType::Count
#define R(Enum, Name) Name,
	static const char* D3DResourceNames[] =
	{
		D3D_RESOURCE_LIST(R)
		""
	};
#undef R

#undef D3D_RESOURCE_LIST

	// back-end data of a buffer
	struct Buffer
	{
		BufferDesc desc;
		D3D12MA::Allocation* allocation;
		ID3D12Resource* buffer;
		D3D12_GPU_VIRTUAL_ADDRESS gpuAddress;
		D3D12_RESOURCE_STATES currentState;
		uint32_t cbvIndex;
		uint32_t srvIndex;
		uint32_t uavIndex;
		bool mapped;
		bool uploading; // true between UploadManager::BeginBufferUpload and EndBufferUpload
		UINT64 uploadSrcByteOffset; // offset into the upload buffer, set by BeginBufferUpload
		UINT64 uploadDestByteOffset; // offset into this buffer, set by BeginBufferUpload
		UINT64 uploadByteCount; // size of the pending copy, set by BeginBufferUpload
		bool shortLifeTime = false;
	};

	// back-end data of a texture
	struct Texture
	{
		TextureDesc desc;
		D3D12MA::Allocation* allocation;
		ID3D12Resource* texture;
		uint32_t srvIndex;
		uint32_t rtvIndex;
		uint32_t dsvIndex;
		D3D12_RESOURCE_STATES currentState;
		struct Mip
		{
			uint32_t uavIndex;
		}
		mips[MaxTextureMips];
		bool uploading; // true between UploadManager::BeginTextureUpload and EndTextureUpload
		uint32_t uploadByteOffset; // offset into the upload buffer, set by BeginTextureUpload
		bool shortLifeTime = false;
	};

	// back-end data of a root signature
	struct RootSignature
	{
		struct PerStageConstants
		{
			UINT parameterIndex;
		};
		RootSignatureDesc desc;
		ID3D12RootSignature* signature;
		PerStageConstants constants[ShaderStage::Count];
		UINT genericTableIndex;
		UINT samplerTableIndex;
		UINT genericDescCount;
		UINT samplerDescCount;
		bool shortLifeTime = false;
	};

	// back-end data of a descriptor table: one heap for generic descriptors and one for samplers
	struct DescriptorTable
	{
		ID3D12DescriptorHeap* genericHeap; // SRV, CBV, UAV
		ID3D12DescriptorHeap* samplerHeap;
		bool shortLifeTime = false;
	};

	// back-end data of a graphics or compute pipeline, 'type' says which
	struct Pipeline
	{
		GraphicsPipelineDesc graphicsDesc;
		ComputePipelineDesc computeDesc;
		ID3D12PipelineState* pso = NULL;
		PipelineType::Id type = PipelineType::Graphics;
		bool shortLifeTime = false;
	};

	// back-end data of a shader
	struct Shader
	{
		IDxcBlob* blob = NULL;
		bool shortLifeTime = false;
	};

	// back-end data of a sampler
	// @NOTE: unlike the other resource structures, shortLifeTime defaults to true here
	struct Sampler
	{
		SamplerDesc desc;
		uint32_t heapIndex = UINT32_MAX;
		bool shortLifeTime = true;
	};

	struct QueryState
	{
		enum Id
		{
			Free, // ready to be (re-)used
			Begun, // first call done, not resolved yet
			Ended, // second call done, not resolved yet
			Count
		};
	};

	// an ID3D12Fence paired with the Win32 event used to wait for it on the CPU
	struct Fence
	{
		void Create(UINT64 value, const char* name);
		void Signal(ID3D12CommandQueue* queue, UINT64 value);
		void WaitOnCPU(UINT64 value);
		void WaitOnGPU(ID3D12CommandQueue* queue, UINT64 value);
		bool HasCompleted(UINT64 value);
		void Release();

		ID3D12Fence* fence;
		HANDLE event;
	};

	// copies CPU data to buffers and textures through an upload buffer and a dedicated copy queue
	struct UploadManager
	{
		void Create();
		void Release();
		uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount);
		void EndBufferUpload(HBuffer buffer);
		void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
		void EndTextureUpload();
		void WaitToStartDrawing(ID3D12CommandQueue* commandQueue);

		ID3D12CommandQueue* commandQueue;
		ID3D12CommandAllocator* commandAllocator;
		ID3D12GraphicsCommandList* commandList;
		HBuffer uploadHBuffer;
		uint32_t bufferByteCount; // size of the upload buffer
		uint32_t bufferByteOffset; // where the next upload will be placed in the upload buffer
		uint8_t* mappedBuffer; // CPU address of the upload buffer
		Fence fence;
		UINT64 fenceValue; // incremented and signaled after each submitted copy
		HTexture currentTexture; // texture of the upload in progress, null handle when there is none
		int bufferUploadCounter; // number of buffer uploads begun but not ended yet
		bool multiBufferUpload;
		bool needsRewind;
		int batchTextureCount;
		int batchBufferCount;

	private:
		void WaitToStartUploading(uint32_t uploadByteCount);
		void EndOfBufferReached();
	};

	// copies a texture into a readback buffer so that the CPU can read it
	struct ReadbackManager
	{
		void Create();
		void Release();
		void ResizeIfNeeded();
		void BeginTextureReadback(MappedTexture& mappedTexture, HTexture texture);
		void EndTextureReadback();

		ID3D12CommandAllocator* readbackCommandAllocator;
		ID3D12GraphicsCommandList* readbackCommandList;
		HBuffer readbackBuffer;
		Fence readbackFence;
		UINT64 readbackFenceValue;
		uint32_t bufferByteCount; // size of the readback buffer
	};

	// a non-shader-visible descriptor heap whose slots are handed out by a free list
	// NOTE(review): the template arguments of StaticFreeList appear to be missing from this copy of the file
	struct DescriptorHeap
	{
		void Create(D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t size, uint16_t* freeListItems, const char* name);
		void Release();
		uint32_t Allocate();
		void Free(uint32_t index);
		D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(uint32_t index);
		uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc);
		uint32_t CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc);
		uint32_t CreateRTV(ID3D12Resource* resource, D3D12_RENDER_TARGET_VIEW_DESC& desc);
		uint32_t CreateDSV(ID3D12Resource* resource, D3D12_DEPTH_STENCIL_VIEW_DESC& desc);
		uint32_t CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc);
		uint32_t CreateSampler(D3D12_SAMPLER_DESC& desc);

		StaticFreeList freeList;
		ID3D12DescriptorHeap* heap;
		D3D12_CPU_DESCRIPTOR_HANDLE startAddress; // CPU handle of the first descriptor
		UINT descriptorSize; // handle increment between 2 consecutive descriptors
		D3D12_DESCRIPTOR_HEAP_TYPE type;
	};

	struct DurationQuery
	{
		QueryState::Id state;
	};

	struct FrameQueries
	{
		DurationQuery durationQueries[MaxDurationQueries];
		uint32_t durationQueryCount;
	};

	struct ResolvedQueries
	{
		uint32_t gpuMicroSeconds[MaxDurationQueries];
		uint32_t durationQueryCount;
	};

	// function pointers and module handle for the PIX event/marker functions
	// NOTE(review): presumably resolved at run-time from the PIX library - the loading code is not in view
	struct PIX
	{
		typedef void(WINAPI* BeginEventOnCommandListPtr)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);
		typedef void(WINAPI* EndEventOnCommandListPtr)(ID3D12GraphicsCommandList* commandList);
		typedef void(WINAPI* SetMarkerOnCommandListPtr)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);

		BeginEventOnCommandListPtr BeginEventOnCommandList;
		EndEventOnCommandListPtr EndEventOnCommandList;
		SetMarkerOnCommandListPtr SetMarkerOnCommandList;
		HMODULE module;
		bool canBeginAndEnd;
	};

	// state used when rhi.useDynamicResources is set
	struct DynamicResources
	{
		struct DescriptorRange
		{
			void Init(D3D12_DESCRIPTOR_RANGE_TYPE type, uint32_t start, uint32_t count);
			uint32_t Allocate(bool slotAtIndex0 = false);

			D3D12_DESCRIPTOR_RANGE_TYPE type;
			uint32_t start;
			uint32_t count;
			uint32_t index;
			bool reservedSlotUsed;
		};

		static const uint32_t MaxDescriptorsSRV = 65536;
		static const uint32_t MaxDescriptorsUAV = 65536;
		static const uint32_t MaxDescriptorsCBV = 65536;
		static const uint32_t MaxDescriptorsSamplers = 1024;
		static const uint32_t MaxDescriptorsGeneric = MaxDescriptorsSRV + MaxDescriptorsUAV + MaxDescriptorsCBV;

		ID3D12RootSignature* rootSignature;
		ID3D12DescriptorHeap* genericDescriptorHeap; // CPU write, GPU read
		ID3D12DescriptorHeap* samplerDescriptorHeap; // CPU write, GPU read
		ID3D12DescriptorHeap* genericCPUDescriptorHeap; // CPU read/write, for UAV clears and the like
		DescriptorRange srvIndex;
		DescriptorRange uavIndex;
		DescriptorRange cbvIndex;
		DescriptorRange samplerIndex;
	};

	// a pending state change for a buffer
	struct BufferBarrier
	{
		BufferBarrier() = default;
		BufferBarrier(HBuffer buffer_, ResourceStates::Flags newState_)
		{
			buffer = buffer_;
			newState = newState_;
		}

		HBuffer buffer = RHI_MAKE_NULL_HANDLE();
		ResourceStates::Flags newState = ResourceStates::Common;
	};

	// a pending state change for a texture
	struct TextureBarrier
	{
		TextureBarrier() = default;
		TextureBarrier(HTexture texture_, ResourceStates::Flags newState_)
		{
			texture = texture_;
			newState = newState_;
		}

		HTexture texture = RHI_MAKE_NULL_HANDLE();
		ResourceStates::Flags newState = ResourceStates::Common;
	};

	struct GPU
	{
		char name[256];
		LUID uniqueId;
	};

	struct BufferToDelete
	{
		HBuffer buffer;
		uint32_t beginFrameCounter;
	};

	// all the state of the Direct3D 12 back-end, there is a single static instance named 'rhi'
	// NOTE(review): the template arguments of StaticPool and StaticArray appear to be missing from this copy of the file
	struct RHIPrivate
	{
		bool initialized;
		ID3D12Debug* debug; // can be NULL
		ID3D12InfoQueue* infoQueue; // can be NULL
#if defined(D3D_DEBUG)
		IDXGIInfoQueue* dxgiInfoQueue; // can be NULL
		IDXGIFactory2* factory;
#else
		IDXGIFactory1* factory;
#endif
		IDXGIAdapter1* adapter;
		ID3D12Device5* device;
		D3D12MA::Allocator* allocator;
		D3D12MA::Pool* umaPool; // only non-NULL when using a cache-coherent UMA adapter
		ID3D12CommandQueue* mainCommandQueue;
		ID3D12CommandQueue* computeCommandQueue;
		IDXGISwapChain3* swapChain;
		HTexture renderTargets[FrameCount];
		ID3D12CommandAllocator* mainCommandAllocators[FrameCount];
		ID3D12GraphicsCommandList6* mainCommandList;
		ID3D12CommandAllocator* tempCommandAllocator;
		ID3D12GraphicsCommandList6* tempCommandList;
		bool tempCommandListOpen;
		ID3D12GraphicsCommandList6* commandList; // not owned, don't release it!
		uint32_t swapChainBufferCount;
		uint32_t renderFrameCount;
		HANDLE frameLatencyWaitableObject;
		bool frameLatencyWaitNeeded;
		UINT frameIndex;
		UINT swapChainBufferIndex;
		Fence mainFence;
		UINT64 mainFenceValues[FrameCount];
		Fence tempFence;
		UINT64 tempFenceValue;
		ID3D12QueryHeap* timeStampHeaps[FrameCount];
		HBuffer timeStampBuffers[FrameCount];
		uint32_t frameDurationQueryIndex;
		HRootSignature currentRootSignature;
		bool isTearingSupported;
		bool vsync;
		bool frameBegun;
		bool baseVRSSupport;
		bool extendedVRSSupport;
		bool useDynamicResources;
		DynamicResources dynamicResources;
		ID3D12CommandSignature* indirectDispatchSignature;
		UINT vendorId;
		char umdVersionString[64];
		uint16_t umdVersionSplit[4];
		uint64_t umdVersion;
		HMODULE dxcModule;
		HMODULE dxilModule;
		IDxcUtils* dxcUtils;
		IDxcCompiler3* dxcCompiler;
		uint16_t descriptorFreeListData[MaxCPUDescriptors];
		DescriptorHeap descHeapGeneric;
		DescriptorHeap descHeapSamplers;
		DescriptorHeap descHeapRTVs;
		DescriptorHeap descHeapDSVs;

		// one pool per RHI resource type
#define POOL(Type, Size) StaticPool
		POOL(Buffer, 128) buffers;
		POOL(Texture, MAX_DRAWIMAGES * 2) textures;
		POOL(RootSignature, 64) rootSignatures;
		POOL(DescriptorTable, 64) descriptorTables;
		POOL(Pipeline, 256) pipelines;
		POOL(Shader, 16) shaders;
		POOL(Sampler, 128) samplers;
#undef POOL

		// X-macro list pairing each pool with the function that destroys its items
#define DESTROY_POOL_LIST(POOL) \
		POOL(buffers, DestroyBuffer) \
		POOL(textures, DestroyTexture) \
		POOL(rootSignatures, DestroyRootSignature) \
		POOL(descriptorTables, DestroyDescriptorTable) \
		POOL(pipelines, DestroyPipeline) \
		POOL(shaders, DestroyShader) \
		POOL(samplers, DestroySampler)

		// null resources, no manual clean-up needed
		HTexture nullTexture; // SRV
		HTexture nullRWTexture; // UAV
		HBuffer nullBuffer; // CBV
		HBuffer nullRWBuffer; // UAV
		HSampler nullSampler;

		byte persStringData[64 << 10];
		byte tempStringData[64 << 10];
		char adapterName[256];
		LinearAllocator persStringAllocator;
		LinearAllocator tempStringAllocator;
		UploadManager upload;
		ReadbackManager readback;
		StaticArray texturesToTransition;
		StaticArray buffersToTransition;
		StaticArray buffersToDelete;
		FrameQueries frameQueries[FrameCount];
		ResolvedQueries resolvedQueries;
		PIX pix;
		int64_t beforeInputSamplingUS;
		int64_t beforeRenderingUS;
		GPU gpus[16];
		uint32_t gpuCount;
		HBuffer raytracingScratchBuffer;
		HBuffer raytracingInstanceBuffer;
		uint32_t beginFrameCounter;
		D3D12_RAYTRACING_GEOMETRY_DESC* rtGeoDescs;
		uint32_t rtGeoDescCount;

		// immediate-mode barrier API
		TextureBarrier textureBarriers[64];
		BufferBarrier bufferBarriers[64];
		uint32_t textureBarrierCount;
		uint32_t bufferBarrierCount;
		ID3D12GraphicsCommandList* barrierCommandList;
		bool barrierOpen;

		// NVIDIA NVAPI
#if defined(RHI_ENABLE_NVAPI)
		bool nvapiActive;
#endif

		// NVIDIA Aftermath
#if defined(RHI_ENABLE_AFTERMATH)
		struct AftermathMarker
		{
			char string[64];
		};
		AftermathMarker aftermathMarkers[64]; // stack: markers can be nested
		uint32_t aftermathMarkerDepth = 0;
		GFSDK_Aftermath_ContextHandle aftermathMainCommandList;
		GFSDK_Aftermath_ContextHandle aftermathTempCommandList;
		GFSDK_Aftermath_ContextHandle aftermathUploadCommandList;
		GFSDK_Aftermath_ContextHandle aftermathCommandList; // active command list
		bool aftermathActive;
#endif
	};

	static RHIPrivate rhi;

	// releases a COM pointer if it isn't NULL and then sets it to NULL
#define COM_RELEASE(p) do { if(p) { p->Release(); p = NULL; } } while((void)0,0)
	// applies COM_RELEASE to every element of a fixed-size array
#define COM_RELEASE_ARRAY(a) do { for(int i = 0; i < ARRAY_LEN(a); ++i) { COM_RELEASE(a[i]); } } while((void)0,0)

	// evaluates an expression yielding a HRESULT and hands it to Check along with the expression's text
#define D3D(Exp) Check((Exp), #Exp)

#if defined(near)
#	undef near
#endif

#if defined(far)
#	undef far
#endif

#if !defined(D3DDDIERR_DEVICEREMOVED)
#	define D3DDDIERR_DEVICEREMOVED ((HRESULT)0x88760870L)
#endif

	// guards for API functions that only work with dynamic resources (DR) enabled or disabled
#define ASSERT_DR_ENABLED() ASSERT_OR_DIE(rhi.useDynamicResources, "RHI API requires DR on")
#define ASSERT_DR_DISABLED() ASSERT_OR_DIE(!rhi.useDynamicResources, "RHI API requires DR off")

	// returns the system's description of 'hr' with trailing white space removed, or "???" if there is none
	// the string lives in a static buffer that gets overwritten by the next call
	static const char* GetSystemErrorString(HRESULT hr)
	{
		// FormatMessage might not always give us the string we want but that's ok,
		// we always print the original error code anyhow
		static char systemErrorStr[1024];
		const
DWORD written = FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM, NULL, (DWORD)hr, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), systemErrorStr, sizeof(systemErrorStr) - 1, NULL); if(written == 0) { // we have nothing valid Q_strncpyz(systemErrorStr, "???", sizeof(systemErrorStr)); } else { // remove the trailing whitespace char* s = systemErrorStr + strlen(systemErrorStr) - 1; while(s >= systemErrorStr) { if(*s == '\r' || *s == '\n' || *s == '\t' || *s == ' ') { *s-- = '\0'; } else { break; } } } return systemErrorStr; } static bool Check(HRESULT hr, const char* function) { if(SUCCEEDED(hr)) { return true; } // fatal error mode always on for now const char* const errorMessage = GetSystemErrorString(hr); if(IsDebuggerPresent()) { __debugbreak(); } ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, errorMessage); return false; } static const char* GetUTF8String(const WCHAR* wideStr, const char* defaultUTF8Str) { static char utf8Str[256]; const char* utf8StrPtr = defaultUTF8Str; if(WideCharToMultiByte(CP_UTF8, 0, wideStr, -1, utf8Str, sizeof(utf8Str), NULL, NULL) > 0) { utf8StrPtr = utf8Str; } return utf8StrPtr; } static const WCHAR* GetWideString(const char* utf8Str, const WCHAR* defaultWideStr) { static WCHAR wideStr[256]; const WCHAR* wideStrPtr = defaultWideStr; if(MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, wideStr, ARRAY_LEN(wideStr)) > 0) { wideStrPtr = wideStr; } return wideStrPtr; } static void SetDebugName(ID3D12DeviceChild* resource, const char* resourceName, D3DResourceType::Id resourceType) { if(resourceName == NULL || (uint32_t)resourceType >= D3DResourceType::Count) { return; } const char* const name = va("%s %s", resourceName, D3DResourceNames[resourceType]); // ID3D12Object::SetName is a Unicode wrapper for // ID3D12Object::SetPrivateData with WKPDID_D3DDebugObjectNameW // it was good enough for RenderDoc and PIX, but not Nsight //resource->SetPrivateData(WKPDID_D3DDebugObjectName, strlen(name), name); 
resource->SetName(GetWideString(name, L"???")); } static uint32_t GetBytesPerPixel(TextureFormat::Id format) { switch(format) { case TextureFormat::R32G32B32A32_Float: return 16; case TextureFormat::R16G16B16A16_UNorm: case TextureFormat::R16G16B16A16_Float: case TextureFormat::R32G32_Float: case TextureFormat::R32G32_UInt: return 8; case TextureFormat::R8G8B8A8_UNorm: case TextureFormat::Depth32_Float: case TextureFormat::Depth24_Stencil8: case TextureFormat::R10G10B10A2_UNorm: case TextureFormat::R32_UInt: case TextureFormat::R16G16_SNorm: case TextureFormat::R16G16_Float: return 4; case TextureFormat::R16_UInt: case TextureFormat::R16_Float: case TextureFormat::R8G8_UNorm: return 2; case TextureFormat::R8_UNorm: return 1; default: Q_assert(!"Unsupported texture format"); return 4; } } static ID3D12DescriptorHeap* CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, UINT size, bool shaderVisible, const char* name) { if(size == 0) { return NULL; } ID3D12DescriptorHeap* heap; D3D12_DESCRIPTOR_HEAP_DESC heapDesc = { 0 }; heapDesc.Type = type; heapDesc.Flags = shaderVisible ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : D3D12_DESCRIPTOR_HEAP_FLAG_NONE; heapDesc.NumDescriptors = size; heapDesc.NodeMask = 0; D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); SetDebugName(heap, name, D3DResourceType::DescriptorHeap); return heap; } static uint32_t GetReadbackTextureByteCount() { // we base the resolution on the render targets, not the swap chain images // this allows us to e.g. 
capture videos at 4K while displaying a 720p window D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; textureDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; textureDesc.Width = glConfig.vidWidth; textureDesc.Height = glConfig.vidHeight; textureDesc.DepthOrArraySize = 1; textureDesc.MipLevels = 1; textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; textureDesc.SampleDesc.Count = 1; textureDesc.SampleDesc.Quality = 0; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; textureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, NULL); const uint32_t byteCount = (uint32_t)(layout.Footprint.RowPitch * layout.Footprint.Height); return byteCount; } void Fence::Create(UINT64 value, const char* name) { D3D(rhi.device->CreateFence(value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence))); SetDebugName(fence, name, D3DResourceType::Fence); event = CreateEvent(NULL, FALSE, FALSE, NULL); if(event == NULL) { Check(HRESULT_FROM_WIN32(GetLastError()), "CreateEvent"); } } void Fence::Signal(ID3D12CommandQueue* queue, UINT64 value) { D3D(queue->Signal(fence, value)); } void Fence::WaitOnCPU(UINT64 value) { if(fence->GetCompletedValue() < value) { D3D(fence->SetEventOnCompletion(value, event)); WaitForSingleObjectEx(event, INFINITE, FALSE); } } void Fence::WaitOnGPU(ID3D12CommandQueue* queue, UINT64 value) { D3D(queue->Wait(fence, value)); } bool Fence::HasCompleted(UINT64 value) { return fence->GetCompletedValue() >= value; } void Fence::Release() { CloseHandle(event); event = NULL; COM_RELEASE(fence); } void UploadManager::Create() { BufferDesc bufferDesc("upload", 128 << 20, ResourceStates::CopyDestinationBit); bufferDesc.memoryUsage = MemoryUsage::Upload; uploadHBuffer = CreateBuffer(bufferDesc); bufferByteCount = bufferDesc.byteCount; bufferByteOffset = 0; mappedBuffer = MapBuffer(uploadHBuffer); 
D3D12_COMMAND_QUEUE_DESC queueDesc = { 0 }; queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; queueDesc.NodeMask = 0; D3D(rhi.device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue))); SetDebugName(commandQueue, "upload", D3DResourceType::CommandQueue); D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&commandAllocator))); SetDebugName(commandAllocator, "upload", D3DResourceType::CommandAllocator); D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, commandAllocator, NULL, IID_PPV_ARGS(&commandList))); SetDebugName(commandList, "upload", D3DResourceType::CommandList); #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { // @NOTE: Aftermath context creation must happen on an opened command list Q_assert(commandList != NULL); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_DX12_CreateContextHandle(commandList, &rhi.aftermathUploadCommandList); Q_assert(result == GFSDK_Aftermath_Result_Success); } #endif D3D(commandList->Close()); fence.Create(0, "upload"); fenceValue = 0; currentTexture = RHI_MAKE_NULL_HANDLE(); bufferUploadCounter = 0; multiBufferUpload = false; needsRewind = false; batchTextureCount = 0; batchBufferCount = 0; } void UploadManager::Release() { UnmapBuffer(uploadHBuffer); fence.Release(); COM_RELEASE(commandQueue); COM_RELEASE(commandList); COM_RELEASE(commandAllocator); } uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer, uint32_t destByteOffset, uint32_t byteCount) { Q_assert(bufferUploadCounter >= 0); bufferUploadCounter++; if(bufferUploadCounter > 1) { multiBufferUpload = true; } Buffer& userBuffer = rhi.buffers.Get(userHBuffer); Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer); Q_assert(!userBuffer.uploading); if(byteCount == 0) { Q_assert(destByteOffset == 0); destByteOffset = 0; byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount); } 
Q_assert(destByteOffset + byteCount <= userBuffer.desc.byteCount); uint8_t* mapped = NULL; Q_assert(userBuffer.desc.memoryUsage != MemoryUsage::Readback); if(userBuffer.desc.memoryUsage == MemoryUsage::GPU && rhi.umaPool == NULL) { WaitToStartUploading(byteCount); mapped = mappedBuffer + bufferByteOffset; userBuffer.uploadSrcByteOffset = bufferByteOffset; userBuffer.uploadDestByteOffset = destByteOffset; userBuffer.uploadByteCount = byteCount; bufferByteOffset = AlignUp(bufferByteOffset + byteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); if(multiBufferUpload) { needsRewind = true; } batchBufferCount++; } else { mapped = (uint8_t*)MapBuffer(userHBuffer); Q_assert(mapped != NULL); } userBuffer.uploading = true; return mapped; } void UploadManager::EndBufferUpload(HBuffer userHBuffer) { bufferUploadCounter--; Q_assert(bufferUploadCounter >= 0); Buffer& userBuffer = rhi.buffers.Get(userHBuffer); Q_assert(userBuffer.uploading); if(!userBuffer.mapped) { D3D(commandList->Reset(commandAllocator, NULL)); #if defined(RHI_ENABLE_AFTERMATH) const uint64_t byteCount = (uint64_t)userBuffer.uploadByteCount; const char* const marker = va("Upload: CopyBufferRegion -> %s (%s)", userBuffer.desc.name, Com_FormatBytes(byteCount)); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1); Q_assert(result == GFSDK_Aftermath_Result_Success); #endif const Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer); commandList->CopyBufferRegion( userBuffer.buffer, userBuffer.uploadDestByteOffset, uploadBuffer.buffer, userBuffer.uploadSrcByteOffset, userBuffer.uploadByteCount); ID3D12CommandList* commandLists[] = { commandList }; D3D(commandList->Close()); commandQueue->ExecuteCommandLists(ARRAY_LEN(commandLists), commandLists); fenceValue++; commandQueue->Signal(fence.fence, fenceValue); } else { UnmapBuffer(userHBuffer); } userBuffer.uploading = false; if(bufferUploadCounter == 0 && multiBufferUpload) { 
if(needsRewind) { EndOfBufferReached(); needsRewind = false; } multiBufferUpload = false; } } void UploadManager::BeginTextureUpload(MappedTexture& mappedTexture, HTexture htexture) { Q_assert(IsNullHandle(currentTexture)); Texture& texture = rhi.textures.Get(htexture); Q_assert(!texture.uploading); const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; UINT64 uploadByteCount; rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, &uploadByteCount); WaitToStartUploading(uploadByteCount); const UINT sourcePitch = (UINT)(texture.desc.width * GetBytesPerPixel(texture.desc.format)); mappedTexture.mappedData = mappedBuffer + bufferByteOffset; mappedTexture.columnCount = texture.desc.width; mappedTexture.rowCount = texture.desc.height; mappedTexture.sliceCount = texture.desc.depth; mappedTexture.srcRowByteCount = sourcePitch; mappedTexture.dstRowByteCount = AlignUp(layout.Footprint.RowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); texture.uploadByteOffset = bufferByteOffset; texture.uploading = true; bufferByteOffset = AlignUp(bufferByteOffset + uploadByteCount, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); currentTexture = htexture; batchTextureCount++; } void UploadManager::EndTextureUpload() { Q_assert(!IsNullHandle(currentTexture)); const HTexture htexture = currentTexture; Texture& texture = rhi.textures.Get(htexture); Q_assert(texture.uploading); const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, NULL); Buffer& buffer = rhi.buffers.Get(uploadHBuffer); D3D12_TEXTURE_COPY_LOCATION dstLoc = { 0 }; D3D12_TEXTURE_COPY_LOCATION srcLoc = { 0 }; dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dstLoc.pResource = texture.texture; dstLoc.SubresourceIndex = 0; srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; srcLoc.pResource = buffer.buffer; 
srcLoc.PlacedFootprint = layout; srcLoc.PlacedFootprint.Offset = texture.uploadByteOffset; D3D12_BOX srcBox = { 0 }; srcBox.left = 0; srcBox.top = 0; srcBox.front = 0; srcBox.right = textureDesc.Width; srcBox.bottom = textureDesc.Height; srcBox.back = textureDesc.DepthOrArraySize; D3D(commandList->Reset(commandAllocator, NULL)); #if defined(RHI_ENABLE_AFTERMATH) const char* const marker = va("Upload: CopyTextureRegion -> %s", texture.desc.name); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1); Q_assert(result == GFSDK_Aftermath_Result_Success); #endif commandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); ID3D12CommandList* commandLists[] = { commandList }; D3D(commandList->Close()); commandQueue->ExecuteCommandLists(ARRAY_LEN(commandLists), commandLists); fenceValue++; commandQueue->Signal(fence.fence, fenceValue); texture.uploading = false; currentTexture = RHI_MAKE_NULL_HANDLE(); } void UploadManager::WaitToStartDrawing(ID3D12CommandQueue* commandQueue_) { fence.WaitOnGPU(commandQueue_, fenceValue); } void UploadManager::WaitToStartUploading(uint32_t uploadByteCount) { if(uploadByteCount > bufferByteCount) { ri.Error(ERR_FATAL, "Upload request too large!\n"); } if(bufferByteOffset + uploadByteCount > bufferByteCount) { EndOfBufferReached(); } } void UploadManager::EndOfBufferReached() { ri.Printf(PRINT_DEVELOPER, "Waiting for GPU upload: %s (%d T, %d B)\n", Com_FormatBytes(bufferByteOffset), batchTextureCount, batchBufferCount); fence.WaitOnCPU(fenceValue); D3D(commandAllocator->Reset()); bufferByteOffset = 0; batchTextureCount = 0; batchBufferCount = 0; } void ReadbackManager::Create() { D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&readbackCommandAllocator))); SetDebugName(readbackCommandAllocator, "readback", D3DResourceType::CommandAllocator); D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, 
readbackCommandAllocator, NULL, IID_PPV_ARGS(&readbackCommandList))); SetDebugName(readbackCommandList, "readback", D3DResourceType::CommandList); D3D(readbackCommandList->Close()); const uint32_t byteCount = GetReadbackTextureByteCount(); BufferDesc desc("readback", byteCount, ResourceStates::CopyDestinationBit); desc.memoryUsage = MemoryUsage::Readback; readbackBuffer = CreateBuffer(desc); bufferByteCount = byteCount; readbackFence.Create(readbackFenceValue, "readback"); } void ReadbackManager::Release() { readbackFence.Release(); COM_RELEASE(readbackCommandList); COM_RELEASE(readbackCommandAllocator); } void ReadbackManager::ResizeIfNeeded() { const uint32_t byteCount = GetReadbackTextureByteCount(); if(byteCount <= bufferByteCount) { return; } // @NOTE: this is called after the device has become idle DestroyBuffer(readbackBuffer); BufferDesc desc("readback", byteCount, ResourceStates::CopyDestinationBit); desc.memoryUsage = MemoryUsage::Readback; readbackBuffer = CreateBuffer(desc); bufferByteCount = byteCount; } void ReadbackManager::BeginTextureReadback(MappedTexture& mappedTexture, HTexture htexture) { D3D(readbackCommandAllocator->Reset()); D3D(readbackCommandList->Reset(readbackCommandAllocator, NULL)); Texture& texture = rhi.textures.Get(htexture); Q_assert(texture.desc.format == TextureFormat::R8G8B8A8_UNorm); const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, NULL); Q_assert(layout.Footprint.Format == DXGI_FORMAT_R8G8B8A8_UNORM); Q_assert(layout.Footprint.Width == texture.desc.width); Q_assert(layout.Footprint.Height == texture.desc.height); Buffer& buffer = rhi.buffers.Get(readbackBuffer); D3D12_TEXTURE_COPY_LOCATION dstLoc = { 0 }; D3D12_TEXTURE_COPY_LOCATION srcLoc = { 0 }; dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dstLoc.pResource = buffer.buffer; dstLoc.PlacedFootprint = layout; 
dstLoc.PlacedFootprint.Offset = 0; srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; srcLoc.pResource = texture.texture; srcLoc.SubresourceIndex = 0; D3D12_BOX srcBox = { 0 }; srcBox.left = 0; srcBox.top = 0; srcBox.front = 0; srcBox.right = textureDesc.Width; srcBox.bottom = textureDesc.Height; srcBox.back = 1; const D3D12_RESOURCE_STATES prevState = texture.currentState; // @TODO: use CmdBarrier D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Transition.pResource = texture.texture; barrier.Transition.StateBefore = prevState; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; if(texture.currentState != D3D12_RESOURCE_STATE_COPY_SOURCE) { readbackCommandList->ResourceBarrier(1, &barrier); texture.currentState = D3D12_RESOURCE_STATE_COPY_SOURCE; } readbackCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; barrier.Transition.StateAfter = prevState; if(texture.currentState != prevState) { readbackCommandList->ResourceBarrier(1, &barrier); texture.currentState = prevState; } D3D(readbackCommandList->Close()); ID3D12CommandList* commandListArray[] = { readbackCommandList }; rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray); readbackFenceValue++; readbackFence.Signal(rhi.mainCommandQueue, readbackFenceValue); readbackFence.WaitOnCPU(readbackFenceValue); mappedTexture.mappedData = MapBuffer(readbackBuffer); mappedTexture.rowCount = layout.Footprint.Height; mappedTexture.columnCount = layout.Footprint.Width; mappedTexture.srcRowByteCount = layout.Footprint.RowPitch; mappedTexture.dstRowByteCount = 0; } void ReadbackManager::EndTextureReadback() { UnmapBuffer(readbackBuffer); } void DescriptorHeap::Create(D3D12_DESCRIPTOR_HEAP_TYPE heapType, uint32_t size, uint16_t* freeListItems, const char* name) { heap = CreateDescriptorHeap(heapType, size, false, name); 
freeList.Init(freeListItems, size); startAddress = heap->GetCPUDescriptorHandleForHeapStart(); descriptorSize = rhi.device->GetDescriptorHandleIncrementSize(heapType); type = heapType; } void DescriptorHeap::Release() { COM_RELEASE(heap); } uint32_t DescriptorHeap::Allocate() { return freeList.Allocate(); } void DescriptorHeap::Free(uint32_t index) { freeList.Free(index); } D3D12_CPU_DESCRIPTOR_HANDLE DescriptorHeap::GetCPUHandle(uint32_t index) { D3D12_CPU_DESCRIPTOR_HANDLE handle = startAddress; handle.ptr += index * descriptorSize; return handle; } uint32_t DescriptorHeap::CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc) { Q_assert(resource); Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); const uint32_t index = freeList.Allocate(); rhi.device->CreateShaderResourceView(resource, &desc, GetCPUHandle(index)); return index; } uint32_t DescriptorHeap::CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc) { Q_assert(resource); Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); const uint32_t index = freeList.Allocate(); rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, GetCPUHandle(index)); return index; } uint32_t DescriptorHeap::CreateRTV(ID3D12Resource* resource, D3D12_RENDER_TARGET_VIEW_DESC& desc) { Q_assert(resource); Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_RTV); const uint32_t index = freeList.Allocate(); rhi.device->CreateRenderTargetView(resource, &desc, GetCPUHandle(index)); return index; } uint32_t DescriptorHeap::CreateDSV(ID3D12Resource* resource, D3D12_DEPTH_STENCIL_VIEW_DESC& desc) { Q_assert(resource); Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_DSV); const uint32_t index = freeList.Allocate(); rhi.device->CreateDepthStencilView(resource, &desc, GetCPUHandle(index)); return index; } uint32_t DescriptorHeap::CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc) { Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); const uint32_t index = freeList.Allocate(); 
rhi.device->CreateConstantBufferView(&desc, GetCPUHandle(index)); return index; } uint32_t DescriptorHeap::CreateSampler(D3D12_SAMPLER_DESC& desc) { Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); const uint32_t index = freeList.Allocate(); rhi.device->CreateSampler(&desc, GetCPUHandle(index)); return index; } void DynamicResources::DescriptorRange::Init(D3D12_DESCRIPTOR_RANGE_TYPE type_, uint32_t start_, uint32_t count_) { Q_assert(count_ > 0); type = type_; start = start_; count = count_; index = start_ + 1; reservedSlotUsed = false; } uint32_t DynamicResources::DescriptorRange::Allocate(bool slotAtIndex0) { if(slotAtIndex0) { ASSERT_OR_DIE(!reservedSlotUsed, "Can only use 1 reserved slot"); reservedSlotUsed = true; return start; } ASSERT_OR_DIE(index + 1 < start + count, "Not enough descriptors"); return index++; } static const char* GetDeviceRemovedReasonString(HRESULT reason) { switch(reason) { case DXGI_ERROR_DEVICE_HUNG: return "device hung"; case DXGI_ERROR_DEVICE_REMOVED: return "device removed"; case DXGI_ERROR_DEVICE_RESET: return "device reset"; case DXGI_ERROR_DRIVER_INTERNAL_ERROR: return "internal driver error"; case DXGI_ERROR_INVALID_CALL: return "invalid call"; case S_OK: return "no error"; default: return va("unknown error code 0x%08X", (unsigned int)reason); } } static DXGI_GPU_PREFERENCE GetGPUPreference(int preference) { switch(preference) { case GPUPREF_HIGHPERF: return DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE; case GPUPREF_LOWPOWER: return DXGI_GPU_PREFERENCE_MINIMUM_POWER; default: return DXGI_GPU_PREFERENCE_UNSPECIFIED; } } static bool IsSuitableAdapter(IDXGIAdapter1* adapter) { HRESULT hr = S_OK; DXGI_ADAPTER_DESC1 desc; hr = adapter->GetDesc1(&desc); if(FAILED(hr)) { ri.Printf(PRINT_WARNING, "D3D12: IDXGIAdapter1::GetDesc1 failed with code 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr)); return false; } if(desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { //ri.Printf(PRINT_WARNING, "D3D12: '%s' is not real hardware\n", 
//GetUTF8Name(desc.Description, "unknown adapter")); return false; } hr = D3D12CreateDevice(adapter, FeatureLevel, __uuidof(ID3D12Device), NULL); if(FAILED(hr)) { ri.Printf(PRINT_WARNING, "D3D12: can't create device for '%s' with code 0x%08X (%s)\n", GetUTF8String(desc.Description, "unknown adapter"), (unsigned int)hr, GetSystemErrorString(hr)); return false; } return true; } static void CreateAdapterList() { IDXGIAdapter1* adapter = NULL; UINT enumIndex = 0; rhi.gpuCount = 0; while(rhi.gpuCount < ARRAY_LEN(rhi.gpus) && SUCCEEDED(rhi.factory->EnumAdapters1(enumIndex++, &adapter))) { DXGI_ADAPTER_DESC1 desc; if(IsSuitableAdapter(adapter) && SUCCEEDED(adapter->GetDesc1(&desc))) { GPU& gpu = rhi.gpus[rhi.gpuCount++]; gpu.uniqueId = desc.AdapterLuid; Q_strncpyz(gpu.name, GetUTF8String(desc.Description, "???"), sizeof(gpu.name)); } COM_RELEASE(adapter); } } static IDXGIAdapter1* GetAdapterAtIndex(int gpuIndex) { if(gpuIndex < 0 || gpuIndex >= ARRAY_LEN(rhi.gpus)) { ri.Printf(PRINT_WARNING, "GPU index %d is invalid", gpuIndex + 1); return NULL; } const LUID uniqueId = rhi.gpus[gpuIndex].uniqueId; IDXGIAdapter1* adapter = NULL; UINT enumIndex = 0; while(SUCCEEDED(rhi.factory->EnumAdapters1(enumIndex++, &adapter))) { DXGI_ADAPTER_DESC1 desc; if(SUCCEEDED(adapter->GetDesc1(&desc)) && desc.AdapterLuid.LowPart == uniqueId.LowPart && desc.AdapterLuid.HighPart == uniqueId.HighPart) { return adapter; } COM_RELEASE(adapter); } ri.Printf(PRINT_WARNING, "GPU at index %d (%s) is no longer available", gpuIndex + 1, rhi.gpus[gpuIndex].name); return NULL; } static IDXGIAdapter1* FindMostSuitableAdapter(IDXGIFactory1* factory, int enginePreference) { IDXGIAdapter1* adapter = NULL; IDXGIFactory6* factory6 = NULL; if(SUCCEEDED(factory->QueryInterface(IID_PPV_ARGS(&factory6)))) { const DXGI_GPU_PREFERENCE dxgiPreference = GetGPUPreference(enginePreference); UINT i = 0; while(SUCCEEDED(factory6->EnumAdapterByGpuPreference(i++, dxgiPreference, IID_PPV_ARGS(&adapter)))) { 
if(IsSuitableAdapter(adapter)) { COM_RELEASE(factory6); return adapter; } COM_RELEASE(adapter); } } COM_RELEASE(factory6); UINT i = 0; while(SUCCEEDED(rhi.factory->EnumAdapters1(i++, &adapter))) { if(IsSuitableAdapter(adapter)) { return adapter; } COM_RELEASE(adapter); } ri.Error(ERR_FATAL, "No suitable DXGI adapter was found!\n"); return NULL; } static void Present() { UINT flags; UINT swapInterval; if(r_vsync->integer) { swapInterval = 1; flags = 0; } else { swapInterval = 0; flags = rhi.isTearingSupported ? DXGI_PRESENT_ALLOW_TEARING : 0; } const HRESULT hr = rhi.swapChain->Present(swapInterval, flags); rhi.frameLatencyWaitNeeded = true; #if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION) if(rhi.nvapiActive && (hr == DXGI_ERROR_DEVICE_REMOVED || hr == D3DDDIERR_DEVICEREMOVED || hr == DXGI_ERROR_DEVICE_RESET)) { NvAPI_D3D12_FlushRaytracingValidationMessages(rhi.device); } #endif enum PresentError { PE_NONE, PE_DEVICE_REMOVED, PE_DEVICE_RESET }; PresentError presentError = PE_NONE; HRESULT deviceRemovedReason = S_OK; if(hr == DXGI_ERROR_DEVICE_REMOVED || hr == D3DDDIERR_DEVICEREMOVED) { deviceRemovedReason = rhi.device->GetDeviceRemovedReason(); if(deviceRemovedReason == DXGI_ERROR_DEVICE_RESET) { presentError = PE_DEVICE_RESET; } else { presentError = PE_DEVICE_REMOVED; } } else if(hr == DXGI_ERROR_DEVICE_RESET) { presentError = PE_DEVICE_RESET; } #if defined(D3D_DEBUG) else if(hr != S_OK) { Sys_DebugPrintf("Present error: 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr)); } #endif if(presentError == PE_DEVICE_REMOVED) { ri.Error(ERR_FATAL, "Direct3D device was removed! Reason: %s\n", GetDeviceRemovedReasonString(deviceRemovedReason)); } else if(presentError == PE_DEVICE_RESET) { ri.Printf(PRINT_ERROR, "Direct3D device was reset! 
Restarting the video system...\n"); Cbuf_AddText("vid_restart\n"); } } #if defined(_DEBUG) static bool CanWriteCommands() { // @TODO: check that the command list is open return rhi.commandList != NULL; } #endif template static void DestroyPool(StaticPool& pool, void (*DestroyResource)(HT), bool fullShutDown) { T* resource; HT handle; for(int i = 0; pool.FindNext(&resource, &handle, &i);) { if(fullShutDown || resource->shortLifeTime) { (*DestroyResource)(handle); } } if(fullShutDown) { pool.Clear(); } } static const char* AllocateName(const char* name, bool shortLifeTime) { LinearAllocator& allocator = shortLifeTime ? rhi.tempStringAllocator : rhi.persStringAllocator; return allocator.Allocate(name); } template static void AllocateAndFixName(const T& desc) { ((BufferDesc&)desc).name = AllocateName(desc.name, desc.shortLifeTime); } static DXGI_FORMAT GetD3DIndexFormat(IndexType::Id type) { return type == IndexType::UInt16 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; } static D3D12_SHADER_VISIBILITY GetD3DVisibility(ShaderStage::Id shaderType) { switch(shaderType) { case ShaderStage::Vertex: return D3D12_SHADER_VISIBILITY_VERTEX; case ShaderStage::Pixel: return D3D12_SHADER_VISIBILITY_PIXEL; case ShaderStage::Compute: return D3D12_SHADER_VISIBILITY_ALL; default: Q_assert(!"Unsupported shader type"); return D3D12_SHADER_VISIBILITY_ALL; } } static D3D12_SHADER_VISIBILITY GetD3DVisibility(ShaderStages::Flags flags) { if(__popcnt(flags & ShaderStages::AllGraphicsBits) > 1) { return D3D12_SHADER_VISIBILITY_ALL; } if(flags & ShaderStages::VertexBit) { return D3D12_SHADER_VISIBILITY_VERTEX; } if(flags & ShaderStages::PixelBit) { return D3D12_SHADER_VISIBILITY_PIXEL; } return D3D12_SHADER_VISIBILITY_ALL; } static D3D12_DESCRIPTOR_RANGE_TYPE GetD3DDescriptorRangeType(DescriptorType::Id descType) { switch(descType) { case DescriptorType::Texture: return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; case DescriptorType::Buffer: return D3D12_DESCRIPTOR_RANGE_TYPE_CBV; case 
DescriptorType::RWTexture: return D3D12_DESCRIPTOR_RANGE_TYPE_UAV; case DescriptorType::RWBuffer: return D3D12_DESCRIPTOR_RANGE_TYPE_UAV; case DescriptorType::Sampler: return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; default: Q_assert(!"Unsupported descriptor type"); return D3D12_DESCRIPTOR_RANGE_TYPE_SRV; } } static const char* GetD3DSemanticName(ShaderSemantic::Id semantic) { switch(semantic) { case ShaderSemantic::Position: return "POSITION"; case ShaderSemantic::Normal: return "NORMAL"; case ShaderSemantic::TexCoord: return "TEXCOORD"; case ShaderSemantic::Color: return "COLOR"; default: Q_assert(!"Unsupported shader semantic"); return ""; } } static DXGI_FORMAT GetD3DFormat(DataType::Id dataType, uint32_t vectorLength) { if(vectorLength < 1 || vectorLength > 4) { Q_assert(!"Invalid vector length"); return DXGI_FORMAT_UNKNOWN; } switch(dataType) { case DataType::Float32: switch(vectorLength) { case 1: return DXGI_FORMAT_R32_FLOAT; case 2: return DXGI_FORMAT_R32G32_FLOAT; case 3: return DXGI_FORMAT_R32G32B32_FLOAT; case 4: return DXGI_FORMAT_R32G32B32A32_FLOAT; } case DataType::UInt32: switch(vectorLength) { case 1: return DXGI_FORMAT_R32_UINT; case 2: return DXGI_FORMAT_R32G32_UINT; case 3: return DXGI_FORMAT_R32G32B32_UINT; case 4: return DXGI_FORMAT_R32G32B32A32_UINT; } case DataType::UNorm8: switch(vectorLength) { case 1: return DXGI_FORMAT_R8_UNORM; case 2: return DXGI_FORMAT_R8G8_UNORM; case 3: Q_assert(!"Unsupported format"); return DXGI_FORMAT_UNKNOWN; case 4: return DXGI_FORMAT_R8G8B8A8_UNORM; } default: Q_assert(!"Unsupported data type"); return DXGI_FORMAT_UNKNOWN; } } static D3D12_COMPARISON_FUNC GetD3DComparisonFunction(ComparisonFunction::Id function) { switch(function) { case ComparisonFunction::Always: return D3D12_COMPARISON_FUNC_ALWAYS; case ComparisonFunction::Equal: return D3D12_COMPARISON_FUNC_EQUAL; case ComparisonFunction::Greater: return D3D12_COMPARISON_FUNC_GREATER; case ComparisonFunction::GreaterEqual: return 
D3D12_COMPARISON_FUNC_GREATER_EQUAL; case ComparisonFunction::Less: return D3D12_COMPARISON_FUNC_LESS; case ComparisonFunction::LessEqual: return D3D12_COMPARISON_FUNC_LESS_EQUAL; case ComparisonFunction::Never: return D3D12_COMPARISON_FUNC_NEVER; case ComparisonFunction::NotEqual: return D3D12_COMPARISON_FUNC_NOT_EQUAL; default: Q_assert(!"Unsupported comparison function"); return D3D12_COMPARISON_FUNC_ALWAYS; } } static DXGI_FORMAT GetD3DFormat(TextureFormat::Id format) { switch(format) { case TextureFormat::R8G8B8A8_UNorm: return DXGI_FORMAT_R8G8B8A8_UNORM; case TextureFormat::R16G16B16A16_UNorm: return DXGI_FORMAT_R16G16B16A16_UNORM; case TextureFormat::R32G32_Float: return DXGI_FORMAT_R32G32_FLOAT; case TextureFormat::R16G16B16A16_Float: return DXGI_FORMAT_R16G16B16A16_FLOAT; case TextureFormat::R32G32B32A32_Float: return DXGI_FORMAT_R32G32B32A32_FLOAT; case TextureFormat::Depth32_Float: return DXGI_FORMAT_D32_FLOAT; case TextureFormat::Depth24_Stencil8: return DXGI_FORMAT_D24_UNORM_S8_UINT; case TextureFormat::R8G8_UNorm: return DXGI_FORMAT_R8G8_UNORM; case TextureFormat::R8_UNorm: return DXGI_FORMAT_R8_UNORM; case TextureFormat::R10G10B10A2_UNorm: return DXGI_FORMAT_R10G10B10A2_UNORM; case TextureFormat::R16_UInt: return DXGI_FORMAT_R16_UINT; case TextureFormat::R32_UInt: return DXGI_FORMAT_R32_UINT; case TextureFormat::R32G32_UInt: return DXGI_FORMAT_R32G32_UINT; case TextureFormat::R16G16_SNorm: return DXGI_FORMAT_R16G16_SNORM; case TextureFormat::R16G16_Float: return DXGI_FORMAT_R16G16_FLOAT; case TextureFormat::R16_Float: return DXGI_FORMAT_R16_FLOAT; default: Q_assert(!"Unsupported texture format"); return DXGI_FORMAT_R8G8B8A8_UNORM; } } static D3D12_CULL_MODE GetD3DCullMode(cullType_t cullMode) { switch(cullMode) { case CT_TWO_SIDED: return D3D12_CULL_MODE_NONE; case CT_BACK_SIDED: return D3D12_CULL_MODE_BACK; case CT_FRONT_SIDED: return D3D12_CULL_MODE_FRONT; default: Q_assert(!"Unsupported cull mode"); return D3D12_CULL_MODE_NONE; } } static 
D3D12_TEXTURE_ADDRESS_MODE GetD3DTextureAddressMode(textureWrap_t wrap) { switch(wrap) { case TW_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; case TW_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; default: Q_assert(!"Unsupported texture wrap mode"); return D3D12_TEXTURE_ADDRESS_MODE_WRAP; } } static D3D12_FILTER GetD3DFilter(TextureFilter::Id filter) { switch(filter) { case TextureFilter::Point: return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; case TextureFilter::Linear: return D3D12_FILTER_MIN_MAG_MIP_LINEAR; case TextureFilter::Anisotropic: return D3D12_FILTER_ANISOTROPIC; default: Q_assert(!"Unsupported texture filter mode"); return D3D12_FILTER_MIN_MAG_MIP_LINEAR; } } static D3D12_STENCIL_OP GetD3DStencilOp(StencilOp::Id stencilOp) { switch(stencilOp) { case StencilOp::Keep: return D3D12_STENCIL_OP_KEEP; case StencilOp::Zero: return D3D12_STENCIL_OP_ZERO; case StencilOp::Replace: return D3D12_STENCIL_OP_REPLACE; case StencilOp::SaturatedIncrement: return D3D12_STENCIL_OP_INCR_SAT; case StencilOp::SaturatedDecrement: return D3D12_STENCIL_OP_DECR_SAT; case StencilOp::Invert: return D3D12_STENCIL_OP_INVERT; case StencilOp::WrappedIncrement: return D3D12_STENCIL_OP_INCR; case StencilOp::WrappedDecrement: return D3D12_STENCIL_OP_DECR; default: Q_assert(!"Unsupported stencop operation"); return D3D12_STENCIL_OP_REPLACE; } } static D3D12_RESOURCE_STATES GetD3DResourceStates(ResourceStates::Flags flags) { #define ADD_BITS(RHIBit, D3DBits) \ if(flags & ResourceStates::RHIBit) \ { \ states |= D3DBits; \ } D3D12_RESOURCE_STATES states = D3D12_RESOURCE_STATE_COMMON; ADD_BITS(VertexBufferBit, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); ADD_BITS(IndexBufferBit, D3D12_RESOURCE_STATE_INDEX_BUFFER); ADD_BITS(ConstantBufferBit, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); ADD_BITS(RenderTargetBit, D3D12_RESOURCE_STATE_RENDER_TARGET); ADD_BITS(VertexShaderAccessBit, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); ADD_BITS(PixelShaderAccessBit, 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); ADD_BITS(ComputeShaderAccessBit, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); ADD_BITS(CopySourceBit, D3D12_RESOURCE_STATE_COPY_SOURCE); ADD_BITS(CopyDestinationBit, D3D12_RESOURCE_STATE_COPY_DEST); ADD_BITS(DepthReadBit, D3D12_RESOURCE_STATE_DEPTH_READ); ADD_BITS(DepthWriteBit, D3D12_RESOURCE_STATE_DEPTH_WRITE); ADD_BITS(UnorderedAccessBit, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); ADD_BITS(PresentBit, D3D12_RESOURCE_STATE_PRESENT); ADD_BITS(RaytracingASBit, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE); ADD_BITS(IndirectDispatchBit, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); return states; #undef ADD_BITS } static D3D12_BLEND GetD3DSourceBlend(uint32_t stateBits) { switch(stateBits & GLS_SRCBLEND_BITS) { case 0: return D3D12_BLEND_ONE; case GLS_SRCBLEND_ZERO: return D3D12_BLEND_ZERO; case GLS_SRCBLEND_ONE: return D3D12_BLEND_ONE; case GLS_SRCBLEND_DST_COLOR: return D3D12_BLEND_DEST_COLOR; case GLS_SRCBLEND_ONE_MINUS_DST_COLOR: return D3D12_BLEND_INV_DEST_COLOR; case GLS_SRCBLEND_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; case GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; case GLS_SRCBLEND_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; case GLS_SRCBLEND_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; case GLS_SRCBLEND_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT; default: Q_assert(!"Unsupported source blend mode"); return D3D12_BLEND_ONE; } } static D3D12_BLEND GetD3DDestBlend(uint32_t stateBits) { switch(stateBits & GLS_DSTBLEND_BITS) { case 0: return D3D12_BLEND_ZERO; case GLS_DSTBLEND_ZERO: return D3D12_BLEND_ZERO; case GLS_DSTBLEND_ONE: return D3D12_BLEND_ONE; case GLS_DSTBLEND_SRC_COLOR: return D3D12_BLEND_SRC_COLOR; case GLS_DSTBLEND_ONE_MINUS_SRC_COLOR: return D3D12_BLEND_INV_SRC_COLOR; case GLS_DSTBLEND_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA; case GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA; case GLS_DSTBLEND_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA; 
case GLS_DSTBLEND_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA; default: Q_assert(!"Unsupported dest blend mode"); return D3D12_BLEND_ONE; } } D3D12_SHADING_RATE GetD3DShadingRate(ShadingRate::Id shadingRate) { switch(shadingRate) { case ShadingRate::SR_1x1: return D3D12_SHADING_RATE_1X1; case ShadingRate::SR_1x2: return D3D12_SHADING_RATE_1X2; case ShadingRate::SR_2x1: return D3D12_SHADING_RATE_2X1; case ShadingRate::SR_2x2: return D3D12_SHADING_RATE_2X2; case ShadingRate::SR_2x4: return D3D12_SHADING_RATE_2X4; case ShadingRate::SR_4x2: return D3D12_SHADING_RATE_4X2; case ShadingRate::SR_4x4: return D3D12_SHADING_RATE_4X4; default: Q_assert(!"Unsupported shading rate"); return D3D12_SHADING_RATE_1X1; } } static D3D12_BLEND GetAlphaBlendFromColorBlend(D3D12_BLEND colorBlend) { switch(colorBlend) { case D3D12_BLEND_SRC_COLOR: return D3D12_BLEND_SRC_ALPHA; case D3D12_BLEND_INV_SRC_COLOR: return D3D12_BLEND_INV_SRC_ALPHA; case D3D12_BLEND_DEST_COLOR: return D3D12_BLEND_DEST_ALPHA; case D3D12_BLEND_INV_DEST_COLOR: return D3D12_BLEND_INV_DEST_ALPHA; default: return colorBlend; } } static bool IsD3DDepthFormat(DXGI_FORMAT format) { switch(format) { case DXGI_FORMAT_D16_UNORM: case DXGI_FORMAT_D24_UNORM_S8_UINT: case DXGI_FORMAT_D32_FLOAT: case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: return true; default: return false; } } static const char* GetNameForD3DResourceStates(D3D12_RESOURCE_STATES states) { switch(states) { case D3D12_RESOURCE_STATE_COMMON: return "common/present"; case D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER: return "vertex/constant buffer"; case D3D12_RESOURCE_STATE_INDEX_BUFFER: return "index buffer"; case D3D12_RESOURCE_STATE_RENDER_TARGET: return "render target"; case D3D12_RESOURCE_STATE_UNORDERED_ACCESS: return "UAV"; case D3D12_RESOURCE_STATE_DEPTH_WRITE: return "depth write"; case D3D12_RESOURCE_STATE_DEPTH_READ: return "depth read"; case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE: return "non-pixel shader resource"; case 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE: return "pixel shader resource"; case D3D12_RESOURCE_STATE_COPY_DEST: return "copy destination"; case D3D12_RESOURCE_STATE_COPY_SOURCE: return "copy source"; case D3D12_RESOURCE_STATE_GENERIC_READ: return "generic read"; case D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE: return "generic shader resource"; case D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE: return "raytracing acceleration structure"; default: return "???"; } } static const char* GetNameForD3DFormat(DXGI_FORMAT format) { switch(format) { #define FORMAT(Enum) case DXGI_FORMAT_##Enum: return #Enum; DXGI_FORMAT_LIST(FORMAT) default: return "???"; #undef FORMAT } } static const char* GetHeapTypeName(D3D12_HEAP_TYPE type) { switch(type) { case D3D12_HEAP_TYPE_DEFAULT: return "GPU"; case D3D12_HEAP_TYPE_UPLOAD: return "upload"; case D3D12_HEAP_TYPE_READBACK: return "readback"; case D3D12_HEAP_TYPE_CUSTOM: return "UMA"; default: Q_assert(!"Unsupported heap type"); return "unknown"; } } static const char* GetResourceHeapName(ID3D12Resource* resource) { D3D12_HEAP_PROPERTIES props; D3D12_HEAP_FLAGS flags; if(SUCCEEDED(resource->GetHeapProperties(&props, &flags))) { return GetHeapTypeName(props.Type); } return "unknown"; } static void ValidateResourceStateForBarrier(D3D12_RESOURCE_STATES state) { if(state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS || state == D3D12_RESOURCE_STATE_DEPTH_WRITE) { return; } const D3D12_RESOURCE_STATES readOnly[] = { D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_READ }; const D3D12_RESOURCE_STATES readWrite[] = { D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_DEPTH_WRITE }; const D3D12_RESOURCE_STATES writeOnly[] = { D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_RENDER_TARGET, 
D3D12_RESOURCE_STATE_STREAM_OUT }; int rBits = 0; int wBits = 0; for(auto bit : readOnly) { if(state & bit) { rBits++; } } for(auto bit : readWrite) { if(state & bit) { rBits++; wBits++; } } for(auto bit : writeOnly) { if(state & bit) { wBits++; } } // MS: "At most one write bit can be set." Q_assert(wBits == 0 || wBits == 1); if(wBits == 1) { // MS: "If any write bit is set, then no read bit may be set." Q_assert(rBits == 0); } } // returns true if the barrier should be used static bool SetBarrier( D3D12_RESOURCE_STATES& currentState, D3D12_RESOURCE_BARRIER& barrier, ResourceStates::Flags newState, ID3D12Resource* resource) { const D3D12_RESOURCE_STATES before = currentState; const D3D12_RESOURCE_STATES after = GetD3DResourceStates(newState); ValidateResourceStateForBarrier(before); ValidateResourceStateForBarrier(after); if((before & after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0 || ((before & D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE) != 0 && (after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0)) { // note that UAV barriers are unnecessary in a bunch of cases: // - before/after access is read-only // - before/after access is write-only, but to different ranges barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.UAV.pResource = resource; } else { if(before == after) { return false; } barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Transition.pResource = resource; barrier.Transition.StateBefore = before; barrier.Transition.StateAfter = after; barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; currentState = after; } return true; } static void ResolveDurationQueries() { const uint32_t frameIndex = (rhi.frameIndex + 1) % rhi.renderFrameCount; const HBuffer hbuffer = rhi.timeStampBuffers[frameIndex]; const Buffer& buffer = rhi.buffers.Get(hbuffer); #if defined(D3D_DEBUG) if(r_vsync->integer) { Q_assert(rhi.frameIndex == 0); Q_assert(frameIndex == 0); } #endif FrameQueries& fq = rhi.frameQueries[frameIndex]; 
if(fq.durationQueryCount == 0) { rhi.resolvedQueries.durationQueryCount = 0; return; } UINT64 gpuFrequencyU64; if(FAILED(rhi.mainCommandQueue->GetTimestampFrequency(&gpuFrequencyU64))) { for(uint32_t q = 0; q < fq.durationQueryCount; ++q) { DurationQuery& dq = fq.durationQueries[q]; dq.state = QueryState::Free; } fq.durationQueryCount = 0; rhi.resolvedQueries.durationQueryCount = 0; } const double gpuFrequencyF64 = (double)gpuFrequencyU64; const UINT timestampQueryCount = fq.durationQueryCount * 2; rhi.commandList->ResolveQueryData(rhi.timeStampHeaps[frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, 0, timestampQueryCount, buffer.buffer, 0); const UINT64* const timeStamps = (const UINT64*)MapBuffer(hbuffer); uint32_t* const gpuMicroSeconds = rhi.resolvedQueries.gpuMicroSeconds; for(uint32_t q = 0; q < fq.durationQueryCount; ++q) { DurationQuery& dq = fq.durationQueries[q]; Q_assert(dq.state == QueryState::Ended); if(dq.state != QueryState::Ended) { gpuMicroSeconds[q] = 0; dq.state = QueryState::Free; continue; } const UINT timeStampBeginIndex = q * 2; const UINT timeStampEndIndex = timeStampBeginIndex + 1; const UINT64 beginTime = timeStamps[timeStampBeginIndex]; const UINT64 endTime = timeStamps[timeStampEndIndex]; if(endTime > beginTime) { const UINT64 elapsed = endTime - beginTime; gpuMicroSeconds[q] = (uint32_t)((elapsed / gpuFrequencyF64) * 1000000.0); } else { gpuMicroSeconds[q] = 0; } dq.state = QueryState::Free; } rhi.resolvedQueries.durationQueryCount = fq.durationQueryCount; fq.durationQueryCount = 0; UnmapBuffer(hbuffer); } static void GrabSwapChainTextures() { for(uint32_t b = 0; b < rhi.swapChainBufferCount; ++b) { ID3D12Resource* renderTarget; D3D(rhi.swapChain->GetBuffer(b, IID_PPV_ARGS(&renderTarget))); TextureDesc desc(va("swap chain #%d", b + 1), glConfig.vidWidth, glConfig.vidHeight); desc.nativeResource = renderTarget; desc.initialState = ResourceStates::PresentBit; desc.allowedState = ResourceStates::PresentBit | ResourceStates::RenderTargetBit; 
rhi.renderTargets[b] = CreateTexture(desc); } } static void GetMonitorRefreshRate() { DWM_TIMING_INFO info = {}; info.cbSize = sizeof(info); if(SUCCEEDED(DwmGetCompositionTimingInfo(NULL, &info))) { rhie.monitorFrameDurationMS = 1000.0f * ((float)(info.rateRefresh.uiDenominator) / (float)info.rateRefresh.uiNumerator); } else { rhie.monitorFrameDurationMS = 0.0f; } if(r_vsync->integer == 0) { const float maxFPS = ri.Cvar_Get("com_maxfps", "125", CVAR_ARCHIVE)->value; rhie.targetFrameDurationMS = 1000.0f / maxFPS; } else if(rhie.monitorFrameDurationMS > 0.0f) { rhie.targetFrameDurationMS = rhie.monitorFrameDurationMS; } else { rhie.targetFrameDurationMS = 1.0f / 120.0f; // 120 Hz by default } } static void CreateNullResources() { { TextureDesc desc("null", 1, 1); rhi.nullTexture = CreateTexture(desc); } { TextureDesc desc("null RW", 1, 1); desc.format = TextureFormat::R8G8B8A8_UNorm; desc.initialState = ResourceStates::UnorderedAccessBit; desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit; rhi.nullRWTexture = CreateTexture(desc); } { BufferDesc desc("null", 256, ResourceStates::ShaderAccessBits); desc.memoryUsage = MemoryUsage::GPU; rhi.nullBuffer = CreateBuffer(desc); } { BufferDesc desc("null RW", 256, ResourceStates::UnorderedAccessBit); desc.memoryUsage = MemoryUsage::GPU; rhi.nullRWBuffer = CreateBuffer(desc); } rhi.nullSampler = CreateSampler(SamplerDesc()); } static void CopyDescriptor(ID3D12DescriptorHeap* dstHeap, uint32_t dstIndex, DescriptorHeap& srcHeap, uint32_t srcIndex) { Q_assert(srcIndex != InvalidDescriptorIndex); D3D12_CPU_DESCRIPTOR_HANDLE dstHandle = dstHeap->GetCPUDescriptorHandleForHeapStart(); dstHandle.ptr += dstIndex * srcHeap.descriptorSize; rhi.device->CopyDescriptorsSimple(1, dstHandle, srcHeap.GetCPUHandle(srcIndex), srcHeap.type); } static UINT BGRAUIntFromFloat(float r, float g, float b) { const BYTE br = (BYTE)(Com_Clamp(0.0f, 1.0f, r) * 255.0f); const BYTE bg = (BYTE)(Com_Clamp(0.0f, 1.0f, 
g) * 255.0f); const BYTE bb = (BYTE)(Com_Clamp(0.0f, 1.0f, b) * 255.0f); return PIX_COLOR(br, bg, bb); } static bool IsTearingSupported() { HMODULE library = LoadLibraryA("DXGI.dll"); if(library == NULL) { ri.Printf(PRINT_WARNING, "D3D12: DXGI.dll couldn't be found or opened\n"); return false; } typedef HRESULT(WINAPI* PFN_CreateDXGIFactory)(REFIID riid, _Out_ void** ppFactory); PFN_CreateDXGIFactory pCreateDXGIFactory = (PFN_CreateDXGIFactory)GetProcAddress(library, "CreateDXGIFactory"); if(pCreateDXGIFactory == NULL) { FreeLibrary(library); ri.Printf(PRINT_WARNING, "D3D12: Failed to locate CreateDXGIFactory in DXGI.dll\n"); return false; } HRESULT hr; BOOL enabled = FALSE; IDXGIFactory5* pFactory; hr = (*pCreateDXGIFactory)(__uuidof(IDXGIFactory5), (void**)&pFactory); if(FAILED(hr)) { FreeLibrary(library); ri.Printf(PRINT_WARNING, "D3D12: 'CreateDXGIFactory' failed with code 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr)); return false; } hr = pFactory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &enabled, sizeof(enabled)); pFactory->Release(); FreeLibrary(library); if(FAILED(hr)) { ri.Printf(PRINT_WARNING, "D3D12: 'IDXGIFactory5::CheckFeatureSupport' failed with code 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr)); return false; } return enabled != 0; } static UINT GetSwapChainFlags() { UINT flags = 0; if(r_vsync->integer) { flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; } else { flags = rhi.isTearingSupported ? 
DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; } return flags; } static void WaitForSwapChain() { if(rhi.frameLatencyWaitableObject != NULL && rhi.frameLatencyWaitNeeded) { Q_assert(r_vsync->integer != 0); WaitForSingleObjectEx(rhi.frameLatencyWaitableObject, INFINITE, TRUE); rhi.frameLatencyWaitNeeded = false; } } static uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc, bool slotAtIndex0) { Q_assert(resource); if(desc.Format == DXGI_FORMAT_D32_FLOAT) { desc.Format = DXGI_FORMAT_R32_FLOAT; } if(!rhi.useDynamicResources) { return rhi.descHeapGeneric.CreateSRV(resource, desc); } if(desc.ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE) { resource = NULL; } DynamicResources& dr = rhi.dynamicResources; const uint32_t index = dr.srvIndex.Allocate(slotAtIndex0); D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); rhi.device->CreateShaderResourceView(resource, &desc, handle); return index; } static uint32_t CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc) { Q_assert(resource); if(!rhi.useDynamicResources) { return rhi.descHeapGeneric.CreateUAV(resource, desc); } DynamicResources& dr = rhi.dynamicResources; const uint32_t index = dr.uavIndex.Allocate(); D3D12_CPU_DESCRIPTOR_HANDLE handleGPU = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handleGPU.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, handleGPU); // make a CPU-visible copy at the same offset in another heap to enable UAV clears D3D12_CPU_DESCRIPTOR_HANDLE handleCPU = dr.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handleCPU.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); 
rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, handleCPU); return index; } static uint32_t CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc) { if(!rhi.useDynamicResources) { return rhi.descHeapGeneric.CreateCBV(desc); } DynamicResources& dr = rhi.dynamicResources; const uint32_t index = dr.cbvIndex.Allocate(); D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); rhi.device->CreateConstantBufferView(&desc, handle); return index; } static uint32_t CreateSampler(D3D12_SAMPLER_DESC& desc) { if(!rhi.useDynamicResources) { return rhi.descHeapSamplers.CreateSampler(desc); } DynamicResources& dr = rhi.dynamicResources; const uint32_t index = dr.samplerIndex.Allocate(); D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); rhi.device->CreateSampler(&desc, handle); return index; } static void CreateBufferDescriptors(Buffer& buffer) { const BufferDesc& rhiDesc = buffer.desc; ID3D12Resource* const resource = buffer.buffer; uint32_t srvIndex = InvalidDescriptorIndex; if(rhiDesc.initialState & ResourceStates::ShaderAccessBits) { D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; srv.Buffer.FirstElement = 0; if(rhiDesc.structureByteCount > 0) { srv.Format = DXGI_FORMAT_UNKNOWN; srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; srv.Buffer.StructureByteStride = rhiDesc.structureByteCount; srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; } else { srv.Format = DXGI_FORMAT_R32_TYPELESS; srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv.Buffer.NumElements = rhiDesc.byteCount / 4; srv.Buffer.StructureByteStride = 0; 
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; } srvIndex = CreateSRV(resource, srv, rhiDesc.useSrvIndex0); } else if(rhiDesc.initialState & ResourceStates::RaytracingASBit) { D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; srv.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; srv.Format = DXGI_FORMAT_UNKNOWN; srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv.RaytracingAccelerationStructure.Location = buffer.gpuAddress; srvIndex = CreateSRV(resource, srv, false); } uint32_t cbvIndex = InvalidDescriptorIndex; if(rhiDesc.initialState & ResourceStates::ConstantBufferBit) { D3D12_CONSTANT_BUFFER_VIEW_DESC cbv = {}; cbv.BufferLocation = resource->GetGPUVirtualAddress(); cbv.SizeInBytes = rhiDesc.byteCount; cbvIndex = CreateCBV(cbv); } uint32_t uavIndex = InvalidDescriptorIndex; if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav = {}; uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; uav.Buffer.CounterOffsetInBytes = 0; uav.Buffer.FirstElement = 0; if(rhiDesc.structureByteCount > 0) { uav.Format = DXGI_FORMAT_UNKNOWN; uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; uav.Buffer.StructureByteStride = rhiDesc.structureByteCount; } else { uav.Format = DXGI_FORMAT_R32_TYPELESS; uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; uav.Buffer.NumElements = rhiDesc.byteCount / 4; uav.Buffer.StructureByteStride = 0; } uavIndex = CreateUAV(resource, uav); } buffer.cbvIndex = cbvIndex; buffer.uavIndex = uavIndex; buffer.srvIndex = srvIndex; } static void CreateTextureDescriptors(Texture& texture) { const TextureDesc& rhiDesc = texture.desc; ID3D12Resource* const resource = texture.texture; if(rhiDesc.allowedState & ResourceStates::ShaderAccessBits) { D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srv.Format = GetD3DFormat(rhiDesc.format); srv.Shader4ComponentMapping = 
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv.Texture2D.MipLevels = rhiDesc.mipCount; srv.Texture2D.MostDetailedMip = 0; srv.Texture2D.PlaneSlice = 0; srv.Texture2D.ResourceMinLODClamp = 0.0f; if(rhiDesc.format == TextureFormat::Depth24_Stencil8) { srv.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; } if(rhiDesc.depth > 1) { srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; srv.Texture3D.MipLevels = rhiDesc.mipCount; srv.Texture3D.MostDetailedMip = 0; srv.Texture3D.ResourceMinLODClamp = 0.0f; } texture.srvIndex = CreateSRV(resource, srv, false); } else { texture.srvIndex = InvalidDescriptorIndex; } if(rhiDesc.allowedState & ResourceStates::UnorderedAccessBit) { for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav = {}; uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; uav.Format = GetD3DFormat(rhiDesc.format); uav.Texture2D.MipSlice = m; uav.Texture2D.PlaneSlice = 0; if(rhiDesc.depth > 1) { uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; uav.Texture3D.MipSlice = m; uav.Texture3D.FirstWSlice = 0; uav.Texture3D.WSize = UINT(~0); // get access to all slices } texture.mips[m].uavIndex = CreateUAV(resource, uav); } } else { for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) { texture.mips[m].uavIndex = InvalidDescriptorIndex; } } } static void InitDynamicResourceDescriptorRanges() { const uint32_t uavStart = DynamicResources::MaxDescriptorsSRV; const uint32_t cbvStart = uavStart + DynamicResources::MaxDescriptorsUAV; DynamicResources& dr = rhi.dynamicResources; dr.srvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, DynamicResources::MaxDescriptorsSRV); dr.uavIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, uavStart, DynamicResources::MaxDescriptorsUAV); dr.cbvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, cbvStart, DynamicResources::MaxDescriptorsCBV); dr.samplerIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1024); } static void CreateDynamicResources() { if(!rhi.useDynamicResources) { return; } const uint32_t genericDescriptorCount = 
DynamicResources::MaxDescriptorsGeneric; const uint32_t samplerDescriptorCount = DynamicResources::MaxDescriptorsSamplers; DynamicResources& dr = rhi.dynamicResources; InitDynamicResourceDescriptorRanges(); { // keep 1.0 behavior for max. flexibility, // effectively disabling optimizations const D3D12_DESCRIPTOR_RANGE_FLAGS genericRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; const D3D12_DESCRIPTOR_RANGE_FLAGS samplerRangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; const DynamicResources::DescriptorRange generic[3] = { dr.srvIndex, dr.uavIndex, dr.cbvIndex }; D3D12_DESCRIPTOR_RANGE1 mainRanges[3] = {}; for(uint32_t i = 0; i < ARRAY_LEN(generic); ++i) { mainRanges[i].RangeType = generic[i].type; mainRanges[i].BaseShaderRegister = generic[i].start; mainRanges[i].NumDescriptors = generic[i].count; mainRanges[i].RegisterSpace = 0; mainRanges[i].OffsetInDescriptorsFromTableStart = generic[i].start; mainRanges[i].Flags = genericRangeFlags; } D3D12_DESCRIPTOR_RANGE1 samplerRange = {}; samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; samplerRange.BaseShaderRegister = 0; samplerRange.NumDescriptors = samplerDescriptorCount; samplerRange.RegisterSpace = 0; samplerRange.OffsetInDescriptorsFromTableStart = 0; samplerRange.Flags = samplerRangeFlags; D3D12_ROOT_PARAMETER1 rootParameters[3] = {}; rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[0].DescriptorTable.NumDescriptorRanges = ARRAY_LEN(mainRanges); rootParameters[0].DescriptorTable.pDescriptorRanges = mainRanges; rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[1].DescriptorTable.NumDescriptorRanges = 1; rootParameters[1].DescriptorTable.pDescriptorRanges = &samplerRange; rootParameters[2].ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[2].Constants.Num32BitValues = 64 - 2; // all the available space minus the 2 tables rootParameters[2].Constants.ShaderRegister = 0; // access the RC at register b0 D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc; desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; desc.Desc_1_1.NumParameters = ARRAY_LEN(rootParameters); desc.Desc_1_1.pParameters = rootParameters; desc.Desc_1_1.NumStaticSamplers = 0; desc.Desc_1_1.pStaticSamplers = NULL; ID3DBlob* blob; ID3DBlob* errorBlob; if(FAILED(D3D12SerializeVersionedRootSignature(&desc, &blob, &errorBlob))) { ri.Error(ERR_FATAL, "Root signature creation failed!\n%s\n", (const char*)errorBlob->GetBufferPointer()); } COM_RELEASE(errorBlob); ID3D12RootSignature* signature; D3D(rhi.device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&signature))); COM_RELEASE(blob); SetDebugName(signature, "Uber", D3DResourceType::RootSignature); dr.rootSignature = signature; } { ID3D12DescriptorHeap* heap; D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; heapDesc.NumDescriptors = genericDescriptorCount; heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); SetDebugName(heap, "Uber GPU generic", D3DResourceType::DescriptorHeap); dr.genericDescriptorHeap = heap; 
heapDesc.NumDescriptors = samplerDescriptorCount; heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); SetDebugName(heap, "Uber GPU sampler", D3DResourceType::DescriptorHeap); dr.samplerDescriptorHeap = heap; heapDesc.NumDescriptors = genericDescriptorCount; heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); SetDebugName(heap, "Uber CPU generic", D3DResourceType::DescriptorHeap); dr.genericCPUDescriptorHeap = heap; } } static void DestroyDynamicResources() { DynamicResources& dr = rhi.dynamicResources; COM_RELEASE(dr.rootSignature); COM_RELEASE(dr.genericDescriptorHeap); COM_RELEASE(dr.samplerDescriptorHeap); COM_RELEASE(dr.genericCPUDescriptorHeap); } static void BindDynamicResources() { if(!rhi.useDynamicResources) { return; } // @NOTE: Set*RootSignature must be called after SetDescriptorHeaps DynamicResources& dr = rhi.dynamicResources; ID3D12DescriptorHeap* heaps[] = { dr.genericDescriptorHeap, dr.samplerDescriptorHeap }; rhi.commandList->SetDescriptorHeaps(ARRAY_LEN(heaps), heaps); if(rhi.commandList->GetType() == D3D12_COMMAND_LIST_TYPE_DIRECT) { rhi.commandList->SetGraphicsRootSignature(dr.rootSignature); rhi.commandList->SetGraphicsRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); rhi.commandList->SetGraphicsRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); } rhi.commandList->SetComputeRootSignature(dr.rootSignature); rhi.commandList->SetComputeRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); rhi.commandList->SetComputeRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); } static void UpdateDynamicResources() { if(!rhi.useDynamicResources) { return; } InitDynamicResourceDescriptorRanges(); Texture* texture; HTexture 
htexture; for(int i = 0; rhi.textures.FindNext(&texture, &htexture, &i);) { CreateTextureDescriptors(*texture); } Buffer* buffer; HBuffer hbuffer; for(int i = 0; rhi.buffers.FindNext(&buffer, &hbuffer, &i);) { CreateBufferDescriptors(*buffer); } } static void SetRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants, bool graphics) { Q_assert(CanWriteCommands()); Q_assert(byteCount > 0); Q_assert(byteCount % 4 == 0); Q_assert(byteOffset % 4 == 0); Q_assert(byteOffset + byteCount <= 256 - 2 * 4); // 8 bytes are used for the 2 descriptor tables Q_assert(constants); // parameter index is 2 because the 2 descriptor tables come first if(graphics) { rhi.commandList->SetGraphicsRoot32BitConstants(2, (UINT)byteCount / 4, constants, (UINT)byteOffset / 4); } else { rhi.commandList->SetComputeRoot32BitConstants(2, (UINT)byteCount / 4, constants, (UINT)byteOffset / 4); } } static void Barrier(uint32_t texCount, const TextureBarrier* textures, uint32_t buffCount, const BufferBarrier* buffers) { Q_assert(CanWriteCommands()); static D3D12_RESOURCE_BARRIER barriers[MAX_DRAWIMAGES * 2]; Q_assert(buffCount + texCount <= ARRAY_LEN(barriers)); UINT barrierCount = 0; for(uint32_t i = 0; i < texCount; ++i) { Q_assert(!IsNullHandle(textures[i].texture)); Texture& texture = rhi.textures.Get(textures[i].texture); if(SetBarrier(texture.currentState, barriers[barrierCount], textures[i].newState, texture.texture)) { barrierCount++; } } for(uint32_t i = 0; i < buffCount; ++i) { Q_assert(!IsNullHandle(buffers[i].buffer)); Buffer& buffer = rhi.buffers.Get(buffers[i].buffer); if(SetBarrier(buffer.currentState, barriers[barrierCount], buffers[i].newState, buffer.buffer)) { barrierCount++; } } if(barrierCount > 0) { rhi.commandList->ResourceBarrier(barrierCount, barriers); } } static void EnsureBufferIsThisLarge(HBuffer& hbuffer, const char* name, ResourceStates::Flags state, uint32_t byteCount) { uint32_t oldByteCount = 0; if(!IsNullHandle(hbuffer)) { const Buffer& buffer = 
rhi.buffers.Get(hbuffer); if(buffer.desc.byteCount >= byteCount) { return; } oldByteCount = buffer.desc.byteCount; } byteCount = max(byteCount, 2 * oldByteCount); DestroyBufferDelayed(hbuffer); BufferDesc desc(name, byteCount, state); desc.shortLifeTime = true; hbuffer = CreateBuffer(desc); } static void UpdateGPUIndexRangeAndHelp() { Cvar_SetRange(r_gpuIndex->name, r_gpuIndex->type, "0", va("%d", rhi.gpuCount)); char values[256]; StringList stringList; stringList.Init(values, sizeof(values)); stringList.Append("0"); stringList.Append("Default GPU"); stringList.Append(""); for(uint32_t i = 0; i < rhi.gpuCount; ++i) { stringList.Append(va("%d", (int)i + 1)); stringList.Append(rhi.gpus[i].name); stringList.Append(""); } stringList.Terminate(); Cvar_SetMenuData(r_gpuIndex->name, CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "GPU selection", "Choose the GPU to use", "", values); } #if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION) static void __stdcall NVAPI_RTValidationCallback(void* pUserData, NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY severity, const char* messageCode, const char* message, const char* messageDetails) { const char* severityString = "unknown"; switch(severity) { case NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_ERROR: severityString = "error"; break; case NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_WARNING: severityString = "warning"; break; } OutputDebugStringA(va("NVAPI RT: %s: [%s] %s\n%s\n", severityString, messageCode, message, messageDetails)); } #endif static void DrawResourceUsage() { if(BeginTable("Handles", 3)) { TableHeader(3, "Type", "Count", "Max"); #define ITEM(Name, Variable) TableRow(3, Name, va("%d", (int)Variable.CountUsedSlots()), va("%d", (int)Variable.size)) ITEM("Buffers", rhi.buffers); ITEM("Textures", rhi.textures); ITEM("Root Signatures", rhi.rootSignatures); ITEM("Descriptor Tables", rhi.descriptorTables); ITEM("Pipelines", rhi.pipelines); ITEM("Shaders", rhi.shaders); ITEM("Samplers", 
rhi.samplers); #undef ITEM TableRow(3, "Duration Queries", va("%d", rhi.frameQueries[rhi.frameIndex].durationQueryCount), va("%d", MaxDurationQueries)); ImGui::EndTable(); } ImGui::NewLine(); if(BeginTable("Descriptors", 3)) { TableHeader(3, "Type", "Count", "Max"); #define ITEM(Name, Variable) TableRow(3, Name, va("%d", (int)Variable.allocatedItemCount), va("%d", (int)Variable.size)) #define DRITEM(Name, Index) TableRow(3, Name, va("%d", (int)(Index.index - Index.start)), va("%d", (int)Index.count)); if(rhi.useDynamicResources) { const DynamicResources& dr = rhi.dynamicResources; DRITEM("GPU SRV", dr.srvIndex); DRITEM("GPU UAV", dr.uavIndex); DRITEM("GPU CBV", dr.cbvIndex); DRITEM("GPU Samplers", dr.samplerIndex); } else { ITEM("CPU CBV/SRV/UAV", rhi.descHeapGeneric.freeList); ITEM("CPU Samplers", rhi.descHeapSamplers.freeList); } ITEM("CPU RTV", rhi.descHeapRTVs.freeList); ITEM("CPU DSV", rhi.descHeapDSVs.freeList); #undef DRITEM #undef ITEM ImGui::EndTable(); } ImGui::NewLine(); if(BeginTable("Memory", 2)) { D3D12MA::Budget budget; rhi.allocator->GetBudget(&budget, NULL); TableRow2("UMA", rhi.allocator->IsUMA()); TableRow2("Cache coherent UMA", rhi.allocator->IsCacheCoherentUMA()); TableRow(2, "Total", Com_FormatBytes(rhi.allocator->GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL))); TableRow(2, "Budget", Com_FormatBytes(budget.BudgetBytes)); TableRow(2, "Usage", Com_FormatBytes(budget.UsageBytes)); TableRow(2, "Allocated", Com_FormatBytes(budget.Stats.BlockBytes)); TableRow(2, "Used", Com_FormatBytes(budget.Stats.AllocationBytes)); TableRow(2, "Block count", va("%d", budget.Stats.BlockCount)); TableRow(2, "Allocation count", va("%d", budget.Stats.AllocationCount)); ImGui::EndTable(); } } static void DrawCaps() { if(BeginTable("Capabilities", 2)) { TableRow(2, "Adapter", rhi.adapterName); TableRow(2, "Driver version", rhi.umdVersionString); D3D12_FEATURE_DATA_D3D12_OPTIONS options0 = { 0 }; 
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options0, sizeof(options0)))) { const char* tier = "Unknown"; switch(options0.ResourceBindingTier) { case D3D12_RESOURCE_BINDING_TIER_1: tier = "1"; break; case D3D12_RESOURCE_BINDING_TIER_2: tier = "2"; break; case D3D12_RESOURCE_BINDING_TIER_3: tier = "3"; break; default: break; } TableRow(2, "Resource binding tier", tier); } D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = { 0 }; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2)))) { TableRow2("Depth bounds test", options2.DepthBoundsTestSupported ? "YES" : "NO"); } D3D12_FEATURE_DATA_ARCHITECTURE arch0 = { 0 }; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &arch0, sizeof(arch0)))) { TableRow2("Tile-based renderer", arch0.TileBasedRenderer ? "YES" : "NO"); } D3D12_FEATURE_DATA_ROOT_SIGNATURE root0 = {}; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &root0, sizeof(root0)))) { const char* version = "Unknown"; switch(root0.HighestVersion) { case D3D_ROOT_SIGNATURE_VERSION_1_0: version = "1.0"; break; case D3D_ROOT_SIGNATURE_VERSION_1_1: version = "1.1"; break; default: break; } TableRow(2, "Root signature version", version); } D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = { 0 }; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5)))) { const char* tier = "Unknown"; switch(options5.RenderPassesTier) { case D3D12_RENDER_PASS_TIER_0: tier = "0"; break; case D3D12_RENDER_PASS_TIER_1: tier = "1"; break; case D3D12_RENDER_PASS_TIER_2: tier = "2"; break; default: break; } TableRow(2, "Render passes tier", tier); tier = "Unknown"; switch(options5.RaytracingTier) { case D3D12_RAYTRACING_TIER_NOT_SUPPORTED: tier = "Not supported"; break; case D3D12_RAYTRACING_TIER_1_0: tier = "1.0"; break; case D3D12_RAYTRACING_TIER_1_1: tier = "1.1"; break; default: break; } TableRow(2, "Raytracing (DXR) tier", 
tier); } D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = { 0 }; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)))) { const char* tier = "Unknown"; switch(options6.VariableShadingRateTier) { case D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED: tier = "N/A"; break; case D3D12_VARIABLE_SHADING_RATE_TIER_1: tier = "1"; break; case D3D12_VARIABLE_SHADING_RATE_TIER_2: tier = "2"; break; default: break; } TableRow(2, "Variable-rate shading (VRS) tier", tier); TableRow(2, "VRS: 2x4, 4x2, 4x4 support", options6.AdditionalShadingRatesSupported ? "YES" : "NO"); } D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {}; shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_8; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel)))) { const char* model = "Unknown"; switch(shaderModel.HighestShaderModel) { case D3D_SHADER_MODEL_6_0: model = "6.0"; break; case D3D_SHADER_MODEL_6_1: model = "6.1"; break; case D3D_SHADER_MODEL_6_2: model = "6.2"; break; case D3D_SHADER_MODEL_6_3: model = "6.3"; break; case D3D_SHADER_MODEL_6_4: model = "6.4"; break; case D3D_SHADER_MODEL_6_5: model = "6.5"; break; case D3D_SHADER_MODEL_6_6: model = "6.6"; break; case D3D_SHADER_MODEL_6_7: model = "6.7"; break; case D3D_SHADER_MODEL_6_8: model = "6.8"; break; default: break; } TableRow(2, "Shader model", model); } TableRow(2, "UMA", rhiInfo.isUMA ? "YES" : "NO"); TableRow(2, "Cache-coherent UMA", rhiInfo.isCacheCoherentUMA ? "YES" : "NO"); TableRow(2, "Barycentrics", rhiInfo.hasBarycentrics ? 
"YES" : "NO"); // the validation layer reports live objects at shutdown when NvAPI_D3D12_QueryCpuVisibleVidmem is called #if defined(RHI_ENABLE_NVAPI) if(rhi.nvapiActive) { NvU64 cvvTotal, cvvFree; if(NvAPI_D3D12_QueryCpuVisibleVidmem(rhi.device, &cvvTotal, &cvvFree) == NvAPI_Status::NVAPI_OK && cvvTotal > 0) { TableRow(2, "CPU Visible VRAM Total", Com_FormatBytes(cvvTotal)); TableRow(2, "CPU Visible VRAM Free", Com_FormatBytes(cvvFree)); } else { TableRow(2, "CPU Visible VRAM", "N/A"); } } #endif ImGui::EndTable(); } } static void DrawTextures() { static char filter[256]; if(ImGui::Button("Clear filter")) { filter[0] = '\0'; } ImGui::SameLine(); ImGui::InputText(" ", filter, ARRAY_LEN(filter)); if(BeginTable("Textures", 4)) { TableHeader(4, "Name", "State", "Size", "Format"); int i = 0; Texture* texture; HTexture htexture; while(rhi.textures.FindNext(&texture, &htexture, &i)) { if(filter[0] != '\0' && !Com_Filter(filter, texture->desc.name)) { continue; } const D3D12_RESOURCE_DESC desc = texture->texture->GetDesc(); const uint64_t byteCount = texture->allocation != NULL ? 
texture->allocation->GetSize() : 0; TableRow(4, texture->desc.name, GetNameForD3DResourceStates(texture->currentState), Com_FormatBytes(byteCount), GetNameForD3DFormat(desc.Format)); } ImGui::EndTable(); } } static void DrawBuffers() { static char filter[256]; if(ImGui::Button("Clear filter")) { filter[0] = '\0'; } ImGui::SameLine(); ImGui::InputText(" ", filter, ARRAY_LEN(filter)); if(BeginTable("Buffers", 4)) { TableHeader(4, "Buffer", "State", "Heap", "Size"); int i = 0; Buffer* buffer; HBuffer hbuffer; while(rhi.buffers.FindNext(&buffer, &hbuffer, &i)) { if(filter[0] != '\0' && !Com_Filter(filter, buffer->desc.name)) { continue; } TableRow(4, buffer->desc.name, GetNameForD3DResourceStates(buffer->currentState), GetResourceHeapName(buffer->buffer), Com_FormatBytes(buffer->allocation->GetSize())); } ImGui::EndTable(); } } typedef void (*UICallback)(); static void DrawSection(const char* name, UICallback callback) { if(ImGui::BeginTabItem(name)) { (*callback)(); ImGui::EndTabItem(); } } static void DrawGUI() { static bool resourcesActive = false; ToggleBooleanWithShortcut(resourcesActive, ImGuiKey_R); GUI_AddMainMenuItem(GUI_MainMenu::Info, "RHI Resources", "Ctrl+R", &resourcesActive); if(resourcesActive) { if(ImGui::Begin("Direct3D 12 RHI", &resourcesActive)) { ImGui::BeginTabBar("Tabs#RHI"); DrawSection("Resources", &DrawResourceUsage); DrawSection("Caps", &DrawCaps); DrawSection("Textures", &DrawTextures); DrawSection("Buffers", &DrawBuffers); ImGui::EndTabBar(); } ImGui::End(); } } bool Init(const InitDesc& initDesc) { Sys_V_Init(); if(rhi.device != NULL) { rhi.raytracingScratchBuffer = RHI_MAKE_NULL_HANDLE(); rhi.raytracingInstanceBuffer = RHI_MAKE_NULL_HANDLE(); DXGI_SWAP_CHAIN_DESC desc; D3D(rhi.swapChain->GetDesc(&desc)); // V-Sync toggles require changing the swap chain flags, // which means ResizeBuffers can't be used const bool vsync = r_vsync->integer != 0; rhi.renderFrameCount = vsync ? 
1 : 2; if(glInfo.winWidth != desc.BufferDesc.Width || glInfo.winHeight != desc.BufferDesc.Height || vsync != rhi.vsync) { WaitUntilDeviceIsIdle(); for(uint32_t f = 0; f < rhi.swapChainBufferCount; ++f) { DestroyTexture(rhi.renderTargets[f]); } const UINT flags = GetSwapChainFlags(); if(vsync == rhi.vsync) { D3D(rhi.swapChain->ResizeBuffers(desc.BufferCount, glInfo.winWidth, glInfo.winHeight, desc.BufferDesc.Format, flags)); } else { if(rhi.frameLatencyWaitableObject != NULL) { CloseHandle(rhi.frameLatencyWaitableObject); rhi.frameLatencyWaitableObject = NULL; } COM_RELEASE(rhi.swapChain); IDXGISwapChain* dxgiSwapChain; DXGI_SWAP_CHAIN_DESC swapChainDesc = { 0 }; swapChainDesc.BufferCount = rhi.swapChainBufferCount; swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; swapChainDesc.BufferDesc.Width = glInfo.winWidth; swapChainDesc.BufferDesc.Height = glInfo.winHeight; swapChainDesc.BufferDesc.RefreshRate.Numerator = 0; swapChainDesc.BufferDesc.RefreshRate.Denominator = 1; swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.Flags = flags; swapChainDesc.OutputWindow = GetActiveWindow(); swapChainDesc.SampleDesc.Count = 1; swapChainDesc.SampleDesc.Quality = 0; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; swapChainDesc.Windowed = TRUE; D3D(rhi.factory->CreateSwapChain(rhi.mainCommandQueue, &swapChainDesc, &dxgiSwapChain)); D3D(dxgiSwapChain->QueryInterface(IID_PPV_ARGS(&rhi.swapChain))); COM_RELEASE(dxgiSwapChain); if(vsync) { rhi.frameLatencyWaitableObject = rhi.swapChain->GetFrameLatencyWaitableObject(); rhi.frameLatencyWaitNeeded = true; D3D(rhi.swapChain->SetMaximumFrameLatency(1)); } } GrabSwapChainTextures(); rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex(); for(uint32_t f = 0; f < FrameCount; ++f) { rhi.mainFenceValues[f] = 0; } 
rhi.readback.ResizeIfNeeded(); } GetMonitorRefreshRate(); rhi.tempStringAllocator.Clear(); rhi.vsync = vsync; UpdateDynamicResources(); UpdateGPUIndexRangeAndHelp(); return false; } // @NOTE: we can't use memset because of the StaticPool members new (&rhi) RHIPrivate(); rhi.useDynamicResources = initDesc.directDescriptorHeapIndexing; // check for the presence of our 3 DLLs ASAP { HMODULE coreModule = LoadLibraryA("cnq3/D3D12Core.dll"); if(coreModule == NULL) { ri.Error(ERR_FATAL, "Failed to locate/open cnq3/D3D12Core.dll\n"); } FreeLibrary(coreModule); rhi.dxilModule = LoadLibraryA("cnq3/dxil.dll"); if(rhi.dxilModule == NULL) { ri.Error(ERR_FATAL, "Failed to locate/open cnq3/dxil.dll\n"); } rhi.dxcModule = LoadLibraryA("cnq3/dxcompiler.dll"); if(rhi.dxcModule == NULL) { ri.Error(ERR_FATAL, "Failed to locate/open cnq3/dxcompiler.dll\n"); } } rhi.persStringAllocator.Init(rhi.persStringData, sizeof(rhi.persStringData)); rhi.tempStringAllocator.Init(rhi.tempStringData, sizeof(rhi.tempStringData)); #if defined(D3D_DEBUG) if(SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&rhi.debug)))) { // calling after device creation will remove the device // if you hit this error: // "D3D12 SDKLayers dll does not match the D3D12SDKVersion of D3D12 Core dll." // make sure your D3D12SDKVersion and D3D12SDKPath are valid! 
rhi.debug->EnableDebugLayer(); #if defined(D3D_GPU_BASED_VALIDATION) ID3D12Debug1* debug1; if(SUCCEEDED(rhi.debug->QueryInterface(IID_PPV_ARGS(&debug1)))) { debug1->SetEnableGPUBasedValidation(TRUE); debug1->SetEnableSynchronizedCommandQueueValidation(TRUE); } #endif } UINT dxgiFactoryFlags = 0; if(SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&rhi.dxgiInfoQueue)))) { dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; rhi.dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE); rhi.dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE); } #endif #if defined(D3D_DEBUG) D3D(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&rhi.factory))); #else D3D(CreateDXGIFactory1(IID_PPV_ARGS(&rhi.factory))); #endif CreateAdapterList(); if(r_gpuIndex->integer > 0) { rhi.adapter = GetAdapterAtIndex(r_gpuIndex->integer - 1); } if(rhi.adapter == NULL) { rhi.adapter = FindMostSuitableAdapter(rhi.factory, r_gpuPreference->integer); } { char adapterName[256]; const char* adapterNamePtr = "unknown"; DXGI_ADAPTER_DESC1 desc; if(SUCCEEDED(rhi.adapter->GetDesc1(&desc)) && WideCharToMultiByte(CP_UTF8, 0, desc.Description, -1, adapterName, sizeof(adapterName), NULL, NULL) > 0) { adapterNamePtr = adapterName; } ri.Printf(PRINT_ALL, "Selected graphics adapter: %s\n", adapterNamePtr); Q_strncpyz(rhi.adapterName, adapterNamePtr, sizeof(rhi.adapterName)); } UpdateGPUIndexRangeAndHelp(); D3D(D3D12CreateDevice(rhi.adapter, FeatureLevel, IID_PPV_ARGS(&rhi.device))); rhi.vendorId = VENDORID_INVALID; { DXGI_ADAPTER_DESC desc; if(SUCCEEDED(rhi.adapter->GetDesc(&desc))) { rhi.vendorId = desc.VendorId; } } { LARGE_INTEGER driverVersion; if(SUCCEEDED(rhi.adapter->CheckInterfaceSupport(__uuidof(IDXGIDevice), &driverVersion))) { rhi.umdVersionSplit[0] = driverVersion.QuadPart >> 48; rhi.umdVersionSplit[1] = (driverVersion.QuadPart >> 32) & 0xFFFF; rhi.umdVersionSplit[2] = (driverVersion.QuadPart >> 16) & 0xFFFF; 
rhi.umdVersionSplit[3] = driverVersion.QuadPart & 0xFFFF; rhi.umdVersion = driverVersion.QuadPart; Com_sprintf(rhi.umdVersionString, sizeof(rhi.umdVersionString), "%d.%d.%d.%d", (int)rhi.umdVersionSplit[0], (int)rhi.umdVersionSplit[1], (int)rhi.umdVersionSplit[2], (int)rhi.umdVersionSplit[3]); } } // with direct descriptor heap indexing on Intel, // *Texture*.GetDimensions() calls return incorrect values // UMD version 31.0.101.5382 (0x001F000000651506) is still broken // UMD version 31.0.101.5444 (0X001F000000651544) is the first fixed version if(initDesc.directDescriptorHeapIndexing && rhi.vendorId == VENDORID_INTEL && rhi.umdVersion < 0X001F000000651544) { ri.Error(ERR_FATAL, "This Intel graphics driver is known to have incorrect behavior.\n" "Version 31.0.101.5444 (April 19, 2024) or newer is required.\n"); } #if defined(RHI_ENABLE_AFTERMATH) rhi.aftermathActive = false; if(rhi.vendorId == VENDORID_NVIDIA) { const GFSDK_Aftermath_FeatureFlags flags = GFSDK_Aftermath_FeatureFlags_Maximum; const GFSDK_Aftermath_Result result = GFSDK_Aftermath_DX12_Initialize(GFSDK_Aftermath_Version_API, flags, rhi.device); if(result == GFSDK_Aftermath_Result_Success) { rhi.aftermathActive = true; } Q_assert(rhi.aftermathActive); } #endif { D3D12_INDIRECT_ARGUMENT_DESC argDesc = {}; argDesc.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; D3D12_COMMAND_SIGNATURE_DESC desc = {}; desc.ByteStride = 12; desc.NodeMask = 0; desc.NumArgumentDescs = 1; desc.pArgumentDescs = &argDesc; D3D(rhi.device->CreateCommandSignature(&desc, NULL, IID_PPV_ARGS(&rhi.indirectDispatchSignature))); } if(rhi.useDynamicResources) { D3D12_FEATURE_DATA_D3D12_OPTIONS options0 = {}; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options0, sizeof(options0)))) { if(options0.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) { ri.Error(ERR_FATAL, "The CRP requires Resource Binding Tier 3 capable hardware\n"); } } D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {}; 
shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_6; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel)))) { if(shaderModel.HighestShaderModel < D3D_SHADER_MODEL_6_6) { ri.Error(ERR_FATAL, "The CRP requires Shader Model 6.6 capable hardware\n"); } } } { D3D12MA::ALLOCATOR_DESC desc = {}; desc.pDevice = rhi.device; desc.pAdapter = rhi.adapter; desc.Flags = D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED; D3D(D3D12MA::CreateAllocator(&desc, &rhi.allocator)); } if(rhi.allocator->IsCacheCoherentUMA()) { D3D12MA::POOL_DESC poolDesc = {}; poolDesc.HeapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; poolDesc.HeapProperties.CreationNodeMask = 0; poolDesc.HeapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; // system poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_CUSTOM; poolDesc.HeapProperties.VisibleNodeMask = 0; poolDesc.HeapFlags = D3D12_HEAP_FLAG_NONE; poolDesc.Flags = D3D12MA::POOL_FLAG_NONE; D3D12MA::Pool* pool; if(SUCCEEDED(rhi.allocator->CreatePool(&poolDesc, &pool))) { rhi.umaPool = pool; } } #if defined(D3D_DEBUG) if(rhi.debug) { rhi.device->QueryInterface(IID_PPV_ARGS(&rhi.infoQueue)); if(rhi.infoQueue) { rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); D3D12_MESSAGE_ID filteredMessages[] = { // can't remember what this one is for... 
//D3D12_MESSAGE_ID_SETPRIVATEDATA_CHANGINGPARAMS, // clear color mismatch will happen when going through a teleporter D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE }; D3D12_INFO_QUEUE_FILTER filter = { 0 }; filter.DenyList.NumIDs = ARRAY_LEN(filteredMessages); filter.DenyList.pIDList = filteredMessages; rhi.infoQueue->AddStorageFilterEntries(&filter); } } if(rhi.debug) { ID3D12DebugDevice1* debugDevice1; if(SUCCEEDED(rhi.device->QueryInterface(IID_PPV_ARGS(&debugDevice1)))) { // defaults: // D3D12_GPU_BASED_VALIDATION_SHADER_PATCH_MODE_UNGUARDED_VALIDATION // 256 // D3D12_GPU_BASED_VALIDATION_PIPELINE_STATE_CREATE_FLAG_NONE D3D12_DEBUG_DEVICE_GPU_BASED_VALIDATION_SETTINGS gbv = {}; gbv.DefaultShaderPatchMode = D3D12_GPU_BASED_VALIDATION_SHADER_PATCH_MODE_GUARDED_VALIDATION; gbv.MaxMessagesPerCommandList = 1024; // defaults to 256 gbv.PipelineStateCreateFlags = D3D12_GPU_BASED_VALIDATION_PIPELINE_STATE_CREATE_FLAG_FRONT_LOAD_CREATE_GUARDED_VALIDATION_SHADERS; debugDevice1->SetDebugParameter(D3D12_DEBUG_DEVICE_PARAMETER_GPU_BASED_VALIDATION_SETTINGS, &gbv, sizeof(gbv)); // default: D3D12_DEBUG_FEATURE_NONE const D3D12_DEBUG_FEATURE features = D3D12_DEBUG_FEATURE_ALLOW_BEHAVIOR_CHANGING_DEBUG_AIDS | D3D12_DEBUG_FEATURE_CONSERVATIVE_RESOURCE_STATE_TRACKING; debugDevice1->SetDebugParameter(D3D12_DEBUG_DEVICE_PARAMETER_FEATURE_FLAGS, &features, sizeof(features)); COM_RELEASE(debugDevice1); } } #endif { uint16_t* freeList = rhi.descriptorFreeListData; rhi.descHeapGeneric.Create(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MaxCPUGenericDescriptors, freeList, "all-encompassing CBV SRV UAV"); freeList += MaxCPUGenericDescriptors; rhi.descHeapSamplers.Create(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MaxCPUSamplerDescriptors, freeList, "all-encompassing sampler"); freeList += MaxCPUSamplerDescriptors; rhi.descHeapRTVs.Create(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MaxCPURTVDescriptors, freeList, "all-encompassing RTV"); freeList += MaxCPURTVDescriptors; 
rhi.descHeapDSVs.Create(D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MaxCPUDSVDescriptors, freeList, "all-encompassing DSV"); } { D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { 0 }; commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; commandQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; commandQueueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; commandQueueDesc.NodeMask = 0; D3D(rhi.device->CreateCommandQueue(&commandQueueDesc, IID_PPV_ARGS(&rhi.mainCommandQueue))); SetDebugName(rhi.mainCommandQueue, "main", D3DResourceType::CommandQueue); commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE; D3D(rhi.device->CreateCommandQueue(&commandQueueDesc, IID_PPV_ARGS(&rhi.computeCommandQueue))); SetDebugName(rhi.computeCommandQueue, "compute", D3DResourceType::CommandQueue); } rhi.isTearingSupported = IsTearingSupported(); rhi.swapChainBufferCount = 2; rhi.renderFrameCount = r_vsync->integer ? 1 : 2; { const UINT flags = GetSwapChainFlags(); IDXGISwapChain* dxgiSwapChain; DXGI_SWAP_CHAIN_DESC swapChainDesc = { 0 }; swapChainDesc.BufferCount = rhi.swapChainBufferCount; swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; swapChainDesc.BufferDesc.Width = glInfo.winWidth; swapChainDesc.BufferDesc.Height = glInfo.winHeight; swapChainDesc.BufferDesc.RefreshRate.Numerator = 0; swapChainDesc.BufferDesc.RefreshRate.Denominator = 1; swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.Flags = flags; swapChainDesc.OutputWindow = GetActiveWindow(); swapChainDesc.SampleDesc.Count = 1; swapChainDesc.SampleDesc.Quality = 0; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; swapChainDesc.Windowed = TRUE; D3D(rhi.factory->CreateSwapChain(rhi.mainCommandQueue, &swapChainDesc, &dxgiSwapChain)); rhi.vsync = r_vsync->integer != 0; D3D(dxgiSwapChain->QueryInterface(IID_PPV_ARGS(&rhi.swapChain))); 
rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex(); COM_RELEASE(dxgiSwapChain); if(r_vsync->integer) { rhi.frameLatencyWaitableObject = rhi.swapChain->GetFrameLatencyWaitableObject(); rhi.frameLatencyWaitNeeded = true; D3D(rhi.swapChain->SetMaximumFrameLatency(1)); } GrabSwapChainTextures(); } GetMonitorRefreshRate(); for(UINT f = 0; f < FrameCount; ++f) { D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&rhi.mainCommandAllocators[f]))); SetDebugName(rhi.mainCommandAllocators[f], va("main #%d", f + 1), D3DResourceType::CommandAllocator); } // get command list ready to use during init D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, rhi.mainCommandAllocators[rhi.frameIndex], NULL, IID_PPV_ARGS(&rhi.mainCommandList))); SetDebugName(rhi.mainCommandList, "main", D3DResourceType::CommandList); D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&rhi.tempCommandAllocator))); SetDebugName(rhi.tempCommandAllocator, "temp", D3DResourceType::CommandAllocator); // the temp command list is always left open for the user D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, rhi.tempCommandAllocator, NULL, IID_PPV_ARGS(&rhi.tempCommandList))); SetDebugName(rhi.tempCommandList, "temp", D3DResourceType::CommandList); rhi.tempCommandListOpen = true; // the active/bound command list is the main one by default rhi.commandList = rhi.mainCommandList; rhi.mainFence.Create(rhi.mainFenceValues[rhi.frameIndex], "main command queue"); rhi.tempFence.Create(rhi.tempFenceValue, "temp command queue"); rhi.upload.Create(); rhi.readback.Create(); #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { Q_assert(rhi.mainCommandList != NULL); Q_assert(rhi.tempCommandList != NULL); GFSDK_Aftermath_Result r = GFSDK_Aftermath_Result_Fail; r = GFSDK_Aftermath_DX12_CreateContextHandle(rhi.mainCommandList, &rhi.aftermathMainCommandList); Q_assert(r == 
GFSDK_Aftermath_Result_Success); r = GFSDK_Aftermath_DX12_CreateContextHandle(rhi.tempCommandList, &rhi.aftermathTempCommandList); Q_assert(r == GFSDK_Aftermath_Result_Success); rhi.aftermathCommandList = rhi.aftermathMainCommandList; } #endif for(uint32_t f = 0; f < FrameCount; ++f) { D3D12_QUERY_HEAP_DESC desc = { 0 }; desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; desc.Count = MaxDurationQueries * 2; desc.NodeMask = 0; D3D(rhi.device->CreateQueryHeap(&desc, IID_PPV_ARGS(&rhi.timeStampHeaps[f]))); SetDebugName(rhi.timeStampHeaps[f], va("timestamp #%d", f + 1), D3DResourceType::QueryHeap); } for(uint32_t f = 0; f < FrameCount; ++f) { const uint32_t byteCount = MaxDurationQueries * 2 * sizeof(UINT64); BufferDesc desc(va("timestamp readback #%d", f + 1), byteCount, ResourceStates::CopySourceBit); desc.memoryUsage = MemoryUsage::Readback; rhi.timeStampBuffers[f] = CreateBuffer(desc); } CreateDynamicResources(); CreateNullResources(); // queue some actual work... D3D(rhi.commandList->Close()); WaitUntilDeviceIsIdle(); #if defined(RHI_ENABLE_NVAPI) rhi.nvapiActive = false; if(rhi.vendorId == VENDORID_NVIDIA) { NvAPI_Status nr = NvAPI_Initialize(); if(nr == NvAPI_Status::NVAPI_OK) { NvAPI_ShortString version; if(NvAPI_GetInterfaceVersionString(version) == NvAPI_Status::NVAPI_OK) { ri.Printf(PRINT_ALL, "Opened nvapi.dll (%s)\n", version); } else { ri.Printf(PRINT_ALL, "Opened nvapi.dll\n"); } rhi.nvapiActive = true; #if defined(RHI_ENABLE_NVAPI_RT_VALIDATION) nr = NvAPI_D3D12_EnableRaytracingValidation(rhi.device, NVAPI_D3D12_RAYTRACING_VALIDATION_FLAG_NONE); Q_assert(nr == NvAPI_Status::NVAPI_OK); void* nvapiValidationCallbackHandle = NULL; nr = NvAPI_D3D12_RegisterRaytracingValidationMessageCallback(rhi.device, &NVAPI_RTValidationCallback, NULL, &nvapiValidationCallbackHandle); Q_assert(nr == NvAPI_Status::NVAPI_OK); #endif } else { NvAPI_ShortString desc; if(NvAPI_GetErrorMessage(nr, desc) == NvAPI_Status::NVAPI_OK) { ri.Printf(PRINT_WARNING, "Failed to load nvapi.dll: 
%s\n", desc); } else { ri.Printf(PRINT_WARNING, "Failed to load nvapi.dll\n"); } } } #endif rhi.pix.module = LoadLibraryA("cnq3/WinPixEventRuntime.dll"); if(rhi.pix.module != NULL) { rhi.pix.BeginEventOnCommandList = (PIX::BeginEventOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXBeginEventOnCommandList"); rhi.pix.EndEventOnCommandList = (PIX::EndEventOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXEndEventOnCommandList"); rhi.pix.SetMarkerOnCommandList = (PIX::SetMarkerOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXSetMarkerOnCommandList"); rhi.pix.canBeginAndEnd = rhi.pix.BeginEventOnCommandList != NULL && rhi.pix.EndEventOnCommandList != NULL; } typedef HRESULT (__stdcall* DxcCreateInstancePtr)(REFCLSID, REFIID, LPVOID*); DxcCreateInstancePtr dxcCreateInstance = (DxcCreateInstancePtr)GetProcAddress(rhi.dxcModule, "DxcCreateInstance"); if(dxcCreateInstance == NULL) { ri.Error(ERR_FATAL, "Failed to locate DxcCreateInstance in cnq3/dxcompiler.dll\n"); } D3D(dxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&rhi.dxcUtils))); D3D(dxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&rhi.dxcCompiler))); { D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {}; if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)))) { rhi.baseVRSSupport = options6.VariableShadingRateTier != D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED; rhi.extendedVRSSupport = rhi.baseVRSSupport && options6.AdditionalShadingRatesSupported; } const char* modeLists[] = { "1x1", "1x1 2x1 1x2 2x2", "1x1 2x1 1x2 2x2 4x2 2x4 4x4" }; const int listIndex = rhi.extendedVRSSupport ? 2 : (rhi.baseVRSSupport ? 
1 : 0); ri.Printf(PRINT_ALL, "Supported VRS modes: %s\n", modeLists[listIndex]); } D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = {}; const bool hasInlineRaytracing = SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5))) && options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1; D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; const bool hasBarycentrics = SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3))) && options3.BarycentricsSupported; glInfo.maxTextureSize = MAX_TEXTURE_SIZE; glInfo.maxAnisotropy = 16; glInfo.depthFadeSupport = qtrue; Q_strncpyz(glConfig.vendor_string, rhi.adapterName, sizeof(glConfig.vendor_string)); Q_strncpyz(glConfig.renderer_string, "Direct3D 12", sizeof(glConfig.renderer_string)); Q_strncpyz(rhiInfo.name, "Direct3D 12", sizeof(rhiInfo.name)); Q_strncpyz(rhiInfo.adapter, rhi.adapterName, sizeof(rhiInfo.adapter)); rhiInfo.hasTearing = rhi.isTearingSupported; rhiInfo.hasBaseVRS = rhi.baseVRSSupport; rhiInfo.hasExtendedVRS = rhi.extendedVRSSupport; rhiInfo.isUMA = rhi.allocator->IsUMA(); rhiInfo.isCacheCoherentUMA = rhi.allocator->IsCacheCoherentUMA(); rhiInfo.hasInlineRaytracing = hasInlineRaytracing; rhiInfo.hasBarycentrics = hasBarycentrics; rhiInfo.allocatedByteCount = 0; rhi.initialized = true; return true; } void ShutDown(bool destroyWindow) { #define DESTROY_POOL(Name, Func) DestroyPool(rhi.Name, &Func, !!destroyWindow); if(!destroyWindow && r_gpuPreference->latchedString != NULL && Q_stricmp(r_gpuPreference->latchedString, r_gpuPreference->string) != 0) { destroyWindow = true; } if(!destroyWindow && r_gpuIndex->latchedString != NULL && Q_stricmp(r_gpuIndex->latchedString, r_gpuIndex->string) != 0) { destroyWindow = true; } if(rhi.frameBegun) { backEnd.renderFrame = qfalse; EndFrame(); backEnd.renderFrame = qtrue; } rhi.buffersToDelete.Clear(); if(!destroyWindow) { WaitUntilDeviceIsIdle(); rhi.texturesToTransition.Clear(); 
rhi.buffersToTransition.Clear(); DESTROY_POOL_LIST(DESTROY_POOL); return; } rhi.initialized = false; FreeLibrary(rhi.pix.module); WaitUntilDeviceIsIdle(); if(destroyWindow) { DestroyDynamicResources(); } if(rhi.frameLatencyWaitableObject != NULL) { CloseHandle(rhi.frameLatencyWaitableObject); } free(rhi.rtGeoDescs); rhi.upload.Release(); rhi.readback.Release(); rhi.mainFence.Release(); rhi.tempFence.Release(); rhi.descHeapGeneric.Release(); rhi.descHeapSamplers.Release(); rhi.descHeapRTVs.Release(); rhi.descHeapDSVs.Release(); DESTROY_POOL_LIST(DESTROY_POOL); COM_RELEASE(rhi.indirectDispatchSignature); COM_RELEASE(rhi.dxcCompiler); COM_RELEASE(rhi.dxcUtils); COM_RELEASE_ARRAY(rhi.timeStampHeaps); COM_RELEASE(rhi.mainCommandList); COM_RELEASE_ARRAY(rhi.mainCommandAllocators); COM_RELEASE(rhi.tempCommandList); COM_RELEASE(rhi.tempCommandAllocator); COM_RELEASE(rhi.swapChain); COM_RELEASE(rhi.computeCommandQueue); COM_RELEASE(rhi.mainCommandQueue); COM_RELEASE(rhi.infoQueue); COM_RELEASE(rhi.umaPool); COM_RELEASE(rhi.allocator); COM_RELEASE(rhi.device); COM_RELEASE(rhi.adapter); COM_RELEASE(rhi.factory); #if defined(D3D_DEBUG) COM_RELEASE(rhi.dxgiInfoQueue); #endif COM_RELEASE(rhi.debug); FreeLibrary(rhi.dxilModule); FreeLibrary(rhi.dxcModule); #if defined(RHI_ENABLE_NVAPI) if(rhi.nvapiActive) { NvAPI_Unload(); rhi.nvapiActive = false; } #endif #if defined(D3D_DEBUG) IDXGIDebug1* debug = NULL; if(SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&debug)))) { // DXGI_DEBUG_RLO_ALL is DXGI_DEBUG_RLO_SUMMARY | DXGI_DEBUG_RLO_DETAIL | DXGI_DEBUG_RLO_IGNORE_INTERNAL OutputDebugStringA("**** >>>> CNQ3: calling ReportLiveObjects\n"); const HRESULT hr = debug->ReportLiveObjects(DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_ALL); OutputDebugStringA(va("**** >>>> CNQ3: ReportLiveObjects returned 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr))); debug->Release(); } #endif #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { 
GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathMainCommandList); GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathTempCommandList); GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathUploadCommandList); rhi.aftermathMainCommandList = NULL; rhi.aftermathTempCommandList = NULL; rhi.aftermathUploadCommandList = NULL; rhi.aftermathCommandList = NULL; rhi.aftermathActive = false; } #endif #undef DESTROY_POOL } void BeginFrame() { if(rhi.frameBegun) { Sys_DebugPrintf("BeginFrame already called!\n"); return; } rhi.frameBegun = true; rhi.beginFrameCounter++; rhi.beforeRenderingUS = Sys_Microseconds(); WaitForSwapChain(); { const UINT64 currentFenceValue = rhi.mainFenceValues[rhi.frameIndex]; #if RHI_DEBUG_FENCE Sys_DebugPrintf("Wait: %d (BeginFrame)\n", (int)currentFenceValue); #endif rhi.mainFence.WaitOnCPU(currentFenceValue); rhi.frameIndex = (rhi.frameIndex + 1) % rhi.renderFrameCount; rhi.mainFenceValues[rhi.frameIndex] = currentFenceValue + 1; rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex(); } #if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION) if(rhi.nvapiActive) { const NvAPI_Status nr = NvAPI_D3D12_FlushRaytracingValidationMessages(rhi.device); Q_assert(nr == NvAPI_Status::NVAPI_OK); } #endif D3D12MA::Budget budget; rhi.allocator->GetBudget(&budget, NULL); rhiInfo.allocatedByteCount = (uint64_t)budget.UsageBytes; DrawGUI(); Q_assert(rhi.commandList == rhi.mainCommandList); rhi.currentRootSignature = RHI_MAKE_NULL_HANDLE(); WaitForTempCommandList(); // wait for pending copies from the upload manager to be finished rhi.upload.WaitToStartDrawing(rhi.mainCommandQueue); rhie.inputToRenderUS = (uint32_t)(Sys_Microseconds() - rhi.beforeInputSamplingUS); // reclaim used memory and start recording D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset()); D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL)); BindDynamicResources(); rhi.frameDurationQueryIndex = CmdBeginDurationQuery(); 
rhi.commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); static TextureBarrier textureBarriers[MAX_DRAWIMAGES + 1]; static BufferBarrier bufferBarriers[64]; for(uint32_t t = 0; t < rhi.texturesToTransition.count; ++t) { const HTexture handle = rhi.texturesToTransition[t]; const Texture& texture = rhi.textures.Get(handle); textureBarriers[t] = TextureBarrier(handle, texture.desc.initialState); } textureBarriers[rhi.texturesToTransition.count] = TextureBarrier(rhi.renderTargets[rhi.swapChainBufferIndex], ResourceStates::RenderTargetBit); for(uint32_t b = 0; b < rhi.buffersToTransition.count; ++b) { const HBuffer handle = rhi.buffersToTransition[b]; const Buffer& buffer = rhi.buffers.Get(handle); bufferBarriers[b] = BufferBarrier(handle, buffer.desc.initialState); } Barrier(rhi.texturesToTransition.count, textureBarriers, rhi.buffersToTransition.count, bufferBarriers); rhi.texturesToTransition.Clear(); rhi.buffersToTransition.Clear(); for(uint32_t b = 0; b < rhi.buffersToDelete.count; ) { if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter) { DestroyBuffer(rhi.buffersToDelete[b].buffer); rhi.buffersToDelete.RemoveUnordered(b); } else { b++; } } CmdInsertDebugLabel("RHI::BeginFrame", 0.8f, 0.8f, 0.8f); } void EndFrame() { if(!rhi.frameBegun) { Sys_DebugPrintf("EndFrame already called!\n"); return; } rhi.frameBegun = false; CmdInsertDebugLabel("RHI::EndFrame", 0.8f, 0.8f, 0.8f); CmdBeginBarrier(); CmdTextureBarrier(rhi.renderTargets[rhi.swapChainBufferIndex], ResourceStates::PresentBit); CmdEndBarrier(); CmdEndDurationQuery(rhi.frameDurationQueryIndex); // needs to happens before the command list is closed ResolveDurationQueries(); // stop recording D3D(rhi.commandList->Close()); #if RHI_DEBUG_FENCE Sys_DebugPrintf("Signal: %d (EndFrame)\n", rhi.mainFenceValues[rhi.frameIndex]); #endif rhi.mainFence.Signal(rhi.mainCommandQueue, rhi.mainFenceValues[rhi.frameIndex]); const int64_t currentTimeUS = Sys_Microseconds(); 
rhie.inputToPresentUS = (uint32_t)(currentTimeUS - rhi.beforeInputSamplingUS); rhie.renderToPresentUS = (uint32_t)(currentTimeUS - rhi.beforeRenderingUS); if(backEnd.renderFrame) { ID3D12CommandList* commandListArray[] = { rhi.commandList }; rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray); if(!rhi.vsync && com_nextTargetTimeUS > currentTimeUS) { const int64_t remainingUS = com_nextTargetTimeUS - currentTimeUS; Sys_MicroSleep((int)remainingUS); } Present(); static int64_t prevTS = 0; const int64_t currTS = Sys_Microseconds(); const int64_t us = currTS - prevTS; prevTS = currTS; rhie.presentToPresentUS = us; } else { rhie.presentToPresentUS = 0; } } uint32_t GetFrameIndex() { return rhi.frameIndex; } HTexture GetSwapChainTexture() { return rhi.renderTargets[rhi.swapChainBufferIndex]; } HBuffer CreateBuffer(const BufferDesc& rhiDesc) { // alignment must be 64KB (D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT) or 0, which is effectively 64KB. // https://msdn.microsoft.com/en-us/library/windows/desktop/dn903813(v=vs.85).aspx D3D12_RESOURCE_DESC desc = { 0 }; desc.Alignment = 0; // D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT desc.DepthOrArraySize = 1; desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; desc.Flags = D3D12_RESOURCE_FLAG_NONE; desc.Format = DXGI_FORMAT_UNKNOWN; desc.Width = rhiDesc.byteCount; desc.Height = 1; desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; desc.MipLevels = 1; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } if(rhiDesc.initialState & ResourceStates::RaytracingASBit) { // @NOTE: don't use D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE // it's reserved for future use and isn't the right one to use desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } bool transitionNeeded = false; D3D12_RESOURCE_STATES resourceState = D3D12_RESOURCE_STATE_COMMON; D3D12MA::ALLOCATION_DESC 
allocDesc = { 0 };
	allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
	// pick the heap type and the state the resource gets created in
	if(rhiDesc.initialState == ResourceStates::RaytracingASBit)
	{
		resourceState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
	}
	else if(rhiDesc.memoryUsage == MemoryUsage::CPU || rhiDesc.memoryUsage == MemoryUsage::Upload)
	{
		allocDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
		resourceState = D3D12_RESOURCE_STATE_GENERIC_READ; // mandated
	}
	else if(rhiDesc.memoryUsage == MemoryUsage::Readback)
	{
		allocDesc.HeapType = D3D12_HEAP_TYPE_READBACK;
		resourceState = D3D12_RESOURCE_STATE_COPY_DEST; // mandated
		desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
	}
	else
	{
		// created in the common state: BeginFrame will transition it to rhiDesc.initialState
		transitionNeeded = true;
	}
	if(rhiDesc.memoryUsage == MemoryUsage::GPU && rhi.umaPool != NULL)
	{
		// we only use the custom heap for buffers that are not supposed to be CPU-visible
		allocDesc.HeapType = D3D12_HEAP_TYPE_CUSTOM;
		allocDesc.CustomPool = rhi.umaPool;
	}
	allocDesc.Flags = D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY;
	if(rhiDesc.committedResource)
	{
		allocDesc.Flags = (D3D12MA::ALLOCATION_FLAGS)(allocDesc.Flags | D3D12MA::ALLOCATION_FLAG_COMMITTED);
	}

	D3D12MA::Allocation* allocation;
	ID3D12Resource* resource;
	D3D(rhi.allocator->CreateResource(&allocDesc, &desc, resourceState, NULL, &allocation, IID_PPV_ARGS(&resource)));
	AllocateAndFixName(rhiDesc);
	SetDebugName(resource, rhiDesc.name, D3DResourceType::Buffer);

	Buffer buffer = {};
	buffer.desc = rhiDesc;
	buffer.allocation = allocation;
	buffer.buffer = resource;
	buffer.gpuAddress = resource->GetGPUVirtualAddress();
	buffer.currentState = resourceState;
	buffer.shortLifeTime = rhiDesc.shortLifeTime;
	CreateBufferDescriptors(buffer);

	const HBuffer hbuffer = rhi.buffers.Add(buffer);
	if(transitionNeeded)
	{
		rhi.buffersToTransition.Add(hbuffer);
	}

	return hbuffer;
}

// Destroys a buffer immediately: unmaps it if it's still mapped, frees its descriptors
// (only when dynamic resources are disabled), releases the D3D12 resource and its allocation
// and removes the handle from the buffer pool.
void DestroyBuffer(HBuffer handle)
{
	Buffer& buffer = rhi.buffers.Get(handle);
	if(buffer.mapped)
	{
		UnmapBuffer(handle);
	}

	if(!rhi.useDynamicResources)
	{
		if(buffer.srvIndex != InvalidDescriptorIndex)
		{
			rhi.descHeapGeneric.Free(buffer.srvIndex);
		}
		if(buffer.uavIndex != InvalidDescriptorIndex)
		{
			rhi.descHeapGeneric.Free(buffer.uavIndex);
		}
		if(buffer.cbvIndex != InvalidDescriptorIndex)
		{
			rhi.descHeapGeneric.Free(buffer.cbvIndex);
		}
	}

	COM_RELEASE(buffer.buffer);
	COM_RELEASE(buffer.allocation);
	rhi.buffers.Remove(handle);
}

// Queues a buffer for destruction in a later frame instead of destroying it right away.
// BeginFrame destroys it once rhi.beginFrameCounter has reached the value computed here.
// Null handles are ignored.
void DestroyBufferDelayed(HBuffer buffer)
{
	if(IsNullHandle(buffer))
	{
		return;
	}

	BufferToDelete b = {};
	b.beginFrameCounter = rhi.beginFrameCounter + FrameCount + 1;
	b.buffer = buffer;
	rhi.buffersToDelete.Add(b);
}

// Maps the entire buffer and returns the CPU pointer to its data.
// Mapping a buffer that is already mapped is a fatal error.
uint8_t* MapBuffer(HBuffer handle)
{
	Buffer& buffer = rhi.buffers.Get(handle);
	if(buffer.mapped)
	{
		ri.Error(ERR_FATAL, "Attempted to map buffer '%s' that is already mapped!\n", buffer.desc.name);
		return NULL;
	}

	void* mappedPtr = NULL;
	D3D(buffer.buffer->Map(0, NULL, &mappedPtr));
	buffer.mapped = true;
	Q_assert(mappedPtr != NULL);

	return (uint8_t*)mappedPtr;
}

// Unmaps a buffer previously mapped with MapBuffer.
// Unmapping a buffer that isn't mapped is a fatal error.
void UnmapBuffer(HBuffer handle)
{
	Buffer& buffer = rhi.buffers.Get(handle);
	if(!buffer.mapped)
	{
		ri.Error(ERR_FATAL, "Attempted to unmap buffer '%s' that isn't mapped!\n", buffer.desc.name);
		return;
	}

	buffer.buffer->Unmap(0, NULL);
	buffer.mapped = false;
}

// Creates a 2D texture, or a 3D texture when rhiDesc.depth > 1, along with its
// RTV and DSV descriptors when the allowed states call for them.
// When rhiDesc.nativeResource is set, that existing ID3D12Resource is wrapped
// instead of allocating a new one.
// Newly allocated textures are created in the copy destination state and queued
// so that BeginFrame transitions them to rhiDesc.initialState.
HTexture CreateTexture(const TextureDesc& rhiDesc)
{
	Q_assert(rhiDesc.width > 0);
	Q_assert(rhiDesc.height > 0);
	Q_assert(rhiDesc.sampleCount > 0);
	Q_assert(rhiDesc.mipCount > 0);
	Q_assert(rhiDesc.mipCount <= MaxTextureMips);

	// Alignment 0 is the same as specifying D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT
	D3D12_RESOURCE_DESC desc = { 0 };
	desc.Alignment = 0;
	desc.DepthOrArraySize = 1;
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Flags = D3D12_RESOURCE_FLAG_NONE;
	desc.Format = GetD3DFormat(rhiDesc.format);
	desc.Width = rhiDesc.width;
	desc.Height = rhiDesc.height;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	desc.MipLevels = rhiDesc.mipCount;
	desc.SampleDesc.Count = rhiDesc.sampleCount;
	desc.SampleDesc.Quality = 0;
	// derive the resource flags from the states the texture is allowed to be in
	if(rhiDesc.allowedState & ResourceStates::UnorderedAccessBit)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
	}
	if(rhiDesc.allowedState & ResourceStates::RenderTargetBit)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
	}
	if(rhiDesc.allowedState & ResourceStates::DepthAccessBits)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
	}
	if((rhiDesc.allowedState & ResourceStates::ShaderAccessBits) == 0)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
	}
	if(rhiDesc.depth > 1)
	{
		desc.DepthOrArraySize = rhiDesc.depth;
		desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
	}

	D3D12MA::ALLOCATION_DESC allocDesc = { 0 };
	allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
	allocDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_NONE;
	allocDesc.Flags = D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY;
	if(rhiDesc.committedResource)
	{
		allocDesc.Flags = (D3D12MA::ALLOCATION_FLAGS)(allocDesc.Flags | D3D12MA::ALLOCATION_FLAG_COMMITTED);
	}

	D3D12_CLEAR_VALUE clearValue = {};
	const D3D12_CLEAR_VALUE* pClearValue = NULL;
	if(rhiDesc.usePreferredClearValue)
	{
		pClearValue = &clearValue;
		// uses the format returned by GetD3DFormat, before the typeless override below
		clearValue.Format = desc.Format;
		if(IsD3DDepthFormat(clearValue.Format))
		{
			clearValue.DepthStencil.Depth = rhiDesc.clearDepth;
			clearValue.DepthStencil.Stencil = rhiDesc.clearStencil;
		}
		else
		{
			memcpy(clearValue.Color, rhiDesc.clearColor, sizeof(clearValue.Color));
		}
	}

	// the depth/stencil resource itself is typeless, the DSV below uses the typed format
	if(rhiDesc.format == TextureFormat::Depth24_Stencil8)
	{
		desc.Format = DXGI_FORMAT_R24G8_TYPELESS;
	}

	// allocation stays NULL when wrapping a native resource
	D3D12MA::Allocation* allocation = NULL;
	ID3D12Resource* resource;
	if(rhiDesc.nativeResource != NULL)
	{
		resource = (ID3D12Resource*)rhiDesc.nativeResource;
	}
	else
	{
		D3D(rhi.allocator->CreateResource(&allocDesc, &desc, D3D12_RESOURCE_STATE_COPY_DEST, pClearValue, &allocation, IID_PPV_ARGS(&resource)));
	}
	AllocateAndFixName(rhiDesc);
	SetDebugName(resource, rhiDesc.name, D3DResourceType::Texture);

	uint32_t rtvIndex = InvalidDescriptorIndex;
	if(rhiDesc.allowedState & ResourceStates::RenderTargetBit)
	{
		D3D12_RENDER_TARGET_VIEW_DESC rtv = { 0 };
		rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
		rtv.Format = desc.Format;
		rtv.Texture2D.MipSlice = 0;
		rtv.Texture2D.PlaneSlice = 0;
		rtvIndex = rhi.descHeapRTVs.CreateRTV(resource, rtv);
	}

	uint32_t dsvIndex = InvalidDescriptorIndex;
	if(rhiDesc.allowedState & ResourceStates::DepthWriteBit)
	{
		D3D12_DEPTH_STENCIL_VIEW_DESC dsv = { 0 };
		dsv.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
		dsv.Format = desc.Format;
		dsv.Flags = D3D12_DSV_FLAG_NONE;
		dsv.Texture2D.MipSlice = 0;
		if(rhiDesc.format == TextureFormat::Depth24_Stencil8)
		{
			dsv.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
		}
		dsvIndex = rhi.descHeapDSVs.CreateDSV(resource, dsv);
	}

	Texture texture = {};
	texture.desc = rhiDesc;
	texture.allocation = allocation;
	texture.texture = resource;
	texture.rtvIndex = rtvIndex;
	texture.dsvIndex = dsvIndex;
	texture.currentState = D3D12_RESOURCE_STATE_COPY_DEST;
	texture.shortLifeTime = rhiDesc.shortLifeTime;
	CreateTextureDescriptors(texture);

	const HTexture handle = rhi.textures.Add(texture);
	// native resources are not queued for the initial state transition
	if(rhiDesc.nativeResource == NULL)
	{
		rhi.texturesToTransition.Add(handle);
	}

	return handle;
}

// Destroys a texture: frees its RTV and DSV descriptors, frees its SRV and per-mip UAV
// descriptors (only when dynamic resources are disabled), releases the D3D12 resource
// and its allocation and removes the handle from the texture pool.
void DestroyTexture(HTexture handle)
{
	Texture& texture = rhi.textures.Get(handle);
	if(texture.rtvIndex != InvalidDescriptorIndex)
	{
		rhi.descHeapRTVs.Free(texture.rtvIndex);
	}
	if(texture.dsvIndex != InvalidDescriptorIndex)
	{
		rhi.descHeapDSVs.Free(texture.dsvIndex);
	}

	if(!rhi.useDynamicResources)
	{
		if(texture.srvIndex != InvalidDescriptorIndex)
		{
			rhi.descHeapGeneric.Free(texture.srvIndex);
		}
		for(uint32_t m = 0; m < texture.desc.mipCount; ++m)
		{
			const uint32_t uavIndex = texture.mips[m].uavIndex;
			if(uavIndex != InvalidDescriptorIndex)
			{
				rhi.descHeapGeneric.Free(uavIndex);
			}
		}
	}

	COM_RELEASE(texture.texture);
	COM_RELEASE(texture.allocation);
	rhi.textures.Remove(handle);
}

HSampler CreateSampler(const SamplerDesc& rhiDesc)
{
	const D3D12_TEXTURE_ADDRESS_MODE addressMode = GetD3DTextureAddressMode(rhiDesc.wrapMode);
	D3D12_FILTER filter = GetD3DFilter(rhiDesc.filterMode);
	UINT maxAnisotropy = r_ext_max_anisotropy->integer;
	if(filter == D3D12_FILTER_ANISOTROPIC && maxAnisotropy <= 1)
	{
filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; maxAnisotropy = 1; } if(filter != D3D12_FILTER_ANISOTROPIC) { maxAnisotropy = 1; } D3D12_SAMPLER_DESC desc = { 0 }; desc.AddressU = addressMode; desc.AddressV = addressMode; desc.AddressW = addressMode; desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NONE; desc.MaxAnisotropy = maxAnisotropy; desc.MaxLOD = 666.0f; desc.MinLOD = rhiDesc.minLOD; desc.MipLODBias = rhiDesc.mipLODBias; desc.Filter = filter; const uint32_t index = CreateSampler(desc); Sampler sampler; sampler.desc = rhiDesc; sampler.shortLifeTime = rhiDesc.shortLifeTime; sampler.heapIndex = index; const HSampler handle = rhi.samplers.Add(sampler); return handle; } void DestroySampler(HSampler hsampler) { const Sampler& sampler = rhi.samplers.Get(hsampler); if(!rhi.useDynamicResources) { rhi.descHeapSamplers.Free(sampler.heapIndex); } rhi.samplers.Remove(hsampler); } static void AddShaderVisibility(bool outVis[ShaderStage::Count], D3D12_SHADER_VISIBILITY inVis) { switch(inVis) { case D3D12_SHADER_VISIBILITY_VERTEX: outVis[ShaderStage::Vertex] = true; break; case D3D12_SHADER_VISIBILITY_PIXEL: outVis[ShaderStage::Pixel] = true; break; default: break; } } HRootSignature CreateRootSignature(const RootSignatureDesc& rhiDesc) { ASSERT_DR_DISABLED(); RootSignature rhiSignature = {}; rhiSignature.genericTableIndex = UINT32_MAX; rhiSignature.samplerTableIndex = UINT32_MAX; rhiSignature.genericDescCount = 0; rhiSignature.samplerDescCount = rhiDesc.samplerCount; bool shaderVis[ShaderStage::Count] = {}; // // root constants // int parameterCount = 0; D3D12_ROOT_PARAMETER parameters[16]; for(int s = 0; s < ShaderStage::Count; ++s) { if(rhiDesc.constants[s].byteCount > 0) { rhiSignature.constants[s].parameterIndex = parameterCount; D3D12_ROOT_PARAMETER& p = parameters[parameterCount]; p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; p.Constants.Num32BitValues = AlignUp(rhiDesc.constants[s].byteCount, 4) / 4; p.Constants.RegisterSpace = 0; p.Constants.ShaderRegister = 0; 
p.ShaderVisibility = GetD3DVisibility((ShaderStage::Id)s); AddShaderVisibility(shaderVis, p.ShaderVisibility); parameterCount++; } } Q_assert(parameterCount <= ShaderStage::Count); // // CBV SRV UAV table // uint32_t onePastLastTableIndex = 0; D3D12_DESCRIPTOR_RANGE genericRanges[ARRAY_LEN(rhiDesc.genericRanges)] = {}; for(uint32_t rangeIndex = 0; rangeIndex < rhiDesc.genericRangeCount; ++rangeIndex) { D3D12_DESCRIPTOR_RANGE& r = genericRanges[rangeIndex]; const RootSignatureDesc::DescriptorRange& rIn = rhiDesc.genericRanges[rangeIndex]; Q_assert(rIn.count > 0); r.BaseShaderRegister = 0; r.NumDescriptors = rIn.count; r.OffsetInDescriptorsFromTableStart = rIn.firstIndex; r.RangeType = GetD3DDescriptorRangeType(rIn.type); r.RegisterSpace = 0; if(rIn.type == DescriptorType::Buffer) { // @TODO: or bump up BaseShaderRegister, or let the user decide r.RegisterSpace = 1; } rhiSignature.genericDescCount += rIn.count; onePastLastTableIndex = max(onePastLastTableIndex, rIn.firstIndex + rIn.count); } // if you trigger this, you have at least 1 gap in your root signature Q_assert(onePastLastTableIndex == rhiSignature.genericDescCount); rhiSignature.genericDescCount = onePastLastTableIndex; // safety net... 
if(rhiSignature.genericDescCount > 0) { rhiSignature.genericTableIndex = parameterCount; D3D12_ROOT_PARAMETER& p = parameters[parameterCount++]; p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; p.DescriptorTable.NumDescriptorRanges = rhiDesc.genericRangeCount; p.DescriptorTable.pDescriptorRanges = genericRanges; p.ShaderVisibility = GetD3DVisibility(rhiDesc.genericVisibility); AddShaderVisibility(shaderVis, p.ShaderVisibility); } // // sampler table // D3D12_DESCRIPTOR_RANGE samplerRange = {}; if(rhiDesc.samplerCount > 0) { rhiSignature.samplerTableIndex = parameterCount; D3D12_DESCRIPTOR_RANGE& r = samplerRange; r.BaseShaderRegister = 0; r.NumDescriptors = rhiDesc.samplerCount; r.OffsetInDescriptorsFromTableStart = 0; r.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; r.RegisterSpace = 0; D3D12_ROOT_PARAMETER& p = parameters[parameterCount++]; p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; p.DescriptorTable.NumDescriptorRanges = 1; p.DescriptorTable.pDescriptorRanges = &samplerRange; p.ShaderVisibility = GetD3DVisibility(rhiDesc.samplerVisibility); AddShaderVisibility(shaderVis, p.ShaderVisibility); } D3D12_ROOT_SIGNATURE_DESC desc = { 0 }; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; if(!shaderVis[ShaderStage::Vertex]) { desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; } if(!shaderVis[ShaderStage::Pixel]) { desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; } if(rhiDesc.usingVertexBuffers) { desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; } desc.NumParameters = parameterCount; desc.pParameters = parameters; desc.NumStaticSamplers = 0; desc.pStaticSamplers = NULL; 
ID3DBlob* blob; ID3DBlob* errorBlob; if(FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &errorBlob))) { ri.Error(ERR_FATAL, "Root signature creation failed!\n%s\n", (const char*)errorBlob->GetBufferPointer()); } COM_RELEASE(errorBlob); ID3D12RootSignature* signature; D3D(rhi.device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&signature))); COM_RELEASE(blob); AllocateAndFixName(rhiDesc); SetDebugName(signature, rhiDesc.name, D3DResourceType::RootSignature); rhiSignature.desc = rhiDesc; rhiSignature.signature = signature; rhiSignature.shortLifeTime = rhiDesc.shortLifeTime; return rhi.rootSignatures.Add(rhiSignature); } void DestroyRootSignature(HRootSignature signature) { ASSERT_DR_DISABLED(); COM_RELEASE(rhi.rootSignatures.Get(signature).signature); rhi.rootSignatures.Remove(signature); } HDescriptorTable CreateDescriptorTable(const DescriptorTableDesc& desc) { ASSERT_DR_DISABLED(); const RootSignature& sig = rhi.rootSignatures.Get(desc.rootSignature); const char* srvName = AllocateName(va("%s GPU-visible CBV SRV UAV", desc.name), desc.shortLifeTime); const char* samName = AllocateName(va("%s GPU-visible sampler", desc.name), desc.shortLifeTime); DescriptorTable table = { 0 }; table.genericHeap = CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, sig.genericDescCount, true, srvName); table.samplerHeap = CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sig.samplerDescCount, true, samName); table.shortLifeTime = desc.shortLifeTime; const Texture& nullTex = rhi.textures.Get(rhi.nullTexture); const Texture& nullRWTex = rhi.textures.Get(rhi.nullRWTexture); const Buffer& nullBuffer = rhi.buffers.Get(rhi.nullBuffer); const Buffer& nullRWBuffer = rhi.buffers.Get(rhi.nullRWBuffer); // bind null CBV SRV UAV resources for(uint32_t r = 0; r < sig.desc.genericRangeCount; ++r) { const RootSignatureDesc::DescriptorRange& range = sig.desc.genericRanges[r]; uint32_t index; switch(range.type) { 
case DescriptorType::Texture: index = nullTex.srvIndex; break; case DescriptorType::RWTexture: index = nullRWTex.mips[0].uavIndex; break; case DescriptorType::Buffer: index = nullBuffer.srvIndex; break; case DescriptorType::RWBuffer: index = nullRWBuffer.uavIndex; break; default: Q_assert(!"Unsupported descriptor type"); continue; } for(uint32_t i = 0; i < range.count; ++i) { CopyDescriptor(table.genericHeap, range.firstIndex + i, rhi.descHeapGeneric, index); } } // bind null samplers for(uint32_t d = 0; d < sig.desc.samplerCount; ++d) { Handle type, index, gen; DecomposeHandle(&type, &index, &gen, rhi.nullSampler.v); CopyDescriptor(table.samplerHeap, d, rhi.descHeapSamplers, index); } return rhi.descriptorTables.Add(table); } void UpdateDescriptorTable(HDescriptorTable htable, const DescriptorTableUpdate& update) { ASSERT_DR_DISABLED(); Q_assert(update.textures != NULL); DescriptorTable& table = rhi.descriptorTables.Get(htable); if(update.type == DescriptorType::Texture && table.genericHeap) { for(uint32_t i = 0; i < update.resourceCount; ++i) { const Texture& texture = rhi.textures.Get(update.textures[i]); Q_assert(texture.srvIndex != InvalidDescriptorIndex); CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex); } } else if(update.type == DescriptorType::RWBuffer && table.genericHeap) { for(uint32_t i = 0; i < update.resourceCount; ++i) { const Buffer& buffer = rhi.buffers.Get(update.buffers[i]); Q_assert(buffer.uavIndex != InvalidDescriptorIndex); CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.uavIndex); } } else if(update.type == DescriptorType::RWTexture && table.genericHeap) { uint32_t destIndex = update.firstIndex; for(uint32_t i = 0; i < update.resourceCount; ++i) { const Texture& texture = rhi.textures.Get(update.textures[i]); uint32_t start; uint32_t end; if(update.uavMipChain) { start = 0; end = texture.desc.mipCount; } else { Q_assert(update.uavMipSlice < 
texture.desc.mipCount); start = update.uavMipSlice; end = start + 1; } for(uint32_t m = start; m < end; ++m) { Q_assert(texture.mips[m].uavIndex != InvalidDescriptorIndex); CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex); } } } else if(update.type == DescriptorType::Sampler && table.samplerHeap) { for(uint32_t i = 0; i < update.resourceCount; ++i) { Handle htype, index, gen; DecomposeHandle(&htype, &index, &gen, update.samplers[i].v); Q_assert(index != InvalidDescriptorIndex); CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, index); } } else { ri.Error(ERR_FATAL, "UpdateDescriptorTable: unsupported descriptor type\n"); } } void DestroyDescriptorTable(HDescriptorTable handle) { ASSERT_DR_DISABLED(); DescriptorTable& table = rhi.descriptorTables.Get(handle); COM_RELEASE(table.genericHeap); COM_RELEASE(table.samplerHeap); rhi.descriptorTables.Remove(handle); } HPipeline CreateGraphicsPipeline(const GraphicsPipelineDesc& rhiDesc) { if(!rhi.useDynamicResources) { Q_assert(!IsNullHandle(rhiDesc.rootSignature)); Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Graphics); } D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { 0 }; desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far desc.pRootSignature = rhi.useDynamicResources ? 
rhi.dynamicResources.rootSignature : rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; desc.SampleDesc.Count = 1; desc.SampleMask = UINT_MAX; UINT semanticIndices[ShaderSemantic::Count] = { 0 }; D3D12_INPUT_ELEMENT_DESC inputElementDescs[MaxVertexAttributes]; for(int a = 0; a < rhiDesc.vertexLayout.attributeCount; ++a) { const VertexAttribute& va = rhiDesc.vertexLayout.attributes[a]; D3D12_INPUT_ELEMENT_DESC& ied = inputElementDescs[a]; ied.SemanticName = GetD3DSemanticName(va.semantic); ied.SemanticIndex = semanticIndices[va.semantic]++; ied.Format = GetD3DFormat(va.dataType, va.vectorLength); ied.InputSlot = va.vertexBufferIndex; ied.AlignedByteOffset = va.structByteOffset; ied.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; ied.InstanceDataStepRate = 0; } desc.InputLayout.NumElements = rhiDesc.vertexLayout.attributeCount; desc.InputLayout.pInputElementDescs = inputElementDescs; for(int t = 0; t < rhiDesc.renderTargetCount; ++t) { const GraphicsPipelineDesc::RenderTarget& rtIn = rhiDesc.renderTargets[t]; D3D12_RENDER_TARGET_BLEND_DESC& rtOut = desc.BlendState.RenderTarget[t]; rtOut.BlendEnable = TRUE; rtOut.BlendOp = D3D12_BLEND_OP_ADD; rtOut.BlendOpAlpha = D3D12_BLEND_OP_ADD; rtOut.LogicOpEnable = FALSE; rtOut.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; // RGBA rtOut.SrcBlend = GetD3DSourceBlend(rtIn.q3BlendMode); rtOut.DestBlend = GetD3DDestBlend(rtIn.q3BlendMode); rtOut.SrcBlendAlpha = GetAlphaBlendFromColorBlend(rtOut.SrcBlend); rtOut.DestBlendAlpha = GetAlphaBlendFromColorBlend(rtOut.DestBlend); if(rtOut.SrcBlend == D3D12_BLEND_ONE && rtOut.DestBlend == D3D12_BLEND_ZERO) { rtOut.BlendEnable = FALSE; } desc.RTVFormats[t] = GetD3DFormat(rtIn.format); } desc.NumRenderTargets = rhiDesc.renderTargetCount; desc.DepthStencilState.DepthEnable = rhiDesc.depthStencil.enableDepthTest ? 
TRUE : FALSE; desc.DepthStencilState.DepthFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.depthComparison); desc.DepthStencilState.DepthWriteMask = rhiDesc.depthStencil.enableDepthWrites ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; desc.DepthStencilState.StencilEnable = rhiDesc.depthStencil.enableStencil; desc.DepthStencilState.StencilReadMask = rhiDesc.depthStencil.stencilReadMask; desc.DepthStencilState.StencilWriteMask = rhiDesc.depthStencil.stencilWriteMask; desc.DepthStencilState.BackFace.StencilFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.backFace.comparison); desc.DepthStencilState.BackFace.StencilPassOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.passOp); desc.DepthStencilState.BackFace.StencilFailOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.failOp); desc.DepthStencilState.BackFace.StencilDepthFailOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.depthFailOp); desc.DepthStencilState.FrontFace.StencilFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.frontFace.comparison); desc.DepthStencilState.FrontFace.StencilPassOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.passOp); desc.DepthStencilState.FrontFace.StencilFailOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.failOp); desc.DepthStencilState.FrontFace.StencilDepthFailOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.depthFailOp); desc.DSVFormat = GetD3DFormat(rhiDesc.depthStencil.depthStencilFormat); desc.VS.pShaderBytecode = rhiDesc.vertexShader.data; desc.VS.BytecodeLength = rhiDesc.vertexShader.byteCount; desc.PS.pShaderBytecode = rhiDesc.pixelShader.data; desc.PS.BytecodeLength = rhiDesc.pixelShader.byteCount; desc.RasterizerState.AntialiasedLineEnable = FALSE; desc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; desc.RasterizerState.CullMode = GetD3DCullMode(rhiDesc.rasterizer.cullMode); desc.RasterizerState.FrontCounterClockwise = TRUE; desc.RasterizerState.DepthBias = rhiDesc.rasterizer.polygonOffset ? 
1 : 0; desc.RasterizerState.DepthBiasClamp = 0.0f; desc.RasterizerState.SlopeScaledDepthBias = rhiDesc.rasterizer.polygonOffset ? 1.0f : 0.0f; desc.RasterizerState.FillMode = rhiDesc.rasterizer.wireFrame ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; desc.RasterizerState.ForcedSampleCount = 0; desc.RasterizerState.MultisampleEnable = FALSE; desc.RasterizerState.DepthClipEnable = rhiDesc.rasterizer.clampDepth ? FALSE : TRUE; ID3D12PipelineState* pso; D3D(rhi.device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso))); AllocateAndFixName(rhiDesc); SetDebugName(pso, rhiDesc.name, D3DResourceType::PipelineState); Pipeline rhiPipeline; rhiPipeline.type = PipelineType::Graphics; rhiPipeline.graphicsDesc = rhiDesc; rhiPipeline.pso = pso; rhiPipeline.shortLifeTime = rhiDesc.shortLifeTime; return rhi.pipelines.Add(rhiPipeline); } HPipeline CreateComputePipeline(const ComputePipelineDesc& rhiDesc) { if(!rhi.useDynamicResources) { Q_assert(!IsNullHandle(rhiDesc.rootSignature)); Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Compute); } D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { 0 }; desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far desc.pRootSignature = rhi.useDynamicResources ? 
rhi.dynamicResources.rootSignature : rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; desc.CS.pShaderBytecode = rhiDesc.shader.data; desc.CS.BytecodeLength = rhiDesc.shader.byteCount; ID3D12PipelineState* pso; D3D(rhi.device->CreateComputePipelineState(&desc, IID_PPV_ARGS(&pso))); AllocateAndFixName(rhiDesc); SetDebugName(pso, rhiDesc.name, D3DResourceType::PipelineState); Pipeline rhiPipeline; rhiPipeline.type = PipelineType::Compute; rhiPipeline.computeDesc = rhiDesc; rhiPipeline.pso = pso; rhiPipeline.shortLifeTime = rhiDesc.shortLifeTime; return rhi.pipelines.Add(rhiPipeline); } void DestroyPipeline(HPipeline pipeline) { COM_RELEASE(rhi.pipelines.Get(pipeline).pso); rhi.pipelines.Remove(pipeline); } HShader CreateShader(const ShaderDesc& desc) { IDxcBlobEncoding* blobEncoding; D3D(rhi.dxcUtils->CreateBlob(desc.source, desc.sourceLength, CP_ACP, &blobEncoding)); LPCWSTR targetW = L"???"; LPCSTR targetName = "???"; switch(desc.stage) { case ShaderStage::Vertex: targetW = L"vs_6_0"; targetName = "vs"; break; case ShaderStage::Pixel: targetW = L"ps_6_0"; targetName = "ps"; break; case ShaderStage::Compute: targetW = L"cs_6_0"; targetName = "cs"; break; default: Q_assert(0); break; } wchar_t entryPointW[256]; MultiByteToWideChar(CP_ACP, 0, desc.entryPoint, -1, entryPointW, ARRAY_LEN(entryPointW)); struct MacroW { wchar_t macro[256]; }; MacroW macros[16]; Q_assert(desc.macroCount <= ARRAY_LEN(macros)); LPCWSTR arguments[64]; UINT32 argumentCount = 0; #define PushArg(Arg) arguments[argumentCount++] = Arg PushArg(L"E"); PushArg(L"-E"); PushArg(entryPointW); PushArg(L"-T"); PushArg(targetW); PushArg(DXC_ARG_WARNINGS_ARE_ERRORS); // -WX #if defined(D3D_DEBUG) PushArg(DXC_ARG_DEBUG); // -Zi embeds debug info PushArg(DXC_ARG_SKIP_OPTIMIZATIONS); // -Od disables optimizations PushArg(DXC_ARG_ENABLE_STRICTNESS); // -Ges enables strict mode PushArg(DXC_ARG_IEEE_STRICTNESS); // -Gis forces IEEE strictness PushArg(L"-Qembed_debug"); // -Qembed_debug embeds debug info in 
shader container #else PushArg(L"-Qstrip_debug"); PushArg(L"-Qstrip_reflect"); PushArg(DXC_ARG_OPTIMIZATION_LEVEL3); // -O3 #endif PushArg(L"-D"); PushArg(desc.stage == ShaderStage::Vertex ? L"VERTEX_SHADER=1" : L"VERTEX_SHADER=0"); PushArg(L"-D"); PushArg(desc.stage == ShaderStage::Pixel ? L"PIXEL_SHADER=1" : L"PIXEL_SHADER=0"); PushArg(L"-D"); PushArg(desc.stage == ShaderStage::Compute ? L"COMPUTE_SHADER=1" : L"COMPUTE_SHADER=0"); for(uint32_t m = 0; m < desc.macroCount; ++m) { const char* input = va("%s=%s", desc.macros[m].name, desc.macros[m].value); MacroW& output = macros[m]; MultiByteToWideChar(CP_ACP, 0, input, -1, output.macro, ARRAY_LEN(output.macro)); PushArg(L"-D"); PushArg(output.macro); } #undef PushArg Q_assert(argumentCount <= ARRAY_LEN(arguments)); DxcBuffer sourceBuffer = {}; sourceBuffer.Ptr = blobEncoding->GetBufferPointer(); sourceBuffer.Size = blobEncoding->GetBufferSize(); sourceBuffer.Encoding = 0; IDxcResult* result = NULL; HRESULT hr = S_OK; if(FAILED(rhi.dxcCompiler->Compile(&sourceBuffer, arguments, argumentCount, NULL, IID_PPV_ARGS(&result))) || FAILED(result->GetStatus(&hr)) || FAILED(hr)) { IDxcBlobUtf8* errors; if(result != NULL && SUCCEEDED(result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), NULL)) && errors->GetStringLength() > 0) { ri.Error(ERR_FATAL, "Shader (%s) compilation failed:\n%s\n", targetName, (const char*)errors->GetBufferPointer()); } else { ri.Error(ERR_FATAL, "Shader (%s) compilation failed:\n", targetName); } return RHI_MAKE_NULL_HANDLE(); } IDxcBlob* shaderBlob; D3D(result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&shaderBlob), NULL)); blobEncoding->Release(); result->Release(); Shader shader; shader.blob = shaderBlob; return rhi.shaders.Add(shader); } ShaderByteCode GetShaderByteCode(HShader shader) { IDxcBlob* const blob = rhi.shaders.Get(shader).blob; ShaderByteCode byteCode; byteCode.data = blob->GetBufferPointer(); byteCode.byteCount = blob->GetBufferSize(); return byteCode; } void DestroyShader(HShader 
shader) { COM_RELEASE(rhi.shaders.Get(shader).blob); rhi.shaders.Remove(shader); } void CmdBindRenderTargets(uint32_t colorCount, const HTexture* colorTargets, const HTexture* depthStencilTarget) { Q_assert(CanWriteCommands()); Q_assert(colorCount > 0 || colorTargets == NULL); D3D12_CPU_DESCRIPTOR_HANDLE rtvHandles[MaxRenderTargets] = {}; for(uint32_t t = 0; t < colorCount; ++t) { const uint32_t rtvIndex = rhi.textures.Get(colorTargets[t]).rtvIndex; rtvHandles[t] = rhi.descHeapRTVs.GetCPUHandle(rtvIndex); } D3D12_CPU_DESCRIPTOR_HANDLE* dsvHandlePtr = NULL; D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle; if(depthStencilTarget != NULL) { const Texture& depthStencil = rhi.textures.Get(*depthStencilTarget); dsvHandle = rhi.descHeapDSVs.GetCPUHandle(depthStencil.dsvIndex); dsvHandlePtr = &dsvHandle; } rhi.commandList->OMSetRenderTargets(colorCount, rtvHandles, FALSE, dsvHandlePtr); } void CmdBindRootSignature(HRootSignature rootSignature) { Q_assert(CanWriteCommands()); const RootSignature& sig = rhi.rootSignatures.Get(rootSignature); if(sig.desc.pipelineType == PipelineType::Graphics && rootSignature != rhi.currentRootSignature) { rhi.currentRootSignature = rootSignature; rhi.commandList->SetGraphicsRootSignature(sig.signature); } else if(sig.desc.pipelineType == PipelineType::Compute) { rhi.commandList->SetComputeRootSignature(sig.signature); } } void CmdBindDescriptorTable(HRootSignature sigHandle, HDescriptorTable handle) { Q_assert(CanWriteCommands()); const DescriptorTable& table = rhi.descriptorTables.Get(handle); const RootSignature& sig = rhi.rootSignatures.Get(sigHandle); UINT heapCount = 0; ID3D12DescriptorHeap* heaps[2]; if(sig.genericTableIndex != UINT32_MAX) { heaps[heapCount++] = table.genericHeap; } if(sig.samplerTableIndex != UINT32_MAX) { heaps[heapCount++] = table.samplerHeap; } rhi.commandList->SetDescriptorHeaps(heapCount, heaps); if(sig.genericTableIndex != UINT32_MAX) { if(sig.desc.pipelineType == PipelineType::Graphics) { 
rhi.commandList->SetGraphicsRootDescriptorTable(sig.genericTableIndex, table.genericHeap->GetGPUDescriptorHandleForHeapStart()); } else if(sig.desc.pipelineType == PipelineType::Compute) { rhi.commandList->SetComputeRootDescriptorTable(sig.genericTableIndex, table.genericHeap->GetGPUDescriptorHandleForHeapStart()); } } if(sig.samplerTableIndex != UINT32_MAX) { if(sig.desc.pipelineType == PipelineType::Graphics) { rhi.commandList->SetGraphicsRootDescriptorTable(sig.samplerTableIndex, table.samplerHeap->GetGPUDescriptorHandleForHeapStart()); } else if(sig.desc.pipelineType == PipelineType::Compute) { rhi.commandList->SetComputeRootDescriptorTable(sig.samplerTableIndex, table.samplerHeap->GetGPUDescriptorHandleForHeapStart()); } } } void CmdBindPipeline(HPipeline pipeline) { Q_assert(CanWriteCommands()); const Pipeline& pipe = rhi.pipelines.Get(pipeline); rhi.commandList->SetPipelineState(pipe.pso); } void CmdBindVertexBuffers(uint32_t count, const HBuffer* vertexBuffers, const uint32_t* byteStrides, const uint32_t* startByteOffsets) { Q_assert(CanWriteCommands()); Q_assert(count <= MaxVertexBuffers); count = min(count, MaxVertexBuffers); D3D12_VERTEX_BUFFER_VIEW views[MaxVertexBuffers]; for(uint32_t v = 0; v < count; ++v) { const Buffer& buffer = rhi.buffers.Get(vertexBuffers[v]); const uint32_t offset = startByteOffsets ? 
startByteOffsets[v] : 0; views[v].BufferLocation = buffer.gpuAddress + offset; views[v].SizeInBytes = buffer.desc.byteCount - offset; views[v].StrideInBytes = byteStrides[v]; } rhi.commandList->IASetVertexBuffers(0, count, views); } void CmdBindIndexBuffer(HBuffer indexBuffer, IndexType::Id type, uint32_t startByteOffset) { Q_assert(CanWriteCommands()); const Buffer& buffer = rhi.buffers.Get(indexBuffer); D3D12_INDEX_BUFFER_VIEW view = { 0 }; view.BufferLocation = buffer.gpuAddress + startByteOffset; view.Format = GetD3DIndexFormat(type); view.SizeInBytes = (UINT)(buffer.desc.byteCount - startByteOffset); rhi.commandList->IASetIndexBuffer(&view); } void CmdSetViewport(uint32_t x, uint32_t y, uint32_t w, uint32_t h, float minDepth, float maxDepth) { Q_assert(CanWriteCommands()); D3D12_VIEWPORT viewport; viewport.TopLeftX = x; viewport.TopLeftY = y; viewport.Width = w; viewport.Height = h; viewport.MinDepth = minDepth; viewport.MaxDepth = maxDepth; rhi.commandList->RSSetViewports(1, &viewport); } void CmdSetScissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h) { Q_assert(CanWriteCommands()); D3D12_RECT rect; rect.left = x; rect.top = y; rect.right = x + w; rect.bottom = y + h; rhi.commandList->RSSetScissorRects(1, &rect); } void CmdSetRootConstants(HRootSignature rootSignature, ShaderStage::Id shaderType, const void* constants) { ASSERT_DR_DISABLED(); Q_assert(CanWriteCommands()); Q_assert(constants); const RootSignature& sig = rhi.rootSignatures.Get(rootSignature); const UINT parameterIndex = sig.constants[shaderType].parameterIndex; const UINT constantCount = sig.desc.constants[shaderType].byteCount / 4; CmdBindRootSignature(rootSignature); if(sig.desc.pipelineType == PipelineType::Graphics) { rhi.commandList->SetGraphicsRoot32BitConstants(parameterIndex, constantCount, constants, 0); } else if(sig.desc.pipelineType == PipelineType::Compute) { rhi.commandList->SetComputeRoot32BitConstants(parameterIndex, constantCount, constants, 0); } } void 
CmdSetGraphicsRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants) { ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); SetRootConstants(byteOffset, byteCount, constants, true); } void CmdSetComputeRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants) { ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); SetRootConstants(byteOffset, byteCount, constants, false); } void CmdDraw(uint32_t vertexCount, uint32_t firstVertex) { Q_assert(CanWriteCommands()); rhi.commandList->DrawInstanced(vertexCount, 1, firstVertex, 0); } void CmdDrawIndexed(uint32_t indexCount, uint32_t firstIndex, uint32_t firstVertex) { Q_assert(CanWriteCommands()); rhi.commandList->DrawIndexedInstanced(indexCount, 1, firstIndex, firstVertex, 0); } void CmdDispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { Q_assert(CanWriteCommands()); rhi.commandList->Dispatch(groupCountX, groupCountY, groupCountZ); } void CmdDispatchIndirect(HBuffer hbuffer, uint32_t byteOffset) { Q_assert(CanWriteCommands()); Q_assert(byteOffset % 4 == 0); const Buffer& buffer = rhi.buffers.Get(hbuffer); rhi.commandList->ExecuteIndirect(rhi.indirectDispatchSignature, 1, buffer.buffer, (UINT64)byteOffset, NULL, 0); } uint32_t CmdBeginDurationQuery() { Q_assert(CanWriteCommands()); FrameQueries& fq = rhi.frameQueries[rhi.frameIndex]; Q_assert(fq.durationQueryCount < MaxDurationQueries); if(fq.durationQueryCount >= MaxDurationQueries) { return UINT32_MAX; } const uint32_t durationIndex = fq.durationQueryCount; const UINT timeStampBeginIndex = durationIndex * 2; rhi.commandList->EndQuery(rhi.timeStampHeaps[rhi.frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, timeStampBeginIndex); DurationQuery& query = fq.durationQueries[durationIndex]; if(backEnd.renderFrame) { Q_assert(query.state == QueryState::Free); } query.state = QueryState::Begun; fq.durationQueryCount++; return durationIndex; } void CmdEndDurationQuery(uint32_t durationIndex) { Q_assert(CanWriteCommands()); 
FrameQueries& fq = rhi.frameQueries[rhi.frameIndex]; Q_assert(durationIndex < fq.durationQueryCount); if(durationIndex >= fq.durationQueryCount) { return; } DurationQuery& query = fq.durationQueries[durationIndex]; Q_assert(query.state == QueryState::Begun); const UINT timeStampEndIndex = durationIndex * 2 + 1; rhi.commandList->EndQuery(rhi.timeStampHeaps[rhi.frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, timeStampEndIndex); query.state = QueryState::Ended; } void CmdBeginBarrier() { Q_assert(CanWriteCommands()); Q_assert(rhi.textureBarrierCount == 0); Q_assert(rhi.bufferBarrierCount == 0); Q_assert(!rhi.barrierOpen); Q_assert(rhi.barrierCommandList == NULL); rhi.barrierCommandList = rhi.commandList; rhi.textureBarrierCount = 0; rhi.bufferBarrierCount = 0; rhi.barrierOpen = true; } void CmdTextureBarrier(HTexture texture, ResourceStates::Flags newState) { Q_assert(CanWriteCommands()); Q_assert(rhi.barrierOpen); Q_assert(rhi.commandList == rhi.barrierCommandList); Q_assert(rhi.textureBarrierCount < ARRAY_LEN(rhi.textureBarriers)); if(rhi.textureBarrierCount < ARRAY_LEN(rhi.textureBarriers)) { TextureBarrier* const barrier = &rhi.textureBarriers[rhi.textureBarrierCount++]; barrier->texture = texture; barrier->newState = newState; } else { ri.Error(ERR_FATAL, "Not enough texture barrier storage!\n"); } } void CmdBufferBarrier(HBuffer buffer, ResourceStates::Flags newState) { Q_assert(CanWriteCommands()); Q_assert(rhi.barrierOpen); Q_assert(rhi.commandList == rhi.barrierCommandList); Q_assert(rhi.bufferBarrierCount < ARRAY_LEN(rhi.bufferBarriers)); if(rhi.bufferBarrierCount < ARRAY_LEN(rhi.bufferBarriers)) { BufferBarrier* const barrier = &rhi.bufferBarriers[rhi.bufferBarrierCount++]; barrier->buffer = buffer; barrier->newState = newState; } else { ri.Error(ERR_FATAL, "Not enough buffer barrier storage!\n"); } } void CmdEndBarrier() { Q_assert(CanWriteCommands()); Q_assert(rhi.barrierOpen); Q_assert(rhi.commandList == rhi.barrierCommandList); if(rhi.textureBarrierCount > 0 
|| rhi.bufferBarrierCount > 0) { Barrier(rhi.textureBarrierCount, rhi.textureBarriers, rhi.bufferBarrierCount, rhi.bufferBarriers); } rhi.barrierCommandList = NULL; rhi.textureBarrierCount = 0; rhi.bufferBarrierCount = 0; rhi.barrierOpen = false; } void CmdClearColorTarget(HTexture texture, const vec4_t clearColor, const Rect* rect) { Q_assert(CanWriteCommands()); D3D12_RECT* d3dRectPtr = NULL; D3D12_RECT d3dRect = {}; UINT rectCount = 0; if(rect != NULL) { rectCount = 1; d3dRect.left = rect->x; d3dRect.top = rect->y; d3dRect.right = rect->x + rect->w; d3dRect.bottom = rect->y + rect->h; d3dRectPtr = &d3dRect; } const Texture& renderTarget = rhi.textures.Get(texture); const D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = rhi.descHeapRTVs.GetCPUHandle(renderTarget.rtvIndex); rhi.commandList->ClearRenderTargetView(rtvHandle, clearColor, rectCount, d3dRectPtr); } void CmdClearDepthStencilTarget(HTexture texture, bool clearDepth, float depth, bool clearStencil, uint8_t stencil, const Rect* rect) { Q_assert(CanWriteCommands()); Q_assert(clearDepth || clearStencil); if(!clearDepth && !clearStencil) { return; } D3D12_RECT* d3dRectPtr = NULL; D3D12_RECT d3dRect = {}; UINT rectCount = 0; if(rect != NULL) { rectCount = 1; d3dRect.left = rect->x; d3dRect.top = rect->y; d3dRect.right = rect->x + rect->w; d3dRect.bottom = rect->y + rect->h; d3dRectPtr = &d3dRect; } D3D12_CLEAR_FLAGS flags = (D3D12_CLEAR_FLAGS)0; if(clearDepth) { flags |= D3D12_CLEAR_FLAG_DEPTH; } if(clearStencil) { flags |= D3D12_CLEAR_FLAG_STENCIL; } const Texture& depthStencil = rhi.textures.Get(texture); const D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle = rhi.descHeapDSVs.GetCPUHandle(depthStencil.dsvIndex); rhi.commandList->ClearDepthStencilView(dsvHandle, flags, depth, stencil, rectCount, d3dRectPtr); } void CmdClearTextureUAV(HTexture htexture, uint32_t mipIndex, const uint32_t* values) { ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); Q_assert(values); static_assert(sizeof(UINT) == 4, "sizeof(UINT) isn't 4 as 
expected"); const Texture& texture = rhi.textures.Get(htexture); Q_assert(mipIndex < texture.desc.mipCount); const uint32_t descIndex = texture.mips[mipIndex].uavIndex; const UINT descSize = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.dynamicResources.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = rhi.dynamicResources.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); cpuHandle.ptr += descIndex * descSize; gpuHandle.ptr += descIndex * descSize; rhi.commandList->ClearUnorderedAccessViewUint(gpuHandle, cpuHandle, texture.texture, values, 0, NULL); } void CmdClearBufferUAV(HBuffer hbuffer, uint32_t value) { ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); static_assert(sizeof(UINT) == 4, "sizeof(UINT) isn't 4 as expected"); const Buffer& buffer = rhi.buffers.Get(hbuffer); const uint32_t descIndex = buffer.uavIndex; const UINT descSize = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.dynamicResources.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = rhi.dynamicResources.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); cpuHandle.ptr += descIndex * descSize; gpuHandle.ptr += descIndex * descSize; const UINT values[4] = { value, value, value, value }; rhi.commandList->ClearUnorderedAccessViewUint(gpuHandle, cpuHandle, buffer.buffer, values, 0, NULL); } void CmdInsertDebugLabel(const char* name, float r, float g, float b) { Q_assert(CanWriteCommands()); Q_assert(name); if(rhi.pix.SetMarkerOnCommandList != NULL) { rhi.pix.SetMarkerOnCommandList(rhi.commandList, BGRAUIntFromFloat(r, g, b), name); } else { rhi.commandList->SetMarker(1, name, strlen(name) + 1); } #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { const GFSDK_Aftermath_Result result = 
GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, name, strlen(name) + 1); Q_assert(result == GFSDK_Aftermath_Result_Success); } #endif } void CmdBeginDebugLabel(const char* name, float r, float g, float b) { Q_assert(CanWriteCommands()); Q_assert(name); Q_assert(name[0] != '\0'); if(rhi.pix.canBeginAndEnd) { rhi.pix.BeginEventOnCommandList(rhi.commandList, BGRAUIntFromFloat(r, g, b), name); } else { rhi.commandList->BeginEvent(1, name, strlen(name) + 1); } #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { const char* const markerString = va("Begin: %s", name); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, markerString, strlen(markerString) + 1); Q_assert(result == GFSDK_Aftermath_Result_Success); Q_assert(rhi.aftermathMarkerDepth < ARRAY_LEN(rhi.aftermathMarkers)); RHIPrivate::AftermathMarker& marker = rhi.aftermathMarkers[rhi.aftermathMarkerDepth++]; Q_strncpyz(marker.string, name, sizeof(marker.string)); } #endif } void CmdEndDebugLabel() { Q_assert(CanWriteCommands()); if(rhi.pix.canBeginAndEnd) { rhi.pix.EndEventOnCommandList(rhi.commandList); } else { rhi.commandList->EndEvent(); } #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { Q_assert(rhi.aftermathMarkerDepth > 0); const RHIPrivate::AftermathMarker& marker = rhi.aftermathMarkers[rhi.aftermathMarkerDepth - 1]; Q_assert(marker.string[0] != '\0'); const char* const markerString = va("End: %s", marker.string); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, markerString, strlen(markerString) + 1); Q_assert(result == GFSDK_Aftermath_Result_Success); rhi.aftermathMarkerDepth--; } #endif } void CmdSetStencilReference(uint8_t stencilRef) { rhi.commandList->OMSetStencilRef((UINT)stencilRef); } void CmdCopyBuffer(HBuffer dest, HBuffer source) { Q_assert(CanWriteCommands()); const Buffer& dst = rhi.buffers.Get(dest); const Buffer& src = rhi.buffers.Get(source); const UINT64 byteCount = 
min(src.desc.byteCount, dst.desc.byteCount); rhi.commandList->CopyBufferRegion(dst.buffer, 0, src.buffer, 0, byteCount); } void CmdCopyBuffer(HBuffer dest, uint32_t destOffset, HBuffer source, uint32_t sourceOffset, uint32_t byteCount) { Q_assert(CanWriteCommands()); const Buffer& dst = rhi.buffers.Get(dest); const Buffer& src = rhi.buffers.Get(source); Q_assert(destOffset + byteCount <= dst.desc.byteCount); Q_assert(sourceOffset + byteCount <= src.desc.byteCount); rhi.commandList->CopyBufferRegion(dst.buffer, destOffset, src.buffer, sourceOffset, byteCount); } void CmdCopyTexture(HTexture dest, HTexture source) { Q_assert(CanWriteCommands()); Q_assert(dest != source); const Texture& dst = rhi.textures.Get(dest); const Texture& src = rhi.textures.Get(source); Q_assert(dst.desc.width == src.desc.width); Q_assert(dst.desc.height == src.desc.height); Q_assert(dst.desc.depth == src.desc.depth); Q_assert(dst.desc.mipCount == src.desc.mipCount); Q_assert(dst.desc.format == src.desc.format); rhi.commandList->CopyResource(dst.texture, src.texture); } void CmdSetShadingRate(ShadingRate::Id shadingRate) { Q_assert(CanWriteCommands()); if(!rhi.baseVRSSupport) { return; } if(!rhi.extendedVRSSupport) { switch(shadingRate) { case ShadingRate::SR_2x4: case ShadingRate::SR_4x2: case ShadingRate::SR_4x4: shadingRate = ShadingRate::SR_2x2; break; default: break; } } rhi.commandList->RSSetShadingRate(GetD3DShadingRate(shadingRate), NULL); } uint32_t GetDurationCount() { return rhi.resolvedQueries.durationQueryCount; } void GetDurations(uint32_t* gpuMicroSeconds) { memcpy(gpuMicroSeconds, rhi.resolvedQueries.gpuMicroSeconds, rhi.resolvedQueries.durationQueryCount * sizeof(uint32_t)); } uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount) { Q_assert(!IsNullHandle(buffer)); return rhi.upload.BeginBufferUpload(buffer, destByteOffset, byteCount); } void EndBufferUpload(HBuffer buffer) { rhi.upload.EndBufferUpload(buffer); } void 
BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture) { Q_assert(!IsNullHandle(texture)); rhi.upload.BeginTextureUpload(mappedTexture, texture); } void EndTextureUpload() { rhi.upload.EndTextureUpload(); } void BeginTempCommandList() { Q_assert(!rhi.frameBegun); Q_assert(rhi.commandList == rhi.mainCommandList); rhi.commandList = rhi.tempCommandList; #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { rhi.aftermathCommandList = rhi.aftermathTempCommandList; } #endif // CPU wait for the temp command list to be done executing on the GPU WaitForTempCommandList(); // GPU wait for the copy queue to be done executing on the GPU rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue); BindDynamicResources(); } void EndTempCommandList() { Q_assert(!rhi.frameBegun); Q_assert(rhi.commandList == rhi.tempCommandList); rhi.commandList = rhi.mainCommandList; #if defined(RHI_ENABLE_AFTERMATH) if(rhi.aftermathActive) { rhi.aftermathCommandList = rhi.aftermathMainCommandList; } #endif // execute and wait on the temporary command list ID3D12CommandQueue* const queue = rhi.computeCommandQueue; rhi.tempCommandList->Close(); ID3D12CommandList* tempCommandListArray[] = { rhi.tempCommandList }; queue->ExecuteCommandLists(ARRAY_LEN(tempCommandListArray), tempCommandListArray); rhi.tempFenceValue++; rhi.tempFence.Signal(queue, rhi.tempFenceValue); rhi.tempCommandListOpen = false; } void WaitForTempCommandList() { rhi.tempFence.WaitOnCPU(rhi.tempFenceValue); if(rhi.tempCommandListOpen) { rhi.tempCommandList->Close(); } D3D(rhi.tempCommandAllocator->Reset()); D3D(rhi.tempCommandList->Reset(rhi.tempCommandAllocator, NULL)); rhi.tempCommandListOpen = true; } void BeginTextureReadback(MappedTexture& mappedTexture, HTexture htexture) { rhi.readback.BeginTextureReadback(mappedTexture, htexture); } void EndTextureReadback() { rhi.readback.EndTextureReadback(); } void WaitUntilDeviceIsIdle() { // direct queue rhi.mainFenceValues[rhi.frameIndex]++; #if RHI_DEBUG_FENCE 
Sys_DebugPrintf("Signal: %d (WaitUntilDeviceIsIdle)\n", (int)rhi.mainFenceValues[rhi.frameIndex]); Sys_DebugPrintf("Wait: %d (WaitUntilDeviceIsIdle)\n", (int)rhi.mainFenceValues[rhi.frameIndex]); #endif rhi.mainFence.Signal(rhi.mainCommandQueue, rhi.mainFenceValues[rhi.frameIndex]); rhi.mainFence.WaitOnCPU(rhi.mainFenceValues[rhi.frameIndex]); // compute queue rhi.tempFence.WaitOnCPU(rhi.tempFenceValue); // upload queue rhi.upload.fence.WaitOnCPU(rhi.upload.fenceValue); } void SubmitAndContinue() { ASSERT_DR_ENABLED(); Q_assert(rhi.commandList == rhi.mainCommandList); CmdInsertDebugLabel("RHI::SubmitAndWaitOnCPU", 0.8f, 0.8f, 0.8f); rhi.frameBegun = false; D3D(rhi.commandList->Close()); ID3D12CommandList* commandListArray[] = { rhi.commandList }; rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray); const UINT64 currentFenceValue = rhi.mainFenceValues[rhi.frameIndex]; #if RHI_DEBUG_FENCE Sys_DebugPrintf("Signal: %d (SubmitAndWaitOnCPU)\n", (int)currentFenceValue); Sys_DebugPrintf("Wait: %d (SubmitAndWaitOnCPU)\n", (int)currentFenceValue); #endif rhi.mainFence.Signal(rhi.mainCommandQueue, currentFenceValue); rhi.mainFence.WaitOnCPU(currentFenceValue); rhi.mainFenceValues[rhi.frameIndex] = currentFenceValue + 1; D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset()); D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL)); BindDynamicResources(); rhi.commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); rhi.currentRootSignature = RHI_MAKE_NULL_HANDLE(); rhi.frameBegun = true; } uint32_t GetTextureIndexSRV(HTexture htexture) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(htexture)); Texture& texture = rhi.textures.Get(htexture); return texture.srvIndex; } uint32_t GetTextureIndexUAV(HTexture htexture, uint32_t mipIndex) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(htexture)); Texture& texture = rhi.textures.Get(htexture); Q_assert(mipIndex < texture.desc.mipCount); return 
texture.mips[mipIndex].uavIndex; } uint32_t GetBufferIndexSRV(HBuffer hbuffer) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(hbuffer)); Buffer& buffer = rhi.buffers.Get(hbuffer); return buffer.srvIndex; } uint32_t GetBufferIndexUAV(HBuffer hbuffer) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(hbuffer)); Buffer& buffer = rhi.buffers.Get(hbuffer); return buffer.uavIndex; } uint32_t GetBufferIndexCBV(HBuffer hbuffer) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(hbuffer)); Buffer& buffer = rhi.buffers.Get(hbuffer); return buffer.cbvIndex; } uint32_t GetSamplerIndex(HSampler hsampler) { ASSERT_DR_ENABLED(); Q_assert(!IsNullHandle(hsampler)); const Sampler& sampler = rhi.samplers.Get(hsampler); return sampler.heapIndex; } void CmdBarrierUAV() { ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.UAV.pResource = NULL; rhi.commandList->ResourceBarrier(1, &barrier); } void PrintGPUList() { CreateAdapterList(); ri.Printf(PRINT_ALL, "%s0^7. Default\n", S_COLOR_VAL); for(uint32_t i = 0; i < rhi.gpuCount; ++i) { ri.Printf(PRINT_ALL, "%s%d^7. 
%s\n", S_COLOR_VAL, (int)i + 1, rhi.gpus[i].name); } } void CmdCreateBLAS(HBuffer* blasBuffer, const BLASDesc& rhiDesc) { ASSERT_DR_ENABLED(); Q_assert(rhi.commandList == rhi.tempCommandList); Q_assert(rhi.tempCommandListOpen); Q_assert(blasBuffer); Q_assert(!IsNullHandle(rhiDesc.vertexBuffer)); Q_assert(!IsNullHandle(rhiDesc.indexBuffer)); Q_assert(rhiDesc.meshCount > 0); Q_assert(rhiDesc.meshes); const D3D12_GPU_VIRTUAL_ADDRESS baseVertexAddress = rhi.buffers.Get(rhiDesc.vertexBuffer).gpuAddress; const D3D12_GPU_VIRTUAL_ADDRESS baseIndexAddress = rhi.buffers.Get(rhiDesc.indexBuffer).gpuAddress; if(rhiDesc.meshCount > rhi.rtGeoDescCount) { const uint32_t meshCount = max(rhiDesc.meshCount, 2 * rhi.rtGeoDescCount); const size_t byteCount = meshCount * sizeof(D3D12_RAYTRACING_GEOMETRY_DESC); rhi.rtGeoDescs = (D3D12_RAYTRACING_GEOMETRY_DESC*)realloc(rhi.rtGeoDescs, byteCount); if(rhi.rtGeoDescs == NULL) { ri.Error(ERR_FATAL, "Failed to allocate %d D3D12_RAYTRACING_GEOMETRY_DESC instances\n", (int)meshCount); } rhi.rtGeoDescCount = rhiDesc.meshCount; } for(uint32_t i = 0; i < rhiDesc.meshCount; ++i) { const BLASMeshDesc& mesh = rhiDesc.meshes[i]; D3D12_RAYTRACING_GEOMETRY_DESC& geoDesc = rhi.rtGeoDescs[i]; geoDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; geoDesc.Flags = mesh.isFullyOpaque ? 
D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE : D3D12_RAYTRACING_GEOMETRY_FLAG_NONE; geoDesc.Triangles.IndexFormat = DXGI_FORMAT_R32_UINT; geoDesc.Triangles.IndexCount = mesh.indexCount; geoDesc.Triangles.IndexBuffer = baseIndexAddress + mesh.firstIndex * sizeof(uint32_t); geoDesc.Triangles.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT; geoDesc.Triangles.VertexCount = mesh.vertexCount; geoDesc.Triangles.VertexBuffer.StartAddress = baseVertexAddress + mesh.firstVertex * sizeof(vec3_t); geoDesc.Triangles.VertexBuffer.StrideInBytes = sizeof(vec3_t); geoDesc.Triangles.Transform3x4 = NULL; } D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; inputs.NumDescs = rhiDesc.meshCount; inputs.pGeometryDescs = rhi.rtGeoDescs; D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) || info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull)) { ri.Error(ERR_FATAL, "Attempted to create a BLAS larger than 4 GB!\n"); } EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch", ResourceStates::UnorderedAccessBit, (uint32_t)info.ScratchDataSizeInBytes); EnsureBufferIsThisLarge(*blasBuffer, rhiDesc.name, ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes); // dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {}; rtasDesc.SourceAccelerationStructureData = 0; rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*blasBuffer).gpuAddress; rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress; rtasDesc.Inputs = inputs; #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("BLAS: Before build"); #endif 
rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL); #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("BLAS: After build"); #endif CmdBeginBarrier(); CmdBufferBarrier(*blasBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("BLAS: After barrier"); #endif } void CmdCreateTLAS(HBuffer* tlasBuffer, const TLASDesc& rhiDesc) { ASSERT_DR_ENABLED(); Q_assert(rhi.commandList == rhi.tempCommandList); Q_assert(rhi.tempCommandListOpen); Q_assert(tlasBuffer != NULL); Q_assert(rhiDesc.instances); Q_assert(rhiDesc.instanceCount > 0); EnsureBufferIsThisLarge(rhi.raytracingInstanceBuffer, "RT TLAS instance", ResourceStates::Common, rhiDesc.instanceCount * sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); D3D12_RAYTRACING_INSTANCE_DESC* const instanceDescs = (D3D12_RAYTRACING_INSTANCE_DESC*)BeginBufferUpload(rhi.raytracingInstanceBuffer); for(uint32_t i = 0; i < rhiDesc.instanceCount; ++i) { const TLASInstanceDesc& rhiInstDesc = rhiDesc.instances[i]; D3D12_RAYTRACING_INSTANCE_DESC instDesc = {}; instDesc.AccelerationStructure = rhi.buffers.Get(rhiInstDesc.blasBuffer).gpuAddress; switch(rhiInstDesc.cullMode) { case CT_FRONT_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE; break; case CT_BACK_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE; break; default: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE; break; } instDesc.InstanceContributionToHitGroupIndex = 0; // @TODO: do we care for this? 
instDesc.InstanceID = rhiInstDesc.instanceId; instDesc.InstanceMask = rhiInstDesc.instanceMask; instDesc.Transform[0][0] = rhiInstDesc.transform[0]; // @TODO: confirm order instDesc.Transform[1][0] = rhiInstDesc.transform[1]; instDesc.Transform[2][0] = rhiInstDesc.transform[2]; instDesc.Transform[0][1] = rhiInstDesc.transform[3]; instDesc.Transform[1][1] = rhiInstDesc.transform[4]; instDesc.Transform[2][1] = rhiInstDesc.transform[5]; instDesc.Transform[0][2] = rhiInstDesc.transform[6]; instDesc.Transform[1][2] = rhiInstDesc.transform[7]; instDesc.Transform[2][2] = rhiInstDesc.transform[8]; instDesc.Transform[0][3] = rhiInstDesc.translation[0]; instDesc.Transform[1][3] = rhiInstDesc.translation[1]; instDesc.Transform[2][3] = rhiInstDesc.translation[2]; memcpy(&instanceDescs[i], &instDesc, sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); } EndBufferUpload(rhi.raytracingInstanceBuffer); // GPU wait for the copy queue to be done executing on the GPU rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue); // InstanceDescs: D3D12_RAYTRACING_INSTANCE_DESC_BYTE_ALIGNMENT D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; inputs.NumDescs = rhiDesc.instanceCount; inputs.InstanceDescs = rhi.buffers.Get(rhi.raytracingInstanceBuffer).gpuAddress; D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) || info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull)) { ri.Error(ERR_FATAL, "Attempted to create a BLAS larger than 4 GB!\n"); } EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch", ResourceStates::UnorderedAccessBit, (uint32_t)info.ScratchDataSizeInBytes); EnsureBufferIsThisLarge(*tlasBuffer, "RT TLAS", 
ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes); // dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {}; rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*tlasBuffer).gpuAddress; rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress; rtasDesc.Inputs = inputs; #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("TLAS: Before build"); #endif rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL); #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("TLAS: After build"); #endif CmdBeginBarrier(); CmdBufferBarrier(*tlasBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); #if defined(RHI_ENABLE_AFTERMATH) CmdInsertDebugLabel("TLAS: After barrier"); #endif } } void R_WaitBeforeInputSampling() { RHI::WaitForSwapChain(); RHI::rhi.beforeInputSamplingUS = Sys_Microseconds(); } /* PIX CAPTURE API WOES Never got the PIX programmable capture API to work PIXBeginCapture returns "not implemented" // before include #define USE_PIX 1 // before creating the device PIXLoadLatestWinPixGpuCapturerLibrary(); HRESULT hr = PIXSetTargetWindow(GetActiveWindow()); Check(hr, "PIXSetTargetWindow"); // whenever... PIXCaptureParameters params = {}; params.GpuCaptureParameters.FileName = L"temp.wpix"; HRESULT hr = PIXBeginCapture(0, ¶ms); Check(hr, "PIXBeginCapture"); The legacy API fails as well DXGIGetDebugInterface1 returns "no such interface supported" #include IDXGraphicsAnalysis* graphicsAnalysis; D3D(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&graphicsAnalysis))); */