mirror of
https://bitbucket.org/CPMADevs/cnq3
synced 2024-12-01 16:31:54 +00:00
5923 lines
184 KiB
C++
5923 lines
184 KiB
C++
/*
|
|
===========================================================================
|
|
Copyright (C) 2022-2024 Gian 'myT' Schellenbaum
|
|
|
|
This file is part of Challenge Quake 3 (CNQ3).
|
|
|
|
Challenge Quake 3 is free software; you can redistribute it
|
|
and/or modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the License,
|
|
or (at your option) any later version.
|
|
|
|
Challenge Quake 3 is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
|
|
===========================================================================
|
|
*/
|
|
// Direct3D 12 Rendering Hardware Interface
|
|
|
|
|
|
#if defined(_DEBUG)
|
|
#define D3D_DEBUG
|
|
#endif
|
|
#define D3D_AGILITY_SDK
|
|
//#define D3D_GPU_BASED_VALIDATION
|
|
//#define RHI_DEBUG_FENCE
|
|
//#define RHI_ENABLE_NVAPI
|
|
//#define RHI_ENABLE_NVAPI_RT_VALIDATION
|
|
//#define RHI_ENABLE_AFTERMATH
|
|
|
|
#if defined(D3D_DEBUG) && defined(RHI_ENABLE_AFTERMATH)
|
|
#error You can't enable NVIDIA Aftermath when the Direct3D Debug Layer is active.
|
|
#endif
|
|
|
|
|
|
#include "rhi_local.h"
|
|
#include <Windows.h>
|
|
#include "d3d12/d3d12.h"
|
|
#include <dxgi1_6.h>
|
|
#if defined(D3D_DEBUG)
|
|
#include <dxgidebug.h>
|
|
#endif
|
|
#include "d3d12/dxcapi.h"
|
|
#include <dwmapi.h> // for DwmGetCompositionTimingInfo
|
|
#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
|
|
#include "D3D12MemAlloc.h"
|
|
#if defined(RHI_ENABLE_NVAPI)
|
|
#include <nvapi.h>
|
|
#pragma comment(lib, "nvapi64.lib")
|
|
#endif
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
#include <GFSDK_Aftermath.h>
|
|
#pragma comment(lib, "GFSDK_Aftermath_Lib.x64.lib")
|
|
#endif
|
|
#include "../pix/pix3.h"
|
|
#include "../client/cl_imgui.h"
|
|
|
|
|
|
// @TODO: grab from ri.GetNextTargetTimeUS instead
|
|
extern int64_t com_nextTargetTimeUS;
|
|
|
|
|
|
#if defined(D3D_DEBUG) || defined(D3D_AGILITY_SDK)
|
|
extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = D3D12_SDK_VERSION; }
|
|
extern "C" { __declspec(dllexport) extern const char* D3D12SDKPath = u8".\\cnq3\\"; }
|
|
#endif
|
|
|
|
|
|
RHIExport rhie;
|
|
RHIInfo rhiInfo;
|
|
|
|
|
|
#define VENDORID_INVALID 0xDEAD
|
|
#define VENDORID_NVIDIA 0x10DE
|
|
#define VENDORID_INTEL 0x8086
|
|
#define VENDORID_AMD 0x1002
|
|
|
|
// X-macro over DXGI format names (spelled without a DXGI_FORMAT_ prefix).
// X is expanded once per entry, in exactly the order listed here.
#define DXGI_FORMAT_LIST(X) \
	X(UNKNOWN) \
	X(R32G32B32A32_TYPELESS) X(R32G32B32A32_FLOAT) X(R32G32B32A32_UINT) X(R32G32B32A32_SINT) \
	X(R32G32B32_TYPELESS) X(R32G32B32_FLOAT) X(R32G32B32_UINT) X(R32G32B32_SINT) \
	X(R16G16B16A16_TYPELESS) X(R16G16B16A16_FLOAT) X(R16G16B16A16_UNORM) \
	X(R16G16B16A16_UINT) X(R16G16B16A16_SNORM) X(R16G16B16A16_SINT) \
	X(R32G32_TYPELESS) X(R32G32_FLOAT) X(R32G32_UINT) X(R32G32_SINT) \
	X(R32G8X24_TYPELESS) X(D32_FLOAT_S8X24_UINT) X(R32_FLOAT_X8X24_TYPELESS) X(X32_TYPELESS_G8X24_UINT) \
	X(R10G10B10A2_TYPELESS) X(R10G10B10A2_UNORM) X(R10G10B10A2_UINT) \
	X(R11G11B10_FLOAT) \
	X(R8G8B8A8_TYPELESS) X(R8G8B8A8_UNORM) X(R8G8B8A8_UNORM_SRGB) \
	X(R8G8B8A8_UINT) X(R8G8B8A8_SNORM) X(R8G8B8A8_SINT) \
	X(R16G16_TYPELESS) X(R16G16_FLOAT) X(R16G16_UNORM) \
	X(R16G16_UINT) X(R16G16_SNORM) X(R16G16_SINT) \
	X(R32_TYPELESS) X(D32_FLOAT) X(R32_FLOAT) X(R32_UINT) X(R32_SINT) \
	X(R24G8_TYPELESS) X(D24_UNORM_S8_UINT) X(R24_UNORM_X8_TYPELESS) X(X24_TYPELESS_G8_UINT) \
	X(R8G8_TYPELESS) X(R8G8_UNORM) X(R8G8_UINT) X(R8G8_SNORM) X(R8G8_SINT) \
	X(R16_TYPELESS) X(R16_FLOAT) X(D16_UNORM) X(R16_UNORM) \
	X(R16_UINT) X(R16_SNORM) X(R16_SINT) \
	X(R8_TYPELESS) X(R8_UNORM) X(R8_UINT) X(R8_SNORM) X(R8_SINT) \
	X(A8_UNORM) X(R1_UNORM) X(R9G9B9E5_SHAREDEXP) \
	X(R8G8_B8G8_UNORM) X(G8R8_G8B8_UNORM) \
	X(BC1_TYPELESS) X(BC1_UNORM) X(BC1_UNORM_SRGB) \
	X(BC2_TYPELESS) X(BC2_UNORM) X(BC2_UNORM_SRGB) \
	X(BC3_TYPELESS) X(BC3_UNORM) X(BC3_UNORM_SRGB) \
	X(BC4_TYPELESS) X(BC4_UNORM) X(BC4_SNORM) \
	X(BC5_TYPELESS) X(BC5_UNORM) X(BC5_SNORM) \
	X(B5G6R5_UNORM) X(B5G5R5A1_UNORM) \
	X(B8G8R8A8_UNORM) X(B8G8R8X8_UNORM) \
	X(R10G10B10_XR_BIAS_A2_UNORM) \
	X(B8G8R8A8_TYPELESS) X(B8G8R8A8_UNORM_SRGB) \
	X(B8G8R8X8_TYPELESS) X(B8G8R8X8_UNORM_SRGB) \
	X(BC6H_TYPELESS) X(BC6H_UF16) X(BC6H_SF16) \
	X(BC7_TYPELESS) X(BC7_UNORM) X(BC7_UNORM_SRGB) \
	X(AYUV) X(Y410) X(Y416) X(NV12) X(P010) X(P016) \
	X(420_OPAQUE) \
	X(YUY2) X(Y210) X(Y216) X(NV11) X(AI44) X(IA44) \
	X(P8) X(A8P8) \
	X(B4G4R4A4_UNORM) \
	X(P208) X(V208) X(V408) \
	X(SAMPLER_FEEDBACK_MIN_MIP_OPAQUE) \
	X(SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE)
|
|
|
|
|
|
namespace RHI
|
|
{
|
|
// D3D_FEATURE_LEVEL_12_0 is the minimum to ensure at least Resource Binding Tier 2:
|
|
// - unlimited SRVs
|
|
// - 14 CBVs
|
|
// - 64 UAVs
|
|
// - 2048 samplers
|
|
static const D3D_FEATURE_LEVEL FeatureLevel = D3D_FEATURE_LEVEL_12_0;
|
|
|
|
// Type tags for RHI resource handles (used as the type argument of the resource pools).
struct ResourceType
{
	enum Id
	{
		// @NOTE: a valid type never being 0 means we can discard 0 handles right away
		Invalid = 0,
		Buffer,
		Texture,
		Sampler,
		RootSignature,
		DescriptorTable,
		Pipeline,
		DurationQuery,
		Shader,
		Count // number of enumerators above, Invalid included
	};
};
|
|
|
|
// Single list driving both the D3DResourceType enum and the D3DResourceNames table:
// each entry is R(enumerator, name suffix used when building debug names)
#define D3D_RESOURCE_LIST(R) \
	R(CommandQueue, "command queue") \
	R(CommandAllocator, "command allocator") \
	R(PipelineState, "pipeline state") \
	R(CommandList, "command list") \
	R(Fence, "fence") \
	R(RootSignature, "root signature") \
	R(DescriptorHeap, "descriptor heap") \
	R(Heap, "heap") \
	R(QueryHeap, "query heap") \
	R(Texture, "texture") \
	R(Buffer, "buffer") \
	R(Sampler, "samplers")

// one enumerator per list entry, followed by Count
#define D3D_RESOURCE_ENUMERATOR(Enum, Name) Enum,
struct D3DResourceType
{
	enum Id
	{
		D3D_RESOURCE_LIST(D3D_RESOURCE_ENUMERATOR)
		Count
	};
};
#undef D3D_RESOURCE_ENUMERATOR

// indexed by D3DResourceType::Id, the extra last slot (index Count) is an empty string
#define D3D_RESOURCE_NAME(Enum, Name) Name,
static const char* D3DResourceNames[] =
{
	D3D_RESOURCE_LIST(D3D_RESOURCE_NAME)
	""
};
#undef D3D_RESOURCE_NAME

#undef D3D_RESOURCE_LIST
|
|
|
|
struct Buffer
|
|
{
|
|
BufferDesc desc;
|
|
D3D12MA::Allocation* allocation;
|
|
ID3D12Resource* buffer;
|
|
D3D12_GPU_VIRTUAL_ADDRESS gpuAddress;
|
|
D3D12_RESOURCE_STATES currentState;
|
|
uint32_t cbvIndex;
|
|
uint32_t srvIndex;
|
|
uint32_t uavIndex;
|
|
bool mapped;
|
|
bool uploading;
|
|
UINT64 uploadByteOffset;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct Texture
|
|
{
|
|
TextureDesc desc;
|
|
D3D12MA::Allocation* allocation;
|
|
ID3D12Resource* texture;
|
|
uint32_t srvIndex;
|
|
uint32_t rtvIndex;
|
|
uint32_t dsvIndex;
|
|
D3D12_RESOURCE_STATES currentState;
|
|
struct Mip
|
|
{
|
|
uint32_t uavIndex;
|
|
}
|
|
mips[MaxTextureMips];
|
|
bool uploading;
|
|
uint32_t uploadByteOffset;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct RootSignature
|
|
{
|
|
struct PerStageConstants
|
|
{
|
|
UINT parameterIndex;
|
|
};
|
|
RootSignatureDesc desc;
|
|
ID3D12RootSignature* signature;
|
|
PerStageConstants constants[ShaderStage::Count];
|
|
UINT genericTableIndex;
|
|
UINT samplerTableIndex;
|
|
UINT genericDescCount;
|
|
UINT samplerDescCount;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct DescriptorTable
|
|
{
|
|
ID3D12DescriptorHeap* genericHeap; // SRV, CBV, UAV
|
|
ID3D12DescriptorHeap* samplerHeap;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct Pipeline
|
|
{
|
|
GraphicsPipelineDesc graphicsDesc;
|
|
ComputePipelineDesc computeDesc;
|
|
ID3D12PipelineState* pso = NULL;
|
|
PipelineType::Id type = PipelineType::Graphics;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct Shader
|
|
{
|
|
IDxcBlob* blob = NULL;
|
|
bool shortLifeTime = false;
|
|
};
|
|
|
|
struct Sampler
|
|
{
|
|
SamplerDesc desc;
|
|
uint32_t heapIndex = UINT32_MAX;
|
|
bool shortLifeTime = true;
|
|
};
|
|
|
|
// Life cycle of a duration query.
struct QueryState
{
	enum Id
	{
		Free = 0, // ready to be (re-)used
		Begun,    // first call done, not resolved yet
		Ended,    // second call done, not resolved yet
		Count
	};
};
|
|
|
|
struct Fence
|
|
{
|
|
void Create(UINT64 value, const char* name);
|
|
void Signal(ID3D12CommandQueue* queue, UINT64 value);
|
|
void WaitOnCPU(UINT64 value);
|
|
void WaitOnGPU(ID3D12CommandQueue* queue, UINT64 value);
|
|
bool HasCompleted(UINT64 value);
|
|
void Release();
|
|
|
|
ID3D12Fence* fence;
|
|
HANDLE event;
|
|
};
|
|
|
|
struct UploadManager
|
|
{
|
|
void Create();
|
|
void Release();
|
|
uint8_t* BeginBufferUpload(HBuffer buffer);
|
|
void EndBufferUpload(HBuffer buffer);
|
|
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
|
|
void EndTextureUpload();
|
|
void WaitToStartDrawing(ID3D12CommandQueue* commandQueue);
|
|
|
|
ID3D12CommandQueue* commandQueue;
|
|
ID3D12CommandAllocator* commandAllocator;
|
|
ID3D12GraphicsCommandList* commandList;
|
|
|
|
HBuffer uploadHBuffer;
|
|
uint32_t bufferByteCount;
|
|
uint32_t bufferByteOffset;
|
|
uint8_t* mappedBuffer;
|
|
|
|
Fence fence;
|
|
UINT64 fenceValue;
|
|
|
|
HTexture currentTexture;
|
|
int bufferUploadCounter;
|
|
bool multiBufferUpload;
|
|
bool needsRewind;
|
|
int batchTextureCount;
|
|
int batchBufferCount;
|
|
|
|
private:
|
|
void WaitToStartUploading(uint32_t uploadByteCount);
|
|
void EndOfBufferReached();
|
|
};
|
|
|
|
struct ReadbackManager
|
|
{
|
|
void Create();
|
|
void Release();
|
|
void ResizeIfNeeded();
|
|
void BeginTextureReadback(MappedTexture& mappedTexture, HTexture texture);
|
|
void EndTextureReadback();
|
|
|
|
ID3D12CommandAllocator* readbackCommandAllocator;
|
|
ID3D12GraphicsCommandList* readbackCommandList;
|
|
HBuffer readbackBuffer;
|
|
Fence readbackFence;
|
|
UINT64 readbackFenceValue;
|
|
uint32_t bufferByteCount;
|
|
};
|
|
|
|
struct DescriptorHeap
|
|
{
|
|
void Create(D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t size, uint16_t* freeListItems, const char* name);
|
|
void Release();
|
|
uint32_t Allocate();
|
|
void Free(uint32_t index);
|
|
D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(uint32_t index);
|
|
uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc);
|
|
uint32_t CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc);
|
|
uint32_t CreateRTV(ID3D12Resource* resource, D3D12_RENDER_TARGET_VIEW_DESC& desc);
|
|
uint32_t CreateDSV(ID3D12Resource* resource, D3D12_DEPTH_STENCIL_VIEW_DESC& desc);
|
|
uint32_t CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc);
|
|
uint32_t CreateSampler(D3D12_SAMPLER_DESC& desc);
|
|
|
|
StaticFreeList<uint16_t, InvalidDescriptorIndex> freeList;
|
|
ID3D12DescriptorHeap* heap;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE startAddress;
|
|
UINT descriptorSize;
|
|
D3D12_DESCRIPTOR_HEAP_TYPE type;
|
|
};
|
|
|
|
struct DurationQuery
|
|
{
|
|
QueryState::Id state;
|
|
};
|
|
|
|
struct FrameQueries
|
|
{
|
|
DurationQuery durationQueries[MaxDurationQueries];
|
|
uint32_t durationQueryCount;
|
|
};
|
|
|
|
struct ResolvedQueries
|
|
{
|
|
uint32_t gpuMicroSeconds[MaxDurationQueries];
|
|
uint32_t durationQueryCount;
|
|
};
|
|
|
|
struct PIX
|
|
{
|
|
typedef void(WINAPI* BeginEventOnCommandListPtr)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);
|
|
typedef void(WINAPI* EndEventOnCommandListPtr)(ID3D12GraphicsCommandList* commandList);
|
|
typedef void(WINAPI* SetMarkerOnCommandListPtr)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);
|
|
|
|
BeginEventOnCommandListPtr BeginEventOnCommandList;
|
|
EndEventOnCommandListPtr EndEventOnCommandList;
|
|
SetMarkerOnCommandListPtr SetMarkerOnCommandList;
|
|
|
|
HMODULE module;
|
|
bool canBeginAndEnd;
|
|
};
|
|
|
|
struct DynamicResources
|
|
{
|
|
struct DescriptorRange
|
|
{
|
|
void Init(D3D12_DESCRIPTOR_RANGE_TYPE type, uint32_t start, uint32_t count);
|
|
uint32_t Allocate(bool slotAtIndex0 = false);
|
|
|
|
D3D12_DESCRIPTOR_RANGE_TYPE type;
|
|
uint32_t start;
|
|
uint32_t count;
|
|
uint32_t index;
|
|
bool reservedSlotUsed;
|
|
};
|
|
|
|
static const uint32_t MaxDescriptorsSRV = 65536;
|
|
static const uint32_t MaxDescriptorsUAV = 65536;
|
|
static const uint32_t MaxDescriptorsCBV = 65536;
|
|
static const uint32_t MaxDescriptorsSamplers = 1024;
|
|
static const uint32_t MaxDescriptorsGeneric = MaxDescriptorsSRV + MaxDescriptorsUAV + MaxDescriptorsCBV;
|
|
|
|
ID3D12RootSignature* rootSignature;
|
|
ID3D12DescriptorHeap* genericDescriptorHeap; // CPU write, GPU read
|
|
ID3D12DescriptorHeap* samplerDescriptorHeap; // CPU write, GPU read
|
|
ID3D12DescriptorHeap* genericCPUDescriptorHeap; // CPU read/write, for UAV clears and the like
|
|
DescriptorRange srvIndex;
|
|
DescriptorRange uavIndex;
|
|
DescriptorRange cbvIndex;
|
|
DescriptorRange samplerIndex;
|
|
};
|
|
|
|
struct BufferBarrier
|
|
{
|
|
BufferBarrier() = default;
|
|
BufferBarrier(HBuffer buffer_, ResourceStates::Flags newState_)
|
|
{
|
|
buffer = buffer_;
|
|
newState = newState_;
|
|
}
|
|
|
|
HBuffer buffer = RHI_MAKE_NULL_HANDLE();
|
|
ResourceStates::Flags newState = ResourceStates::Common;
|
|
};
|
|
|
|
struct TextureBarrier
|
|
{
|
|
TextureBarrier() = default;
|
|
TextureBarrier(HTexture texture_, ResourceStates::Flags newState_)
|
|
{
|
|
texture = texture_;
|
|
newState = newState_;
|
|
}
|
|
|
|
HTexture texture = RHI_MAKE_NULL_HANDLE();
|
|
ResourceStates::Flags newState = ResourceStates::Common;
|
|
};
|
|
|
|
struct GPU
|
|
{
|
|
char name[256];
|
|
LUID uniqueId;
|
|
};
|
|
|
|
struct BufferToDelete
|
|
{
|
|
HBuffer buffer;
|
|
uint32_t beginFrameCounter;
|
|
};
|
|
|
|
struct RHIPrivate
|
|
{
|
|
bool initialized;
|
|
|
|
ID3D12Debug* debug; // can be NULL
|
|
ID3D12InfoQueue* infoQueue; // can be NULL
|
|
#if defined(D3D_DEBUG)
|
|
IDXGIInfoQueue* dxgiInfoQueue; // can be NULL
|
|
IDXGIFactory2* factory;
|
|
#else
|
|
IDXGIFactory1* factory;
|
|
#endif
|
|
IDXGIAdapter1* adapter;
|
|
ID3D12Device5* device;
|
|
D3D12MA::Allocator* allocator;
|
|
D3D12MA::Pool* umaPool; // only non-NULL when using a cache-coherent UMA adapter
|
|
ID3D12CommandQueue* mainCommandQueue;
|
|
ID3D12CommandQueue* computeCommandQueue;
|
|
IDXGISwapChain3* swapChain;
|
|
HTexture renderTargets[FrameCount];
|
|
ID3D12CommandAllocator* mainCommandAllocators[FrameCount];
|
|
ID3D12GraphicsCommandList6* mainCommandList;
|
|
ID3D12CommandAllocator* tempCommandAllocator;
|
|
ID3D12GraphicsCommandList6* tempCommandList;
|
|
bool tempCommandListOpen;
|
|
ID3D12GraphicsCommandList6* commandList; // not owned, don't release it!
|
|
uint32_t swapChainBufferCount;
|
|
uint32_t renderFrameCount;
|
|
HANDLE frameLatencyWaitableObject;
|
|
bool frameLatencyWaitNeeded;
|
|
UINT frameIndex;
|
|
UINT swapChainBufferIndex;
|
|
Fence mainFence;
|
|
UINT64 mainFenceValues[FrameCount];
|
|
Fence tempFence;
|
|
UINT64 tempFenceValue;
|
|
ID3D12QueryHeap* timeStampHeaps[FrameCount];
|
|
HBuffer timeStampBuffers[FrameCount];
|
|
uint32_t frameDurationQueryIndex;
|
|
HRootSignature currentRootSignature;
|
|
bool isTearingSupported;
|
|
bool vsync;
|
|
bool frameBegun;
|
|
bool baseVRSSupport;
|
|
bool extendedVRSSupport;
|
|
bool useDynamicResources;
|
|
DynamicResources dynamicResources;
|
|
ID3D12CommandSignature* indirectDispatchSignature;
|
|
UINT vendorId;
|
|
char umdVersionString[64];
|
|
uint16_t umdVersionSplit[4];
|
|
uint64_t umdVersion;
|
|
|
|
HMODULE dxcModule;
|
|
HMODULE dxilModule;
|
|
IDxcUtils* dxcUtils;
|
|
IDxcCompiler3* dxcCompiler;
|
|
|
|
uint16_t descriptorFreeListData[MaxCPUDescriptors];
|
|
DescriptorHeap descHeapGeneric;
|
|
DescriptorHeap descHeapSamplers;
|
|
DescriptorHeap descHeapRTVs;
|
|
DescriptorHeap descHeapDSVs;
|
|
|
|
#define POOL(Type, Size) StaticPool<Type, H##Type, ResourceType::Type, Size>
|
|
POOL(Buffer, 128) buffers;
|
|
POOL(Texture, MAX_DRAWIMAGES * 2) textures;
|
|
POOL(RootSignature, 64) rootSignatures;
|
|
POOL(DescriptorTable, 64) descriptorTables;
|
|
POOL(Pipeline, 256) pipelines;
|
|
POOL(Shader, 16) shaders;
|
|
POOL(Sampler, 128) samplers;
|
|
#undef POOL
|
|
|
|
#define DESTROY_POOL_LIST(POOL) \
|
|
POOL(buffers, DestroyBuffer) \
|
|
POOL(textures, DestroyTexture) \
|
|
POOL(rootSignatures, DestroyRootSignature) \
|
|
POOL(descriptorTables, DestroyDescriptorTable) \
|
|
POOL(pipelines, DestroyPipeline) \
|
|
POOL(shaders, DestroyShader) \
|
|
POOL(samplers, DestroySampler)
|
|
|
|
// null resources, no manual clean-up needed
|
|
HTexture nullTexture; // SRV
|
|
HTexture nullRWTexture; // UAV
|
|
HBuffer nullBuffer; // CBV
|
|
HBuffer nullRWBuffer; // UAV
|
|
HSampler nullSampler;
|
|
|
|
byte persStringData[64 << 10];
|
|
byte tempStringData[64 << 10];
|
|
char adapterName[256];
|
|
LinearAllocator persStringAllocator;
|
|
LinearAllocator tempStringAllocator;
|
|
UploadManager upload;
|
|
ReadbackManager readback;
|
|
StaticUnorderedArray<HTexture, MAX_DRAWIMAGES> texturesToTransition;
|
|
StaticUnorderedArray<HBuffer, 64> buffersToTransition;
|
|
StaticUnorderedArray<BufferToDelete, 64> buffersToDelete;
|
|
FrameQueries frameQueries[FrameCount];
|
|
ResolvedQueries resolvedQueries;
|
|
PIX pix;
|
|
int64_t beforeInputSamplingUS;
|
|
int64_t beforeRenderingUS;
|
|
GPU gpus[16];
|
|
uint32_t gpuCount;
|
|
HBuffer raytracingScratchBuffer;
|
|
HBuffer raytracingInstanceBuffer;
|
|
uint32_t beginFrameCounter;
|
|
D3D12_RAYTRACING_GEOMETRY_DESC* rtGeoDescs;
|
|
uint32_t rtGeoDescCount;
|
|
|
|
// immediate-mode barrier API
|
|
TextureBarrier textureBarriers[64];
|
|
BufferBarrier bufferBarriers[64];
|
|
uint32_t textureBarrierCount;
|
|
uint32_t bufferBarrierCount;
|
|
ID3D12GraphicsCommandList* barrierCommandList;
|
|
bool barrierOpen;
|
|
|
|
// NVIDIA NVAPI
|
|
#if defined(RHI_ENABLE_NVAPI)
|
|
bool nvapiActive;
|
|
#endif
|
|
|
|
// NVIDIA Aftermath
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
struct AftermathMarker
|
|
{
|
|
char string[64];
|
|
};
|
|
AftermathMarker aftermathMarkers[64]; // stack: markers can be nested
|
|
uint32_t aftermathMarkerDepth = 0;
|
|
GFSDK_Aftermath_ContextHandle aftermathMainCommandList;
|
|
GFSDK_Aftermath_ContextHandle aftermathTempCommandList;
|
|
GFSDK_Aftermath_ContextHandle aftermathUploadCommandList;
|
|
GFSDK_Aftermath_ContextHandle aftermathCommandList; // active command list
|
|
bool aftermathActive;
|
|
#endif
|
|
};
|
|
|
|
static RHIPrivate rhi;
|
|
|
|
// Releases a COM pointer if it's non-NULL and resets it to NULL.
// The argument is parenthesized so that expressions such as *ptrToPtr expand correctly.
#define COM_RELEASE(p) do { if((p) != NULL) { (p)->Release(); (p) = NULL; } } while((void)0,0)

// Applies COM_RELEASE to every element of a fixed-size array of COM pointers.
#define COM_RELEASE_ARRAY(a) do { for(int i = 0; i < (int)ARRAY_LEN(a); ++i) { COM_RELEASE((a)[i]); } } while((void)0,0)
|
|
|
|
#define D3D(Exp) Check((Exp), #Exp)
|
|
|
|
#if defined(near)
|
|
# undef near
|
|
#endif
|
|
|
|
#if defined(far)
|
|
# undef far
|
|
#endif
|
|
|
|
#if !defined(D3DDDIERR_DEVICEREMOVED)
|
|
# define D3DDDIERR_DEVICEREMOVED ((HRESULT)0x88760870L)
|
|
#endif
|
|
|
|
#define ASSERT_DR_ENABLED() ASSERT_OR_DIE(rhi.useDynamicResources, "RHI API requires DR on")
|
|
#define ASSERT_DR_DISABLED() ASSERT_OR_DIE(!rhi.useDynamicResources, "RHI API requires DR off")
|
|
|
|
// Returns the system's description of an HRESULT with trailing whitespace removed,
// or "???" when the system has no message for it.
// The result lives in a static buffer: it's overwritten by the next call.
static const char* GetSystemErrorString(HRESULT hr)
{
	// FormatMessage might not always give us the string we want but that's ok,
	// we always print the original error code anyhow
	static char systemErrorStr[1024];

	// FORMAT_MESSAGE_IGNORE_INSERTS is needed because we pass no argument list:
	// without it, a message containing insert sequences (%1, %2, ...) can't be formatted safely
	const DWORD written = FormatMessageA(
		FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
		NULL, (DWORD)hr, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
		systemErrorStr, sizeof(systemErrorStr) - 1, NULL);
	if(written == 0)
	{
		// we have nothing valid
		Q_strncpyz(systemErrorStr, "???", sizeof(systemErrorStr));
		return systemErrorStr;
	}

	// remove the trailing whitespace
	char* s = systemErrorStr + strlen(systemErrorStr) - 1;
	while(s >= systemErrorStr &&
		(*s == '\r' || *s == '\n' || *s == '\t' || *s == ' '))
	{
		*s-- = '\0';
	}

	return systemErrorStr;
}
|
|
|
|
// Validates the result of a D3D/Win32 call.
// Returns true on success. On failure, raises a fatal error that names the
// failing expression ('function') and prints the error code and its description.
static bool Check(HRESULT hr, const char* function)
{
	if(!SUCCEEDED(hr))
	{
		// fatal error mode always on for now
		const char* const description = GetSystemErrorString(hr);
		ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, description);
		return false;
	}

	return true;
}
|
|
|
|
static const char* GetUTF8String(const WCHAR* wideStr, const char* defaultUTF8Str)
|
|
{
|
|
static char utf8Str[256];
|
|
const char* utf8StrPtr = defaultUTF8Str;
|
|
if(WideCharToMultiByte(CP_UTF8, 0, wideStr, -1, utf8Str, sizeof(utf8Str), NULL, NULL) > 0)
|
|
{
|
|
utf8StrPtr = utf8Str;
|
|
}
|
|
|
|
return utf8StrPtr;
|
|
}
|
|
|
|
// Converts a null-terminated UTF-8 string to a wide string.
// Returns a pointer to a static 256-character buffer (overwritten by the next call)
// or 'defaultWideStr' when the conversion fails.
static const WCHAR* GetWideString(const char* utf8Str, const WCHAR* defaultWideStr)
{
	static WCHAR wideStr[256];

	const int charCount = MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, wideStr, ARRAY_LEN(wideStr));

	return charCount > 0 ? wideStr : defaultWideStr;
}
|
|
|
|
// Names a D3D12 object "<resourceName> <type name>" for graphics debugging tools.
// Does nothing when 'resourceName' is NULL or 'resourceType' is out of range.
static void SetDebugName(ID3D12DeviceChild* resource, const char* resourceName, D3DResourceType::Id resourceType)
{
	if(resourceName == NULL)
	{
		return;
	}

	if((uint32_t)resourceType >= D3DResourceType::Count)
	{
		return;
	}

	const char* const typeName = D3DResourceNames[resourceType];
	const char* const fullName = va("%s %s", resourceName, typeName);

	// ID3D12Object::SetName is a Unicode wrapper for
	// ID3D12Object::SetPrivateData with WKPDID_D3DDebugObjectNameW
	// it was good enough for RenderDoc and PIX, but not Nsight
	//resource->SetPrivateData(WKPDID_D3DDebugObjectName, strlen(name), name);
	const WCHAR* const wideName = GetWideString(fullName, L"???");
	resource->SetName(wideName);
}
|
|
|
|
// Returns the size in bytes of a single pixel of the given texture format.
// Unsupported formats trigger an assert and are reported as 4 bytes per pixel.
static uint32_t GetBytesPerPixel(TextureFormat::Id format)
{
	uint32_t byteCount = 4;

	switch(format)
	{
		// 128 bits
		case TextureFormat::R32G32B32A32_Float:
			byteCount = 16;
			break;

		// 64 bits
		case TextureFormat::R16G16B16A16_UNorm:
		case TextureFormat::R16G16B16A16_Float:
		case TextureFormat::R32G32_Float:
		case TextureFormat::R32G32_UInt:
			byteCount = 8;
			break;

		// 32 bits
		case TextureFormat::R8G8B8A8_UNorm:
		case TextureFormat::Depth32_Float:
		case TextureFormat::Depth24_Stencil8:
		case TextureFormat::R10G10B10A2_UNorm:
		case TextureFormat::R32_UInt:
		case TextureFormat::R16G16_SNorm:
		case TextureFormat::R16G16_Float:
			byteCount = 4;
			break;

		// 16 bits
		case TextureFormat::R16_UInt:
		case TextureFormat::R16_Float:
		case TextureFormat::R8G8_UNorm:
			byteCount = 2;
			break;

		// 8 bits
		case TextureFormat::R8_UNorm:
			byteCount = 1;
			break;

		default:
			Q_assert(!"Unsupported texture format");
			break;
	}

	return byteCount;
}
|
|
|
|
// Creates and names a descriptor heap.
// type:          kind of descriptors the heap holds
// size:          descriptor count, 0 means "no heap" and returns NULL
// shaderVisible: true to create the heap with the SHADER_VISIBLE flag
// name:          debug name prefix, can be NULL
// Returns the new heap or NULL (size was 0 or creation failed).
static ID3D12DescriptorHeap* CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, UINT size, bool shaderVisible, const char* name)
{
	if(size == 0)
	{
		return NULL;
	}

	D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
	heapDesc.Type = type;
	heapDesc.Flags = shaderVisible ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
	heapDesc.NumDescriptors = size;
	heapDesc.NodeMask = 0;

	// initialized so that a failed creation can never hand out an indeterminate pointer,
	// even if Check stops being unconditionally fatal
	ID3D12DescriptorHeap* heap = NULL;
	if(!D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))))
	{
		return NULL;
	}
	SetDebugName(heap, name, D3DResourceType::DescriptorHeap);

	return heap;
}
|
|
|
|
// Computes the number of bytes needed to read back one R8G8B8A8 render target
// of glConfig.vidWidth x glConfig.vidHeight pixels: copy row pitch times row count.
static uint32_t GetReadbackTextureByteCount()
{
	// we base the resolution on the render targets, not the swap chain images
	// this allows us to e.g. capture videos at 4K while displaying a 720p window
	D3D12_RESOURCE_DESC desc = {};
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
	desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
	desc.Width = glConfig.vidWidth;
	desc.Height = glConfig.vidHeight;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = 1;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;

	// only the footprint of the first sub-resource is queried
	D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed;
	rhi.device->GetCopyableFootprints(&desc, 0, 1, 0, &placed, NULL, NULL, NULL);

	return (uint32_t)(placed.Footprint.RowPitch * placed.Footprint.Height);
}
|
|
|
|
// Creates the D3D12 fence with initial value 'value', names it 'name'
// and creates the event used by WaitOnCPU. Failures go through Check.
void Fence::Create(UINT64 value, const char* name)
{
	D3D(rhi.device->CreateFence(value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&fence)));
	SetDebugName(fence, name, D3DResourceType::Fence);

	// unnamed auto-reset event that starts out non-signaled
	event = CreateEvent(NULL, FALSE, FALSE, NULL);
	if(event != NULL)
	{
		return;
	}

	Check(HRESULT_FROM_WIN32(GetLastError()), "CreateEvent");
}
|
|
|
|
// Enqueues a signal on 'queue' that sets this fence to 'value'. Failures go through Check.
void Fence::Signal(ID3D12CommandQueue* queue, UINT64 value)
{
	const HRESULT hr = queue->Signal(fence, value);
	Check(hr, "queue->Signal(fence, value)");
}
|
|
|
|
// Blocks the calling thread until the fence's completed value is at least 'value'.
// Returns right away when that's already the case.
void Fence::WaitOnCPU(UINT64 value)
{
	if(HasCompleted(value))
	{
		return;
	}

	// have the fence signal our event once 'value' is reached, then sleep on the event
	D3D(fence->SetEventOnCompletion(value, event));
	WaitForSingleObjectEx(event, INFINITE, FALSE);
}
|
|
|
|
// Makes 'queue' wait until this fence has reached 'value'. Failures go through Check.
void Fence::WaitOnGPU(ID3D12CommandQueue* queue, UINT64 value)
{
	const HRESULT hr = queue->Wait(fence, value);
	Check(hr, "queue->Wait(fence, value)");
}
|
|
|
|
// Returns true when the fence's completed value is at least 'value'.
bool Fence::HasCompleted(UINT64 value)
{
	const UINT64 completedValue = fence->GetCompletedValue();

	return completedValue >= value;
}
|
|
|
|
void Fence::Release()
|
|
{
|
|
CloseHandle(event);
|
|
event = NULL;
|
|
COM_RELEASE(fence);
|
|
}
|
|
|
|
void UploadManager::Create()
|
|
{
|
|
BufferDesc bufferDesc("upload", 128 << 20, ResourceStates::CopyDestinationBit);
|
|
bufferDesc.memoryUsage = MemoryUsage::Upload;
|
|
uploadHBuffer = CreateBuffer(bufferDesc);
|
|
bufferByteCount = bufferDesc.byteCount;
|
|
bufferByteOffset = 0;
|
|
mappedBuffer = MapBuffer(uploadHBuffer);
|
|
|
|
D3D12_COMMAND_QUEUE_DESC queueDesc = { 0 };
|
|
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
|
|
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
|
queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
|
|
queueDesc.NodeMask = 0;
|
|
D3D(rhi.device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue)));
|
|
SetDebugName(commandQueue, "upload", D3DResourceType::CommandQueue);
|
|
|
|
D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&commandAllocator)));
|
|
SetDebugName(commandAllocator, "upload", D3DResourceType::CommandAllocator);
|
|
|
|
D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, commandAllocator, NULL, IID_PPV_ARGS(&commandList)));
|
|
SetDebugName(commandList, "upload", D3DResourceType::CommandList);
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
// @NOTE: Aftermath context creation must happen on an opened command list
|
|
Q_assert(commandList != NULL);
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_DX12_CreateContextHandle(commandList, &rhi.aftermathUploadCommandList);
|
|
Q_assert(result == GFSDK_Aftermath_Result_Success);
|
|
}
|
|
#endif
|
|
D3D(commandList->Close());
|
|
|
|
fence.Create(0, "upload");
|
|
fenceValue = 0;
|
|
|
|
currentTexture = RHI_MAKE_NULL_HANDLE();
|
|
|
|
bufferUploadCounter = 0;
|
|
multiBufferUpload = false;
|
|
needsRewind = false;
|
|
batchTextureCount = 0;
|
|
batchBufferCount = 0;
|
|
}
|
|
|
|
void UploadManager::Release()
|
|
{
|
|
UnmapBuffer(uploadHBuffer);
|
|
fence.Release();
|
|
COM_RELEASE(commandQueue);
|
|
COM_RELEASE(commandList);
|
|
COM_RELEASE(commandAllocator);
|
|
}
|
|
|
|
// Starts uploading to 'userHBuffer' and returns the CPU address the caller writes to.
// GPU-memory buffers on adapters without a UMA pool get a region of the upload buffer
// (copied over in EndBufferUpload), all other buffers are mapped directly.
// Must be matched by a call to EndBufferUpload.
uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer)
{
	Q_assert(bufferUploadCounter >= 0);
	if(++bufferUploadCounter > 1)
	{
		// more than one buffer upload is open at the same time
		multiBufferUpload = true;
	}

	Buffer& target = rhi.buffers.Get(userHBuffer);
	Q_assert(!target.uploading);
	Q_assert(target.desc.memoryUsage != MemoryUsage::Readback);

	const bool staged =
		target.desc.memoryUsage == MemoryUsage::GPU &&
		rhi.umaPool == NULL;

	uint8_t* destination = NULL;
	if(!staged)
	{
		destination = (uint8_t*)MapBuffer(userHBuffer);
		Q_assert(destination != NULL);
	}
	else
	{
		const uint32_t byteCount = target.desc.byteCount;
		WaitToStartUploading(byteCount);

		// reserve [bufferByteOffset, bufferByteOffset + byteCount) in the upload buffer
		destination = mappedBuffer + bufferByteOffset;
		target.uploadByteOffset = bufferByteOffset;
		bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + byteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);

		if(multiBufferUpload)
		{
			needsRewind = true;
		}
		batchBufferCount++;
	}

	target.uploading = true;

	return destination;
}
|
|
|
|
// Finishes the upload started by BeginBufferUpload for 'userHBuffer'.
// Buffers that aren't mapped had their data staged in the upload buffer:
// the copy to the destination is recorded and submitted on the copy queue,
// followed by a fence signal. Mapped buffers are simply unmapped.
void UploadManager::EndBufferUpload(HBuffer userHBuffer)
{
	bufferUploadCounter--;
	Q_assert(bufferUploadCounter >= 0);

	Buffer& userBuffer = rhi.buffers.Get(userHBuffer);
	Q_assert(userBuffer.uploading);

	Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);

	if(!userBuffer.mapped)
	{
		D3D(commandList->Reset(commandAllocator, NULL));

		const UINT64 byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount);
#if defined(RHI_ENABLE_AFTERMATH)
		// the Aftermath context only exists when Aftermath is active (see UploadManager::Create)
		if(rhi.aftermathActive)
		{
			const char* const marker = va("Upload: CopyBufferRegion -> %s (%s)", userBuffer.desc.name, Com_FormatBytes(byteCount));
			const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1);
			Q_assert(result == GFSDK_Aftermath_Result_Success);
		}
#endif
		commandList->CopyBufferRegion(userBuffer.buffer, 0, uploadBuffer.buffer, userBuffer.uploadByteOffset, byteCount);

		ID3D12CommandList* commandLists[] = { commandList };
		D3D(commandList->Close());
		commandQueue->ExecuteCommandLists(ARRAY_LEN(commandLists), commandLists);

		// go through Fence::Signal so that a failed signal is reported
		// instead of leaving waiters stuck on a value that never arrives
		fenceValue++;
		fence.Signal(commandQueue, fenceValue);
	}
	else
	{
		UnmapBuffer(userHBuffer);
	}

	userBuffer.uploading = false;

	// the last of several overlapping buffer uploads has ended
	if(bufferUploadCounter == 0 && multiBufferUpload)
	{
		if(needsRewind)
		{
			EndOfBufferReached();
			needsRewind = false;
		}
		multiBufferUpload = false;
	}
}
|
|
|
|
// Starts uploading the first sub-resource of 'htexture'.
// Reserves space in the upload buffer and fills 'mappedTexture' with the destination
// address, the texture dimensions and the source/destination row pitches.
// Only one texture upload can be open at a time, it must be closed with EndTextureUpload.
void UploadManager::BeginTextureUpload(MappedTexture& mappedTexture, HTexture htexture)
{
	Q_assert(IsNullHandle(currentTexture));

	Texture& texture = rhi.textures.Get(htexture);
	Q_assert(!texture.uploading);

	const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc();
	D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
	UINT64 uploadByteCount64;
	rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, &uploadByteCount64);

	// the total size is 64-bit: validate it before narrowing, or a huge texture
	// (e.g. exactly 4 GiB) would wrap around to a small byte count and pass the size check
	if(uploadByteCount64 > (UINT64)bufferByteCount)
	{
		ri.Error(ERR_FATAL, "Upload request too large!\n");
	}
	const uint32_t uploadByteCount = (uint32_t)uploadByteCount64;
	WaitToStartUploading(uploadByteCount);

	const UINT sourcePitch = (UINT)(texture.desc.width * GetBytesPerPixel(texture.desc.format));
	mappedTexture.mappedData = mappedBuffer + bufferByteOffset;
	mappedTexture.columnCount = texture.desc.width;
	mappedTexture.rowCount = texture.desc.height;
	mappedTexture.sliceCount = texture.desc.depth;
	mappedTexture.srcRowByteCount = sourcePitch;
	mappedTexture.dstRowByteCount = AlignUp<uint32_t>(layout.Footprint.RowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);

	// remember where the data lives for EndTextureUpload and reserve the region
	texture.uploadByteOffset = bufferByteOffset;
	texture.uploading = true;
	bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + uploadByteCount, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
	currentTexture = htexture;
	batchTextureCount++;
}
|
|
|
|
void UploadManager::EndTextureUpload()
|
|
{
|
|
Q_assert(!IsNullHandle(currentTexture));
|
|
|
|
const HTexture htexture = currentTexture;
|
|
Texture& texture = rhi.textures.Get(htexture);
|
|
Q_assert(texture.uploading);
|
|
|
|
const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc();
|
|
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
|
|
rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, NULL);
|
|
|
|
Buffer& buffer = rhi.buffers.Get(uploadHBuffer);
|
|
D3D12_TEXTURE_COPY_LOCATION dstLoc = { 0 };
|
|
D3D12_TEXTURE_COPY_LOCATION srcLoc = { 0 };
|
|
dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
dstLoc.pResource = texture.texture;
|
|
dstLoc.SubresourceIndex = 0;
|
|
srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
|
srcLoc.pResource = buffer.buffer;
|
|
srcLoc.PlacedFootprint = layout;
|
|
srcLoc.PlacedFootprint.Offset = texture.uploadByteOffset;
|
|
D3D12_BOX srcBox = { 0 };
|
|
srcBox.left = 0;
|
|
srcBox.top = 0;
|
|
srcBox.front = 0;
|
|
srcBox.right = textureDesc.Width;
|
|
srcBox.bottom = textureDesc.Height;
|
|
srcBox.back = textureDesc.DepthOrArraySize;
|
|
|
|
D3D(commandList->Reset(commandAllocator, NULL));
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
const char* const marker = va("Upload: CopyTextureRegion -> %s", texture.desc.name);
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1);
|
|
Q_assert(result == GFSDK_Aftermath_Result_Success);
|
|
#endif
|
|
|
|
commandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox);
|
|
|
|
ID3D12CommandList* commandLists[] = { commandList };
|
|
D3D(commandList->Close());
|
|
commandQueue->ExecuteCommandLists(ARRAY_LEN(commandLists), commandLists);
|
|
fenceValue++;
|
|
commandQueue->Signal(fence.fence, fenceValue);
|
|
|
|
texture.uploading = false;
|
|
currentTexture = RHI_MAKE_NULL_HANDLE();
|
|
}
|
|
|
|
// Makes the given command queue wait on the GPU timeline
// until the upload fence has reached the latest signaled upload fence value.
void UploadManager::WaitToStartDrawing(ID3D12CommandQueue* commandQueue_)
{
	const auto latestUploadValue = fenceValue;
	fence.WaitOnGPU(commandQueue_, latestUploadValue);
}
|
|
|
|
// Makes sure uploadByteCount bytes can be written at the current buffer offset.
// Raises a fatal error when the request can never fit in the upload buffer and
// rewinds the buffer (see EndOfBufferReached) when the remaining space is too small.
void UploadManager::WaitToStartUploading(uint32_t uploadByteCount)
{
	const bool fitsInBuffer = uploadByteCount <= bufferByteCount;
	if(!fitsInBuffer)
	{
		ri.Error(ERR_FATAL, "Upload request too large!\n");
	}

	const bool fitsInRemainingSpace = bufferByteOffset + uploadByteCount <= bufferByteCount;
	if(!fitsInRemainingSpace)
	{
		EndOfBufferReached();
	}
}
|
|
|
|
void UploadManager::EndOfBufferReached()
|
|
{
|
|
ri.Printf(PRINT_DEVELOPER, "Waiting for GPU upload: %s (%d T, %d B)\n",
|
|
Com_FormatBytes(bufferByteOffset),
|
|
batchTextureCount,
|
|
batchBufferCount);
|
|
fence.WaitOnCPU(fenceValue);
|
|
D3D(commandAllocator->Reset());
|
|
bufferByteOffset = 0;
|
|
batchTextureCount = 0;
|
|
batchBufferCount = 0;
|
|
}
|
|
|
|
void ReadbackManager::Create()
|
|
{
|
|
D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&readbackCommandAllocator)));
|
|
SetDebugName(readbackCommandAllocator, "readback", D3DResourceType::CommandAllocator);
|
|
|
|
D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, readbackCommandAllocator, NULL, IID_PPV_ARGS(&readbackCommandList)));
|
|
SetDebugName(readbackCommandList, "readback", D3DResourceType::CommandList);
|
|
D3D(readbackCommandList->Close());
|
|
|
|
const uint32_t byteCount = GetReadbackTextureByteCount();
|
|
BufferDesc desc("readback", byteCount, ResourceStates::CopyDestinationBit);
|
|
desc.memoryUsage = MemoryUsage::Readback;
|
|
readbackBuffer = CreateBuffer(desc);
|
|
bufferByteCount = byteCount;
|
|
|
|
readbackFence.Create(readbackFenceValue, "readback");
|
|
}
|
|
|
|
void ReadbackManager::Release()
|
|
{
|
|
readbackFence.Release();
|
|
COM_RELEASE(readbackCommandList);
|
|
COM_RELEASE(readbackCommandAllocator);
|
|
}
|
|
|
|
void ReadbackManager::ResizeIfNeeded()
|
|
{
|
|
const uint32_t byteCount = GetReadbackTextureByteCount();
|
|
if(byteCount <= bufferByteCount)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// @NOTE: this is called after the device has become idle
|
|
DestroyBuffer(readbackBuffer);
|
|
|
|
BufferDesc desc("readback", byteCount, ResourceStates::CopyDestinationBit);
|
|
desc.memoryUsage = MemoryUsage::Readback;
|
|
readbackBuffer = CreateBuffer(desc);
|
|
bufferByteCount = byteCount;
|
|
}
|
|
|
|
// Copies sub-resource 0 of an R8G8B8A8_UNorm texture into the readback buffer,
// blocks the CPU until the GPU has completed the copy and maps the buffer.
// On return, mappedTexture describes the mapped pixels (1 slice, rows of RowPitch bytes).
// Each call must be matched by a call to EndTextureReadback, which unmaps the buffer.
void ReadbackManager::BeginTextureReadback(MappedTexture& mappedTexture, HTexture htexture)
{
	D3D(readbackCommandAllocator->Reset());
	D3D(readbackCommandList->Reset(readbackCommandAllocator, NULL));

	Texture& texture = rhi.textures.Get(htexture);
	Q_assert(texture.desc.format == TextureFormat::R8G8B8A8_UNorm);
	const D3D12_RESOURCE_DESC textureDesc = texture.texture->GetDesc();
	D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
	rhi.device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, NULL, NULL, NULL);
	Q_assert(layout.Footprint.Format == DXGI_FORMAT_R8G8B8A8_UNORM);
	Q_assert(layout.Footprint.Width == texture.desc.width);
	Q_assert(layout.Footprint.Height == texture.desc.height);

	// destination: the start of the readback buffer
	Buffer& buffer = rhi.buffers.Get(readbackBuffer);
	D3D12_TEXTURE_COPY_LOCATION dstLoc = { 0 };
	dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
	dstLoc.pResource = buffer.buffer;
	dstLoc.PlacedFootprint = layout;
	dstLoc.PlacedFootprint.Offset = 0;

	// source: sub-resource 0 of the texture
	D3D12_TEXTURE_COPY_LOCATION srcLoc = { 0 };
	srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
	srcLoc.pResource = texture.texture;
	srcLoc.SubresourceIndex = 0;

	// a single slice covering the full width and height
	D3D12_BOX srcBox = { 0 };
	srcBox.left = 0;
	srcBox.top = 0;
	srcBox.front = 0;
	srcBox.right = textureDesc.Width;
	srcBox.bottom = textureDesc.Height;
	srcBox.back = 1;

	// the texture is transitioned to the copy source state for the copy
	// and put back into its previous state right after
	const D3D12_RESOURCE_STATES prevState = texture.currentState;
	const bool needsTransition = prevState != D3D12_RESOURCE_STATE_COPY_SOURCE;

	// @TODO: use CmdBarrier
	D3D12_RESOURCE_BARRIER barrier = {};
	barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
	barrier.Transition.pResource = texture.texture;
	if(needsTransition)
	{
		barrier.Transition.StateBefore = prevState;
		barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
		readbackCommandList->ResourceBarrier(1, &barrier);
		texture.currentState = D3D12_RESOURCE_STATE_COPY_SOURCE;
	}
	readbackCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox);
	if(needsTransition)
	{
		barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
		barrier.Transition.StateAfter = prevState;
		readbackCommandList->ResourceBarrier(1, &barrier);
		texture.currentState = prevState;
	}

	D3D(readbackCommandList->Close());
	ID3D12CommandList* commandListArray[] = { readbackCommandList };
	rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray);

	// block until the GPU is done so that the mapped data is valid on return
	readbackFenceValue++;
	readbackFence.Signal(rhi.mainCommandQueue, readbackFenceValue);
	readbackFence.WaitOnCPU(readbackFenceValue);

	mappedTexture.mappedData = MapBuffer(readbackBuffer);
	mappedTexture.rowCount = layout.Footprint.Height;
	mappedTexture.columnCount = layout.Footprint.Width;
	mappedTexture.sliceCount = 1; // matches srcBox: exactly 1 slice was copied
	mappedTexture.srcRowByteCount = layout.Footprint.RowPitch;
	mappedTexture.dstRowByteCount = 0;
}
|
|
|
|
void ReadbackManager::EndTextureReadback()
|
|
{
|
|
UnmapBuffer(readbackBuffer);
|
|
}
|
|
|
|
// Creates a descriptor heap of the given type with 'size' descriptors,
// initializes the free list with the caller-supplied storage and
// caches the heap's CPU start address and descriptor increment size.
void DescriptorHeap::Create(D3D12_DESCRIPTOR_HEAP_TYPE heapType, uint32_t size, uint16_t* freeListItems, const char* name)
{
	heap = CreateDescriptorHeap(heapType, size, false, name);
	startAddress = heap->GetCPUDescriptorHandleForHeapStart();
	descriptorSize = rhi.device->GetDescriptorHandleIncrementSize(heapType);
	type = heapType;

	freeList.Init(freeListItems, size);
}
|
|
|
|
void DescriptorHeap::Release()
|
|
{
|
|
COM_RELEASE(heap);
|
|
}
|
|
|
|
// Takes a descriptor index from the free list and returns it.
uint32_t DescriptorHeap::Allocate()
{
	const uint32_t slot = freeList.Allocate();

	return slot;
}
|
|
|
|
// Gives a descriptor index back to the free list.
void DescriptorHeap::Free(uint32_t index)
{
	freeList.Free(index);
}
|
|
|
|
// Returns the CPU descriptor handle of the descriptor at the given index:
// the heap's start address offset by index * descriptor increment size.
D3D12_CPU_DESCRIPTOR_HANDLE DescriptorHeap::GetCPUHandle(uint32_t index)
{
	D3D12_CPU_DESCRIPTOR_HANDLE result;
	result.ptr = startAddress.ptr + index * descriptorSize;

	return result;
}
|
|
|
|
// Allocates a descriptor and creates a shader resource view of 'resource' in it.
// Only valid on a CBV/SRV/UAV heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc)
{
	Q_assert(resource);
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateShaderResourceView(resource, &desc, handle);

	return slot;
}
|
|
|
|
// Allocates a descriptor and creates an unordered access view of 'resource' in it
// (no counter resource is specified).
// Only valid on a CBV/SRV/UAV heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc)
{
	Q_assert(resource);
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, handle);

	return slot;
}
|
|
|
|
// Allocates a descriptor and creates a render target view of 'resource' in it.
// Only valid on an RTV heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateRTV(ID3D12Resource* resource, D3D12_RENDER_TARGET_VIEW_DESC& desc)
{
	Q_assert(resource);
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_RTV);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateRenderTargetView(resource, &desc, handle);

	return slot;
}
|
|
|
|
// Allocates a descriptor and creates a depth stencil view of 'resource' in it.
// Only valid on a DSV heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateDSV(ID3D12Resource* resource, D3D12_DEPTH_STENCIL_VIEW_DESC& desc)
{
	Q_assert(resource);
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_DSV);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateDepthStencilView(resource, &desc, handle);

	return slot;
}
|
|
|
|
// Allocates a descriptor and creates a constant buffer view in it.
// Only valid on a CBV/SRV/UAV heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc)
{
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateConstantBufferView(&desc, handle);

	return slot;
}
|
|
|
|
// Allocates a descriptor and creates a sampler in it.
// Only valid on a sampler heap. Returns the index of the descriptor.
uint32_t DescriptorHeap::CreateSampler(D3D12_SAMPLER_DESC& desc)
{
	Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

	const uint32_t slot = Allocate();
	const D3D12_CPU_DESCRIPTOR_HANDLE handle = GetCPUHandle(slot);
	rhi.device->CreateSampler(&desc, handle);

	return slot;
}
|
|
|
|
// Sets up a range of count_ descriptors starting at start_.
// The first slot (start_) is the reserved slot,
// so regular allocations begin at start_ + 1.
void DynamicResources::DescriptorRange::Init(D3D12_DESCRIPTOR_RANGE_TYPE type_, uint32_t start_, uint32_t count_)
{
	Q_assert(count_ > 0);

	type = type_;
	count = count_;
	start = start_;

	// slot 0 of the range is only handed out by Allocate(true)
	reservedSlotUsed = false;
	index = start_ + 1;
}
|
|
|
|
// Allocates 1 descriptor slot from the range and returns its index.
// slotAtIndex0: when true, returns the reserved first slot of the range,
//               which can only be handed out once
// Dies (ASSERT_OR_DIE) when the reserved slot was already taken or the range is exhausted.
uint32_t DynamicResources::DescriptorRange::Allocate(bool slotAtIndex0)
{
	if(slotAtIndex0)
	{
		ASSERT_OR_DIE(!reservedSlotUsed, "Can only use 1 reserved slot");
		reservedSlotUsed = true;

		return start;
	}

	// 'index' is the next slot to hand out and the range covers [start, start + count),
	// so the last valid slot is start + count - 1
	ASSERT_OR_DIE(index < start + count, "Not enough descriptors");

	return index++;
}
|
|
|
|
// Returns a short description for a device removal reason code.
// Unknown codes are formatted as hexadecimal with va.
static const char* GetDeviceRemovedReasonString(HRESULT reason)
{
	switch(reason)
	{
		case DXGI_ERROR_DEVICE_HUNG:
			return "device hung";
		case DXGI_ERROR_DEVICE_REMOVED:
			return "device removed";
		case DXGI_ERROR_DEVICE_RESET:
			return "device reset";
		case DXGI_ERROR_DRIVER_INTERNAL_ERROR:
			return "internal driver error";
		case DXGI_ERROR_INVALID_CALL:
			return "invalid call";
		case S_OK:
			return "no error";
		default:
			break;
	}

	return va("unknown error code 0x%08X", (unsigned int)reason);
}
|
|
|
|
// Maps the engine's GPU preference value to the DXGI GPU preference.
// Any value other than GPUPREF_HIGHPERF and GPUPREF_LOWPOWER maps to "unspecified".
static DXGI_GPU_PREFERENCE GetGPUPreference(int preference)
{
	if(preference == GPUPREF_HIGHPERF)
	{
		return DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE;
	}

	if(preference == GPUPREF_LOWPOWER)
	{
		return DXGI_GPU_PREFERENCE_MINIMUM_POWER;
	}

	return DXGI_GPU_PREFERENCE_UNSPECIFIED;
}
|
|
|
|
// Returns true when the adapter is a hardware adapter for which
// a D3D12 device can be created at the required feature level.
// Failures other than "software adapter" are reported with a warning.
static bool IsSuitableAdapter(IDXGIAdapter1* adapter)
{
	DXGI_ADAPTER_DESC1 desc;
	const HRESULT descResult = adapter->GetDesc1(&desc);
	if(FAILED(descResult))
	{
		ri.Printf(PRINT_WARNING, "D3D12: IDXGIAdapter1::GetDesc1 failed with code 0x%08X (%s)\n",
			(unsigned int)descResult, GetSystemErrorString(descResult));
		return false;
	}

	// software adapters are silently rejected
	const bool isSoftware = (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0;
	if(isSoftware)
	{
		//ri.Printf(PRINT_WARNING, "D3D12: '%s' is not real hardware\n",
		//GetUTF8Name(desc.Description, "unknown adapter"));
		return false;
	}

	// passing NULL as the output pointer only tests whether device creation would succeed
	const HRESULT deviceResult = D3D12CreateDevice(adapter, FeatureLevel, __uuidof(ID3D12Device), NULL);
	if(FAILED(deviceResult))
	{
		ri.Printf(PRINT_WARNING, "D3D12: can't create device for '%s' with code 0x%08X (%s)\n",
			GetUTF8String(desc.Description, "unknown adapter"), (unsigned int)deviceResult, GetSystemErrorString(deviceResult));
		return false;
	}

	return true;
}
|
|
|
|
static void CreateAdapterList()
|
|
{
|
|
IDXGIAdapter1* adapter = NULL;
|
|
UINT enumIndex = 0;
|
|
rhi.gpuCount = 0;
|
|
while(rhi.gpuCount < ARRAY_LEN(rhi.gpus) &&
|
|
SUCCEEDED(rhi.factory->EnumAdapters1(enumIndex++, &adapter)))
|
|
{
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
if(IsSuitableAdapter(adapter) && SUCCEEDED(adapter->GetDesc1(&desc)))
|
|
{
|
|
GPU& gpu = rhi.gpus[rhi.gpuCount++];
|
|
gpu.uniqueId = desc.AdapterLuid;
|
|
Q_strncpyz(gpu.name, GetUTF8String(desc.Description, "???"), sizeof(gpu.name));
|
|
}
|
|
COM_RELEASE(adapter);
|
|
}
|
|
}
|
|
|
|
// Returns the adapter matching entry gpuIndex (0-based) of the list built by CreateAdapterList,
// or NULL (with a warning) when the index is invalid or the adapter can't be found anymore.
// The adapters are matched by LUID. The returned adapter is not released by this function.
static IDXGIAdapter1* GetAdapterAtIndex(int gpuIndex)
{
	// only the first rhi.gpuCount entries of rhi.gpus were filled in by CreateAdapterList
	if(gpuIndex < 0 || gpuIndex >= (int)rhi.gpuCount)
	{
		ri.Printf(PRINT_WARNING, "GPU index %d is invalid\n", gpuIndex + 1);
		return NULL;
	}
	const LUID uniqueId = rhi.gpus[gpuIndex].uniqueId;

	IDXGIAdapter1* adapter = NULL;
	UINT enumIndex = 0;
	while(SUCCEEDED(rhi.factory->EnumAdapters1(enumIndex++, &adapter)))
	{
		DXGI_ADAPTER_DESC1 desc;
		if(SUCCEEDED(adapter->GetDesc1(&desc)) &&
			desc.AdapterLuid.LowPart == uniqueId.LowPart &&
			desc.AdapterLuid.HighPart == uniqueId.HighPart)
		{
			return adapter;
		}
		COM_RELEASE(adapter);
	}

	ri.Printf(PRINT_WARNING, "GPU at index %d (%s) is no longer available\n", gpuIndex + 1, rhi.gpus[gpuIndex].name);
	return NULL;
}
|
|
|
|
// Returns the first suitable adapter (see IsSuitableAdapter) enumerated by 'factory'.
// When IDXGIFactory6 is available, the adapters are first enumerated
// in the order of the GPU preference mapped from enginePreference.
// Otherwise, or when that yields nothing, the default enumeration order is used.
// Raises a fatal error when no suitable adapter exists.
// The returned adapter is not released by this function.
static IDXGIAdapter1* FindMostSuitableAdapter(IDXGIFactory1* factory, int enginePreference)
{
	IDXGIAdapter1* adapter = NULL;
	IDXGIFactory6* factory6 = NULL;
	if(SUCCEEDED(factory->QueryInterface(IID_PPV_ARGS(&factory6))))
	{
		const DXGI_GPU_PREFERENCE dxgiPreference = GetGPUPreference(enginePreference);

		UINT i = 0;
		while(SUCCEEDED(factory6->EnumAdapterByGpuPreference(i++, dxgiPreference, IID_PPV_ARGS(&adapter))))
		{
			if(IsSuitableAdapter(adapter))
			{
				COM_RELEASE(factory6);
				return adapter;
			}
			COM_RELEASE(adapter);
		}
	}
	COM_RELEASE(factory6);

	// fallback: default enumeration order, using the factory that was passed in
	UINT i = 0;
	while(SUCCEEDED(factory->EnumAdapters1(i++, &adapter)))
	{
		if(IsSuitableAdapter(adapter))
		{
			return adapter;
		}
		COM_RELEASE(adapter);
	}

	ri.Error(ERR_FATAL, "No suitable DXGI adapter was found!\n");
	return NULL;
}
|
|
|
|
static void Present()
|
|
{
|
|
UINT flags;
|
|
UINT swapInterval;
|
|
if(r_vsync->integer)
|
|
{
|
|
swapInterval = 1;
|
|
flags = 0;
|
|
}
|
|
else
|
|
{
|
|
swapInterval = 0;
|
|
flags = rhi.isTearingSupported ? DXGI_PRESENT_ALLOW_TEARING : 0;
|
|
}
|
|
const HRESULT hr = rhi.swapChain->Present(swapInterval, flags);
|
|
rhi.frameLatencyWaitNeeded = true;
|
|
|
|
#if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION)
|
|
if(rhi.nvapiActive &&
|
|
(hr == DXGI_ERROR_DEVICE_REMOVED ||
|
|
hr == D3DDDIERR_DEVICEREMOVED ||
|
|
hr == DXGI_ERROR_DEVICE_RESET))
|
|
{
|
|
NvAPI_D3D12_FlushRaytracingValidationMessages(rhi.device);
|
|
}
|
|
#endif
|
|
|
|
enum PresentError
|
|
{
|
|
PE_NONE,
|
|
PE_DEVICE_REMOVED,
|
|
PE_DEVICE_RESET
|
|
};
|
|
PresentError presentError = PE_NONE;
|
|
HRESULT deviceRemovedReason = S_OK;
|
|
if(hr == DXGI_ERROR_DEVICE_REMOVED || hr == D3DDDIERR_DEVICEREMOVED)
|
|
{
|
|
deviceRemovedReason = rhi.device->GetDeviceRemovedReason();
|
|
if(deviceRemovedReason == DXGI_ERROR_DEVICE_RESET)
|
|
{
|
|
presentError = PE_DEVICE_RESET;
|
|
}
|
|
else
|
|
{
|
|
presentError = PE_DEVICE_REMOVED;
|
|
}
|
|
}
|
|
else if(hr == DXGI_ERROR_DEVICE_RESET)
|
|
{
|
|
presentError = PE_DEVICE_RESET;
|
|
}
|
|
#if defined(D3D_DEBUG)
|
|
else if(hr != S_OK)
|
|
{
|
|
Sys_DebugPrintf("Present error: 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr));
|
|
}
|
|
#endif
|
|
|
|
if(presentError == PE_DEVICE_REMOVED)
|
|
{
|
|
ri.Error(ERR_FATAL, "Direct3D device was removed! Reason: %s\n", GetDeviceRemovedReasonString(deviceRemovedReason));
|
|
}
|
|
else if(presentError == PE_DEVICE_RESET)
|
|
{
|
|
ri.Printf(PRINT_ERROR, "Direct3D device was reset! Restarting the video system...\n");
|
|
Cbuf_AddText("vid_restart\n");
|
|
}
|
|
}
|
|
|
|
#if defined(_DEBUG)
// Debug-only check: returns true when a command list is currently set in the RHI.
static bool CanWriteCommands()
{
	// @TODO: check that the command list is open
	const bool hasCommandList = rhi.commandList != NULL;

	return hasCommandList;
}
#endif
|
|
|
|
template<typename T, typename HT, Handle RT, int N>
|
|
static void DestroyPool(StaticPool<T, HT, RT, N>& pool, void (*DestroyResource)(HT), bool fullShutDown)
|
|
{
|
|
T* resource;
|
|
HT handle;
|
|
for(int i = 0; pool.FindNext(&resource, &handle, &i);)
|
|
{
|
|
if(fullShutDown || resource->shortLifeTime)
|
|
{
|
|
(*DestroyResource)(handle);
|
|
}
|
|
}
|
|
|
|
if(fullShutDown)
|
|
{
|
|
pool.Clear();
|
|
}
|
|
}
|
|
|
|
static const char* AllocateName(const char* name, bool shortLifeTime)
|
|
{
|
|
LinearAllocator& allocator = shortLifeTime ? rhi.tempStringAllocator : rhi.persStringAllocator;
|
|
|
|
return allocator.Allocate(name);
|
|
}
|
|
|
|
// Replaces desc.name with the string returned by AllocateName,
// selecting the allocator based on desc.shortLifeTime.
// @NOTE: the descriptor is taken by const reference but its name is deliberately patched in place.
template<typename T>
static void AllocateAndFixName(const T& desc)
{
	// only cast away const-ness: the write must go to T's own 'name' member
	// instead of assuming that T has the same memory layout as BufferDesc
	const_cast<T&>(desc).name = AllocateName(desc.name, desc.shortLifeTime);
}
|
|
|
|
// Returns the DXGI format of an index type:
// R16_UINT for IndexType::UInt16 and R32_UINT for everything else.
static DXGI_FORMAT GetD3DIndexFormat(IndexType::Id type)
{
	if(type == IndexType::UInt16)
	{
		return DXGI_FORMAT_R16_UINT;
	}

	return DXGI_FORMAT_R32_UINT;
}
|
|
|
|
// Maps a single RHI shader stage to a D3D12 shader visibility value.
// Compute and unsupported stages (which assert) map to "all".
static D3D12_SHADER_VISIBILITY GetD3DVisibility(ShaderStage::Id shaderType)
{
	switch(shaderType)
	{
		case ShaderStage::Vertex:
			return D3D12_SHADER_VISIBILITY_VERTEX;
		case ShaderStage::Pixel:
			return D3D12_SHADER_VISIBILITY_PIXEL;
		case ShaderStage::Compute:
			return D3D12_SHADER_VISIBILITY_ALL;
		default:
			break;
	}

	Q_assert(!"Unsupported shader type");
	return D3D12_SHADER_VISIBILITY_ALL;
}
|
|
|
|
// Maps a set of RHI shader stage bits to a D3D12 shader visibility value.
// More than 1 graphics stage bit maps to "all", otherwise the vertex bit is tested first,
// then the pixel bit, and anything else maps to "all".
static D3D12_SHADER_VISIBILITY GetD3DVisibility(ShaderStages::Flags flags)
{
	const bool multipleGraphicsStages = __popcnt(flags & ShaderStages::AllGraphicsBits) > 1;
	if(!multipleGraphicsStages)
	{
		if(flags & ShaderStages::VertexBit)
		{
			return D3D12_SHADER_VISIBILITY_VERTEX;
		}

		if(flags & ShaderStages::PixelBit)
		{
			return D3D12_SHADER_VISIBILITY_PIXEL;
		}
	}

	return D3D12_SHADER_VISIBILITY_ALL;
}
|
|
|
|
// Maps an RHI descriptor type to a D3D12 descriptor range type.
// Read-write textures and buffers both map to UAV ranges.
// Unsupported types assert and map to SRV.
static D3D12_DESCRIPTOR_RANGE_TYPE GetD3DDescriptorRangeType(DescriptorType::Id descType)
{
	switch(descType)
	{
		case DescriptorType::Texture:
			return D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
		case DescriptorType::Buffer:
			return D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
		case DescriptorType::RWTexture:
		case DescriptorType::RWBuffer:
			return D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
		case DescriptorType::Sampler:
			return D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
		default:
			break;
	}

	Q_assert(!"Unsupported descriptor type");
	return D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
}
|
|
|
|
// Returns the HLSL semantic name of an RHI shader semantic.
// Unsupported semantics assert and yield an empty string.
static const char* GetD3DSemanticName(ShaderSemantic::Id semantic)
{
	switch(semantic)
	{
		case ShaderSemantic::Position:
			return "POSITION";
		case ShaderSemantic::Normal:
			return "NORMAL";
		case ShaderSemantic::TexCoord:
			return "TEXCOORD";
		case ShaderSemantic::Color:
			return "COLOR";
		default:
			break;
	}

	Q_assert(!"Unsupported shader semantic");
	return "";
}
|
|
|
|
// Returns the DXGI format for a vector of 1 to 4 components of the given data type.
// Invalid vector lengths, 3-component UNorm8 vectors and unsupported data types
// assert and yield DXGI_FORMAT_UNKNOWN.
static DXGI_FORMAT GetD3DFormat(DataType::Id dataType, uint32_t vectorLength)
{
	if(vectorLength < 1 || vectorLength > 4)
	{
		Q_assert(!"Invalid vector length");
		return DXGI_FORMAT_UNKNOWN;
	}

	// the tables are indexed by component count - 1
	const uint32_t formatIndex = vectorLength - 1;

	switch(dataType)
	{
		case DataType::Float32:
		{
			static const DXGI_FORMAT floatFormats[4] =
			{
				DXGI_FORMAT_R32_FLOAT,
				DXGI_FORMAT_R32G32_FLOAT,
				DXGI_FORMAT_R32G32B32_FLOAT,
				DXGI_FORMAT_R32G32B32A32_FLOAT
			};
			return floatFormats[formatIndex];
		}

		case DataType::UInt32:
		{
			static const DXGI_FORMAT uintFormats[4] =
			{
				DXGI_FORMAT_R32_UINT,
				DXGI_FORMAT_R32G32_UINT,
				DXGI_FORMAT_R32G32B32_UINT,
				DXGI_FORMAT_R32G32B32A32_UINT
			};
			return uintFormats[formatIndex];
		}

		case DataType::UNorm8:
		{
			// there is no 3-component 8-bit UNorm format
			if(vectorLength == 3)
			{
				Q_assert(!"Unsupported format");
				return DXGI_FORMAT_UNKNOWN;
			}

			static const DXGI_FORMAT unormFormats[4] =
			{
				DXGI_FORMAT_R8_UNORM,
				DXGI_FORMAT_R8G8_UNORM,
				DXGI_FORMAT_UNKNOWN,
				DXGI_FORMAT_R8G8B8A8_UNORM
			};
			return unormFormats[formatIndex];
		}

		default:
			break;
	}

	Q_assert(!"Unsupported data type");
	return DXGI_FORMAT_UNKNOWN;
}
|
|
|
|
// Maps an RHI comparison function to the D3D12 comparison function.
// Unsupported values assert and map to "always".
static D3D12_COMPARISON_FUNC GetD3DComparisonFunction(ComparisonFunction::Id function)
{
	switch(function)
	{
		case ComparisonFunction::Always:
			return D3D12_COMPARISON_FUNC_ALWAYS;
		case ComparisonFunction::Equal:
			return D3D12_COMPARISON_FUNC_EQUAL;
		case ComparisonFunction::Greater:
			return D3D12_COMPARISON_FUNC_GREATER;
		case ComparisonFunction::GreaterEqual:
			return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
		case ComparisonFunction::Less:
			return D3D12_COMPARISON_FUNC_LESS;
		case ComparisonFunction::LessEqual:
			return D3D12_COMPARISON_FUNC_LESS_EQUAL;
		case ComparisonFunction::Never:
			return D3D12_COMPARISON_FUNC_NEVER;
		case ComparisonFunction::NotEqual:
			return D3D12_COMPARISON_FUNC_NOT_EQUAL;
		default:
			break;
	}

	Q_assert(!"Unsupported comparison function");
	return D3D12_COMPARISON_FUNC_ALWAYS;
}
|
|
|
|
// Maps an RHI texture format to the DXGI format.
// Unsupported formats assert and map to R8G8B8A8_UNORM.
static DXGI_FORMAT GetD3DFormat(TextureFormat::Id format)
{
	switch(format)
	{
		case TextureFormat::R8G8B8A8_UNorm:
			return DXGI_FORMAT_R8G8B8A8_UNORM;
		case TextureFormat::R16G16B16A16_UNorm:
			return DXGI_FORMAT_R16G16B16A16_UNORM;
		case TextureFormat::R32G32_Float:
			return DXGI_FORMAT_R32G32_FLOAT;
		case TextureFormat::R16G16B16A16_Float:
			return DXGI_FORMAT_R16G16B16A16_FLOAT;
		case TextureFormat::R32G32B32A32_Float:
			return DXGI_FORMAT_R32G32B32A32_FLOAT;
		case TextureFormat::Depth32_Float:
			return DXGI_FORMAT_D32_FLOAT;
		case TextureFormat::Depth24_Stencil8:
			return DXGI_FORMAT_D24_UNORM_S8_UINT;
		case TextureFormat::R8G8_UNorm:
			return DXGI_FORMAT_R8G8_UNORM;
		case TextureFormat::R8_UNorm:
			return DXGI_FORMAT_R8_UNORM;
		case TextureFormat::R10G10B10A2_UNorm:
			return DXGI_FORMAT_R10G10B10A2_UNORM;
		case TextureFormat::R16_UInt:
			return DXGI_FORMAT_R16_UINT;
		case TextureFormat::R32_UInt:
			return DXGI_FORMAT_R32_UINT;
		case TextureFormat::R32G32_UInt:
			return DXGI_FORMAT_R32G32_UINT;
		case TextureFormat::R16G16_SNorm:
			return DXGI_FORMAT_R16G16_SNORM;
		case TextureFormat::R16G16_Float:
			return DXGI_FORMAT_R16G16_FLOAT;
		case TextureFormat::R16_Float:
			return DXGI_FORMAT_R16_FLOAT;
		default:
			break;
	}

	Q_assert(!"Unsupported texture format");
	return DXGI_FORMAT_R8G8B8A8_UNORM;
}
|
|
|
|
// Maps the renderer's cull type to the D3D12 cull mode.
// Unsupported values assert and map to "none".
static D3D12_CULL_MODE GetD3DCullMode(cullType_t cullMode)
{
	switch(cullMode)
	{
		case CT_TWO_SIDED:
			return D3D12_CULL_MODE_NONE;
		case CT_BACK_SIDED:
			return D3D12_CULL_MODE_BACK;
		case CT_FRONT_SIDED:
			return D3D12_CULL_MODE_FRONT;
		default:
			break;
	}

	Q_assert(!"Unsupported cull mode");
	return D3D12_CULL_MODE_NONE;
}
|
|
|
|
// Maps the renderer's texture wrap mode to the D3D12 texture address mode.
// Unsupported values assert and map to "wrap".
static D3D12_TEXTURE_ADDRESS_MODE GetD3DTextureAddressMode(textureWrap_t wrap)
{
	switch(wrap)
	{
		case TW_REPEAT:
			return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
		case TW_CLAMP_TO_EDGE:
			return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
		default:
			break;
	}

	Q_assert(!"Unsupported texture wrap mode");
	return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
}
|
|
|
|
// Maps an RHI texture filter to the D3D12 filter.
// @NOTE: point filtering still filters linearly between mip levels.
// Unsupported values assert and map to tri-linear filtering.
static D3D12_FILTER GetD3DFilter(TextureFilter::Id filter)
{
	switch(filter)
	{
		case TextureFilter::Point:
			return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
		case TextureFilter::Linear:
			return D3D12_FILTER_MIN_MAG_MIP_LINEAR;
		case TextureFilter::Anisotropic:
			return D3D12_FILTER_ANISOTROPIC;
		default:
			break;
	}

	Q_assert(!"Unsupported texture filter mode");
	return D3D12_FILTER_MIN_MAG_MIP_LINEAR;
}
|
|
|
|
// Maps an RHI stencil operation to the D3D12 stencil operation.
// Unsupported values assert and map to "replace".
static D3D12_STENCIL_OP GetD3DStencilOp(StencilOp::Id stencilOp)
{
	switch(stencilOp)
	{
		case StencilOp::Keep: return D3D12_STENCIL_OP_KEEP;
		case StencilOp::Zero: return D3D12_STENCIL_OP_ZERO;
		case StencilOp::Replace: return D3D12_STENCIL_OP_REPLACE;
		case StencilOp::SaturatedIncrement: return D3D12_STENCIL_OP_INCR_SAT;
		case StencilOp::SaturatedDecrement: return D3D12_STENCIL_OP_DECR_SAT;
		case StencilOp::Invert: return D3D12_STENCIL_OP_INVERT;
		case StencilOp::WrappedIncrement: return D3D12_STENCIL_OP_INCR;
		case StencilOp::WrappedDecrement: return D3D12_STENCIL_OP_DECR;
		default: Q_assert(!"Unsupported stencil operation"); return D3D12_STENCIL_OP_REPLACE;
	}
}
|
|
|
|
// Converts a set of RHI resource state bits into the combined D3D12 resource states.
// The result starts as the "common" state and accumulates the D3D12 bits of every RHI bit that is set.
static D3D12_RESOURCE_STATES GetD3DResourceStates(ResourceStates::Flags flags)
{
	D3D12_RESOURCE_STATES d3dStates = D3D12_RESOURCE_STATE_COMMON;

#define MAP_STATE(RHIBit, D3DBits) \
	if(flags & ResourceStates::RHIBit) \
	{ \
		d3dStates |= D3DBits; \
	}

	// vertex and constant buffers share the same D3D12 state
	MAP_STATE(VertexBufferBit, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
	MAP_STATE(IndexBufferBit, D3D12_RESOURCE_STATE_INDEX_BUFFER);
	MAP_STATE(ConstantBufferBit, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
	MAP_STATE(RenderTargetBit, D3D12_RESOURCE_STATE_RENDER_TARGET);
	// vertex and compute shader access both map to the non-pixel shader resource state
	MAP_STATE(VertexShaderAccessBit, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
	MAP_STATE(PixelShaderAccessBit, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
	MAP_STATE(ComputeShaderAccessBit, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
	MAP_STATE(CopySourceBit, D3D12_RESOURCE_STATE_COPY_SOURCE);
	MAP_STATE(CopyDestinationBit, D3D12_RESOURCE_STATE_COPY_DEST);
	MAP_STATE(DepthReadBit, D3D12_RESOURCE_STATE_DEPTH_READ);
	MAP_STATE(DepthWriteBit, D3D12_RESOURCE_STATE_DEPTH_WRITE);
	MAP_STATE(UnorderedAccessBit, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
	MAP_STATE(PresentBit, D3D12_RESOURCE_STATE_PRESENT);
	MAP_STATE(RaytracingASBit, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE);
	MAP_STATE(IndirectDispatchBit, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);

#undef MAP_STATE

	return d3dStates;
}
|
|
|
|
// Extracts the source blend factor from the GLS_* state bits and maps it to the D3D12 blend factor.
// No source blend bits means "one". Unsupported values assert and map to "one".
static D3D12_BLEND GetD3DSourceBlend(uint32_t stateBits)
{
	const uint32_t srcBlendBits = stateBits & GLS_SRCBLEND_BITS;
	switch(srcBlendBits)
	{
		case 0:
			return D3D12_BLEND_ONE;
		case GLS_SRCBLEND_ZERO:
			return D3D12_BLEND_ZERO;
		case GLS_SRCBLEND_ONE:
			return D3D12_BLEND_ONE;
		case GLS_SRCBLEND_DST_COLOR:
			return D3D12_BLEND_DEST_COLOR;
		case GLS_SRCBLEND_ONE_MINUS_DST_COLOR:
			return D3D12_BLEND_INV_DEST_COLOR;
		case GLS_SRCBLEND_SRC_ALPHA:
			return D3D12_BLEND_SRC_ALPHA;
		case GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA:
			return D3D12_BLEND_INV_SRC_ALPHA;
		case GLS_SRCBLEND_DST_ALPHA:
			return D3D12_BLEND_DEST_ALPHA;
		case GLS_SRCBLEND_ONE_MINUS_DST_ALPHA:
			return D3D12_BLEND_INV_DEST_ALPHA;
		case GLS_SRCBLEND_ALPHA_SATURATE:
			return D3D12_BLEND_SRC_ALPHA_SAT;
		default:
			break;
	}

	Q_assert(!"Unsupported source blend mode");
	return D3D12_BLEND_ONE;
}
|
|
|
|
// Extracts the destination blend factor from the GLS_* state bits and maps it to the D3D12 blend factor.
// No destination blend bits means "zero". Unsupported values assert and map to "one".
static D3D12_BLEND GetD3DDestBlend(uint32_t stateBits)
{
	const uint32_t dstBlendBits = stateBits & GLS_DSTBLEND_BITS;
	switch(dstBlendBits)
	{
		case 0:
			return D3D12_BLEND_ZERO;
		case GLS_DSTBLEND_ZERO:
			return D3D12_BLEND_ZERO;
		case GLS_DSTBLEND_ONE:
			return D3D12_BLEND_ONE;
		case GLS_DSTBLEND_SRC_COLOR:
			return D3D12_BLEND_SRC_COLOR;
		case GLS_DSTBLEND_ONE_MINUS_SRC_COLOR:
			return D3D12_BLEND_INV_SRC_COLOR;
		case GLS_DSTBLEND_SRC_ALPHA:
			return D3D12_BLEND_SRC_ALPHA;
		case GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA:
			return D3D12_BLEND_INV_SRC_ALPHA;
		case GLS_DSTBLEND_DST_ALPHA:
			return D3D12_BLEND_DEST_ALPHA;
		case GLS_DSTBLEND_ONE_MINUS_DST_ALPHA:
			return D3D12_BLEND_INV_DEST_ALPHA;
		default:
			break;
	}

	Q_assert(!"Unsupported dest blend mode");
	return D3D12_BLEND_ONE;
}
|
|
|
|
// Maps an RHI shading rate to the D3D12 shading rate.
// Unsupported values assert and map to 1x1.
D3D12_SHADING_RATE GetD3DShadingRate(ShadingRate::Id shadingRate)
{
	switch(shadingRate)
	{
		case ShadingRate::SR_1x1:
			return D3D12_SHADING_RATE_1X1;
		case ShadingRate::SR_1x2:
			return D3D12_SHADING_RATE_1X2;
		case ShadingRate::SR_2x1:
			return D3D12_SHADING_RATE_2X1;
		case ShadingRate::SR_2x2:
			return D3D12_SHADING_RATE_2X2;
		case ShadingRate::SR_2x4:
			return D3D12_SHADING_RATE_2X4;
		case ShadingRate::SR_4x2:
			return D3D12_SHADING_RATE_4X2;
		case ShadingRate::SR_4x4:
			return D3D12_SHADING_RATE_4X4;
		default:
			break;
	}

	Q_assert(!"Unsupported shading rate");
	return D3D12_SHADING_RATE_1X1;
}
|
|
|
|
// Returns the alpha blend factor matching a color blend factor:
// the color-based factors are replaced by their alpha-based counterparts
// and every other factor is returned unchanged.
static D3D12_BLEND GetAlphaBlendFromColorBlend(D3D12_BLEND colorBlend)
{
	if(colorBlend == D3D12_BLEND_SRC_COLOR)
	{
		return D3D12_BLEND_SRC_ALPHA;
	}

	if(colorBlend == D3D12_BLEND_INV_SRC_COLOR)
	{
		return D3D12_BLEND_INV_SRC_ALPHA;
	}

	if(colorBlend == D3D12_BLEND_DEST_COLOR)
	{
		return D3D12_BLEND_DEST_ALPHA;
	}

	if(colorBlend == D3D12_BLEND_INV_DEST_COLOR)
	{
		return D3D12_BLEND_INV_DEST_ALPHA;
	}

	return colorBlend;
}
|
|
|
|
// Returns true when the format is 1 of the 4 DXGI depth(-stencil) formats.
static bool IsD3DDepthFormat(DXGI_FORMAT format)
{
	const bool isDepth =
		format == DXGI_FORMAT_D16_UNORM ||
		format == DXGI_FORMAT_D24_UNORM_S8_UINT ||
		format == DXGI_FORMAT_D32_FLOAT ||
		format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT;

	return isDepth;
}
|
|
|
|
// Returns a human-readable name for a D3D12 resource state value.
// Only exact matches are named, any other combination of bits yields "???".
static const char* GetNameForD3DResourceStates(D3D12_RESOURCE_STATES states)
{
	switch(states)
	{
		case D3D12_RESOURCE_STATE_COMMON:
			return "common/present";
		case D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER:
			return "vertex/constant buffer";
		case D3D12_RESOURCE_STATE_INDEX_BUFFER:
			return "index buffer";
		case D3D12_RESOURCE_STATE_RENDER_TARGET:
			return "render target";
		case D3D12_RESOURCE_STATE_UNORDERED_ACCESS:
			return "UAV";
		case D3D12_RESOURCE_STATE_DEPTH_WRITE:
			return "depth write";
		case D3D12_RESOURCE_STATE_DEPTH_READ:
			return "depth read";
		case D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE:
			return "non-pixel shader resource";
		case D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE:
			return "pixel shader resource";
		case D3D12_RESOURCE_STATE_COPY_DEST:
			return "copy destination";
		case D3D12_RESOURCE_STATE_COPY_SOURCE:
			return "copy source";
		case D3D12_RESOURCE_STATE_GENERIC_READ:
			return "generic read";
		case D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE:
			return "generic shader resource";
		case D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE:
			return "raytracing acceleration structure";
		default:
			break;
	}

	return "???";
}
|
|
|
|
// Returns the name of a DXGI format without the "DXGI_FORMAT_" prefix.
// The cases are generated from DXGI_FORMAT_LIST, any format not in that list yields "???".
static const char* GetNameForD3DFormat(DXGI_FORMAT format)
{
#define FORMAT(Enum) case DXGI_FORMAT_##Enum: return #Enum;
	switch(format)
	{
		DXGI_FORMAT_LIST(FORMAT)
		default: break;
	}
#undef FORMAT

	return "???";
}
|
|
|
|
// Returns a short display name for a D3D12 heap type.
// Custom heaps are reported as "UMA". Unsupported types assert and yield "unknown".
static const char* GetHeapTypeName(D3D12_HEAP_TYPE type)
{
	switch(type)
	{
		case D3D12_HEAP_TYPE_DEFAULT:
			return "GPU";
		case D3D12_HEAP_TYPE_UPLOAD:
			return "upload";
		case D3D12_HEAP_TYPE_READBACK:
			return "readback";
		case D3D12_HEAP_TYPE_CUSTOM:
			return "UMA";
		default:
			break;
	}

	Q_assert(!"Unsupported heap type");
	return "unknown";
}
|
|
|
|
// Returns the display name of the heap type a resource lives in,
// or "unknown" when the heap properties can't be queried.
static const char* GetResourceHeapName(ID3D12Resource* resource)
{
	D3D12_HEAP_PROPERTIES heapProps;
	D3D12_HEAP_FLAGS heapFlags;
	const HRESULT hr = resource->GetHeapProperties(&heapProps, &heapFlags);
	if(FAILED(hr))
	{
		return "unknown";
	}

	return GetHeapTypeName(heapProps.Type);
}
|
|
|
|
// Asserts that a combination of D3D12 resource state bits is legal for a barrier:
// at most 1 write bit can be set and write bits can't be combined with read bits.
// The states that are exactly "unordered access" or "depth write" are accepted right away.
static void ValidateResourceStateForBarrier(D3D12_RESOURCE_STATES state)
{
	// these 2 states are both read and write states and are valid on their own
	const bool isPureReadWriteState =
		state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS ||
		state == D3D12_RESOURCE_STATE_DEPTH_WRITE;
	if(isPureReadWriteState)
	{
		return;
	}

	static const D3D12_RESOURCE_STATES readOnlyStates[] =
	{
		D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER,
		D3D12_RESOURCE_STATE_INDEX_BUFFER,
		D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
		D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
		D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT,
		D3D12_RESOURCE_STATE_COPY_SOURCE,
		D3D12_RESOURCE_STATE_DEPTH_READ
	};
	static const D3D12_RESOURCE_STATES readWriteStates[] =
	{
		D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
		D3D12_RESOURCE_STATE_DEPTH_WRITE
	};
	static const D3D12_RESOURCE_STATES writeOnlyStates[] =
	{
		D3D12_RESOURCE_STATE_COPY_DEST,
		D3D12_RESOURCE_STATE_RENDER_TARGET,
		D3D12_RESOURCE_STATE_STREAM_OUT
	};

	// count how many read and write states are present
	int readCount = 0;
	int writeCount = 0;
	for(auto readOnlyState : readOnlyStates)
	{
		readCount += (state & readOnlyState) ? 1 : 0;
	}
	for(auto readWriteState : readWriteStates)
	{
		// read-write states count on both sides
		const int present = (state & readWriteState) ? 1 : 0;
		readCount += present;
		writeCount += present;
	}
	for(auto writeOnlyState : writeOnlyStates)
	{
		writeCount += (state & writeOnlyState) ? 1 : 0;
	}

	// MS: "At most one write bit can be set."
	Q_assert(writeCount == 0 || writeCount == 1);

	if(writeCount == 1)
	{
		// MS: "If any write bit is set, then no read bit may be set."
		Q_assert(readCount == 0);
	}
}
|
|
|
|
// returns true if the barrier should be used
|
|
static bool SetBarrier(
|
|
D3D12_RESOURCE_STATES& currentState, D3D12_RESOURCE_BARRIER& barrier,
|
|
ResourceStates::Flags newState, ID3D12Resource* resource)
|
|
{
|
|
const D3D12_RESOURCE_STATES before = currentState;
|
|
const D3D12_RESOURCE_STATES after = GetD3DResourceStates(newState);
|
|
ValidateResourceStateForBarrier(before);
|
|
ValidateResourceStateForBarrier(after);
|
|
|
|
if((before & after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0 ||
|
|
((before & D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE) != 0 &&
|
|
(after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0))
|
|
{
|
|
// note that UAV barriers are unnecessary in a bunch of cases:
|
|
// - before/after access is read-only
|
|
// - before/after access is write-only, but to different ranges
|
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
|
barrier.UAV.pResource = resource;
|
|
}
|
|
else
|
|
{
|
|
if(before == after)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
barrier.Transition.pResource = resource;
|
|
barrier.Transition.StateBefore = before;
|
|
barrier.Transition.StateAfter = after;
|
|
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
currentState = after;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void ResolveDurationQueries()
|
|
{
|
|
const uint32_t frameIndex = (rhi.frameIndex + 1) % rhi.renderFrameCount;
|
|
const HBuffer hbuffer = rhi.timeStampBuffers[frameIndex];
|
|
const Buffer& buffer = rhi.buffers.Get(hbuffer);
|
|
|
|
#if defined(D3D_DEBUG)
|
|
if(r_vsync->integer)
|
|
{
|
|
Q_assert(rhi.frameIndex == 0);
|
|
Q_assert(frameIndex == 0);
|
|
}
|
|
#endif
|
|
|
|
FrameQueries& fq = rhi.frameQueries[frameIndex];
|
|
if(fq.durationQueryCount == 0)
|
|
{
|
|
rhi.resolvedQueries.durationQueryCount = 0;
|
|
return;
|
|
}
|
|
|
|
UINT64 gpuFrequencyU64;
|
|
if(FAILED(rhi.mainCommandQueue->GetTimestampFrequency(&gpuFrequencyU64)))
|
|
{
|
|
for(uint32_t q = 0; q < fq.durationQueryCount; ++q)
|
|
{
|
|
DurationQuery& dq = fq.durationQueries[q];
|
|
dq.state = QueryState::Free;
|
|
}
|
|
fq.durationQueryCount = 0;
|
|
rhi.resolvedQueries.durationQueryCount = 0;
|
|
}
|
|
const double gpuFrequencyF64 = (double)gpuFrequencyU64;
|
|
|
|
const UINT timestampQueryCount = fq.durationQueryCount * 2;
|
|
rhi.commandList->ResolveQueryData(rhi.timeStampHeaps[frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, 0, timestampQueryCount, buffer.buffer, 0);
|
|
const UINT64* const timeStamps = (const UINT64*)MapBuffer(hbuffer);
|
|
|
|
uint32_t* const gpuMicroSeconds = rhi.resolvedQueries.gpuMicroSeconds;
|
|
for(uint32_t q = 0; q < fq.durationQueryCount; ++q)
|
|
{
|
|
DurationQuery& dq = fq.durationQueries[q];
|
|
Q_assert(dq.state == QueryState::Ended);
|
|
if(dq.state != QueryState::Ended)
|
|
{
|
|
gpuMicroSeconds[q] = 0;
|
|
dq.state = QueryState::Free;
|
|
continue;
|
|
}
|
|
|
|
const UINT timeStampBeginIndex = q * 2;
|
|
const UINT timeStampEndIndex = timeStampBeginIndex + 1;
|
|
const UINT64 beginTime = timeStamps[timeStampBeginIndex];
|
|
const UINT64 endTime = timeStamps[timeStampEndIndex];
|
|
if(endTime > beginTime)
|
|
{
|
|
const UINT64 elapsed = endTime - beginTime;
|
|
gpuMicroSeconds[q] = (uint32_t)((elapsed / gpuFrequencyF64) * 1000000.0);
|
|
}
|
|
else
|
|
{
|
|
gpuMicroSeconds[q] = 0;
|
|
}
|
|
|
|
dq.state = QueryState::Free;
|
|
}
|
|
rhi.resolvedQueries.durationQueryCount = fq.durationQueryCount;
|
|
fq.durationQueryCount = 0;
|
|
|
|
UnmapBuffer(hbuffer);
|
|
}
|
|
|
|
static void GrabSwapChainTextures()
|
|
{
|
|
for(uint32_t b = 0; b < rhi.swapChainBufferCount; ++b)
|
|
{
|
|
ID3D12Resource* renderTarget;
|
|
D3D(rhi.swapChain->GetBuffer(b, IID_PPV_ARGS(&renderTarget)));
|
|
|
|
TextureDesc desc(va("swap chain #%d", b + 1), glConfig.vidWidth, glConfig.vidHeight);
|
|
desc.nativeResource = renderTarget;
|
|
desc.initialState = ResourceStates::PresentBit;
|
|
desc.allowedState = ResourceStates::PresentBit | ResourceStates::RenderTargetBit;
|
|
rhi.renderTargets[b] = CreateTexture(desc);
|
|
}
|
|
}
|
|
|
|
static void GetMonitorRefreshRate()
|
|
{
|
|
DWM_TIMING_INFO info = {};
|
|
info.cbSize = sizeof(info);
|
|
if(SUCCEEDED(DwmGetCompositionTimingInfo(NULL, &info)))
|
|
{
|
|
rhie.monitorFrameDurationMS = 1000.0f * ((float)(info.rateRefresh.uiDenominator) / (float)info.rateRefresh.uiNumerator);
|
|
}
|
|
else
|
|
{
|
|
rhie.monitorFrameDurationMS = 0.0f;
|
|
}
|
|
|
|
if(r_vsync->integer == 0)
|
|
{
|
|
const float maxFPS = ri.Cvar_Get("com_maxfps", "125", CVAR_ARCHIVE)->value;
|
|
rhie.targetFrameDurationMS = 1000.0f / maxFPS;
|
|
|
|
}
|
|
else if(rhie.monitorFrameDurationMS > 0.0f)
|
|
{
|
|
rhie.targetFrameDurationMS = rhie.monitorFrameDurationMS;
|
|
}
|
|
else
|
|
{
|
|
rhie.targetFrameDurationMS = 1.0f / 120.0f; // 120 Hz by default
|
|
}
|
|
}
|
|
|
|
static void CreateNullResources()
|
|
{
|
|
{
|
|
TextureDesc desc("null", 1, 1);
|
|
rhi.nullTexture = CreateTexture(desc);
|
|
}
|
|
{
|
|
TextureDesc desc("null RW", 1, 1);
|
|
desc.format = TextureFormat::R8G8B8A8_UNorm;
|
|
desc.initialState = ResourceStates::UnorderedAccessBit;
|
|
desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit;
|
|
rhi.nullRWTexture = CreateTexture(desc);
|
|
}
|
|
{
|
|
BufferDesc desc("null", 256, ResourceStates::ShaderAccessBits);
|
|
desc.memoryUsage = MemoryUsage::GPU;
|
|
rhi.nullBuffer = CreateBuffer(desc);
|
|
}
|
|
{
|
|
BufferDesc desc("null RW", 256, ResourceStates::UnorderedAccessBit);
|
|
desc.memoryUsage = MemoryUsage::GPU;
|
|
rhi.nullRWBuffer = CreateBuffer(desc);
|
|
}
|
|
rhi.nullSampler = CreateSampler(SamplerDesc());
|
|
}
|
|
|
|
// Copies the descriptor at 'srcIndex' of 'srcHeap' into slot 'dstIndex' of 'dstHeap'.
// The destination offset is computed with the descriptor size of the source heap.
static void CopyDescriptor(ID3D12DescriptorHeap* dstHeap, uint32_t dstIndex, DescriptorHeap& srcHeap, uint32_t srcIndex)
{
	Q_assert(srcIndex != InvalidDescriptorIndex);

	D3D12_CPU_DESCRIPTOR_HANDLE destination = dstHeap->GetCPUDescriptorHandleForHeapStart();
	destination.ptr += dstIndex * srcHeap.descriptorSize;

	const D3D12_CPU_DESCRIPTOR_HANDLE source = srcHeap.GetCPUHandle(srcIndex);
	rhi.device->CopyDescriptorsSimple(1, destination, source, srcHeap.type);
}
|
|
|
|
// Packs a floating-point RGB color into a PIX color value.
// Each channel is clamped to [0, 1], scaled by 255 and truncated to a byte.
static UINT BGRAUIntFromFloat(float r, float g, float b)
{
	const float channels[3] = { r, g, b };
	BYTE bytes[3];
	for(int c = 0; c < 3; ++c)
	{
		bytes[c] = (BYTE)(Com_Clamp(0.0f, 1.0f, channels[c]) * 255.0f);
	}

	return PIX_COLOR(bytes[0], bytes[1], bytes[2]);
}
|
|
|
|
static bool IsTearingSupported()
|
|
{
|
|
HMODULE library = LoadLibraryA("DXGI.dll");
|
|
if(library == NULL)
|
|
{
|
|
ri.Printf(PRINT_WARNING, "D3D12: DXGI.dll couldn't be found or opened\n");
|
|
return false;
|
|
}
|
|
|
|
typedef HRESULT(WINAPI* PFN_CreateDXGIFactory)(REFIID riid, _Out_ void** ppFactory);
|
|
PFN_CreateDXGIFactory pCreateDXGIFactory = (PFN_CreateDXGIFactory)GetProcAddress(library, "CreateDXGIFactory");
|
|
if(pCreateDXGIFactory == NULL)
|
|
{
|
|
FreeLibrary(library);
|
|
ri.Printf(PRINT_WARNING, "D3D12: Failed to locate CreateDXGIFactory in DXGI.dll\n");
|
|
return false;
|
|
}
|
|
|
|
HRESULT hr;
|
|
BOOL enabled = FALSE;
|
|
IDXGIFactory5* pFactory;
|
|
hr = (*pCreateDXGIFactory)(__uuidof(IDXGIFactory5), (void**)&pFactory);
|
|
if(FAILED(hr))
|
|
{
|
|
FreeLibrary(library);
|
|
ri.Printf(PRINT_WARNING, "D3D12: 'CreateDXGIFactory' failed with code 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr));
|
|
return false;
|
|
}
|
|
hr = pFactory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &enabled, sizeof(enabled));
|
|
pFactory->Release();
|
|
FreeLibrary(library);
|
|
|
|
if(FAILED(hr))
|
|
{
|
|
ri.Printf(PRINT_WARNING, "D3D12: 'IDXGIFactory5::CheckFeatureSupport' failed with code 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr));
|
|
return false;
|
|
}
|
|
|
|
return enabled != 0;
|
|
}
|
|
|
|
// Returns the swap chain creation flags for the current settings:
// the frame latency waitable object with V-Sync on,
// tearing (when supported) with V-Sync off.
static UINT GetSwapChainFlags()
{
	if(r_vsync->integer)
	{
		return DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
	}

	if(rhi.isTearingSupported)
	{
		return DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
	}

	return 0;
}
|
|
|
|
static void WaitForSwapChain()
|
|
{
|
|
if(rhi.frameLatencyWaitableObject != NULL && rhi.frameLatencyWaitNeeded)
|
|
{
|
|
Q_assert(r_vsync->integer != 0);
|
|
WaitForSingleObjectEx(rhi.frameLatencyWaitableObject, INFINITE, TRUE);
|
|
rhi.frameLatencyWaitNeeded = false;
|
|
}
|
|
}
|
|
|
|
// Creates a shader resource view and returns its descriptor index.
// Note that 'desc' is modified: a D32_FLOAT format is replaced by R32_FLOAT.
// Without dynamic resources, the view is created in the generic descriptor heap.
// With dynamic resources, a slot is allocated from the SRV range
// ('slotAtIndex0' is forwarded to the allocator) of the shader-visible generic heap.
static uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc, bool slotAtIndex0)
{
	Q_assert(resource);

	if(desc.Format == DXGI_FORMAT_D32_FLOAT)
	{
		desc.Format = DXGI_FORMAT_R32_FLOAT;
	}

	if(rhi.useDynamicResources)
	{
		// acceleration structure views are created with a NULL resource pointer
		ID3D12Resource* viewResource = resource;
		if(desc.ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE)
		{
			viewResource = NULL;
		}

		DynamicResources& dr = rhi.dynamicResources;
		const uint32_t slot = dr.srvIndex.Allocate(slotAtIndex0);
		D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
		const UINT stride = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		cpuHandle.ptr += slot * stride;
		rhi.device->CreateShaderResourceView(viewResource, &desc, cpuHandle);

		return slot;
	}

	return rhi.descHeapGeneric.CreateSRV(resource, desc);
}
|
|
|
|
// Creates an unordered access view and returns its descriptor index.
// Without dynamic resources, the view is created in the generic descriptor heap.
// With dynamic resources, the view is written at the same slot
// in both the shader-visible heap and the CPU-only heap.
static uint32_t CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc)
{
	Q_assert(resource);

	if(rhi.useDynamicResources)
	{
		DynamicResources& dr = rhi.dynamicResources;
		const uint32_t slot = dr.uavIndex.Allocate();
		const UINT stride = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

		D3D12_CPU_DESCRIPTOR_HANDLE shaderVisibleHandle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
		shaderVisibleHandle.ptr += slot * stride;
		rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, shaderVisibleHandle);

		// make a CPU-visible copy at the same offset in another heap to enable UAV clears
		D3D12_CPU_DESCRIPTOR_HANDLE cpuOnlyHandle = dr.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
		cpuOnlyHandle.ptr += slot * stride;
		rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, cpuOnlyHandle);

		return slot;
	}

	return rhi.descHeapGeneric.CreateUAV(resource, desc);
}
|
|
|
|
// Creates a constant buffer view and returns its descriptor index.
// Without dynamic resources, the view is created in the generic descriptor heap.
// With dynamic resources, a slot is allocated from the CBV range of the shader-visible generic heap.
static uint32_t CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc)
{
	if(rhi.useDynamicResources)
	{
		DynamicResources& dr = rhi.dynamicResources;
		const uint32_t slot = dr.cbvIndex.Allocate();
		D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
		const UINT stride = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		cpuHandle.ptr += slot * stride;
		rhi.device->CreateConstantBufferView(&desc, cpuHandle);

		return slot;
	}

	return rhi.descHeapGeneric.CreateCBV(desc);
}
|
|
|
|
// Creates a sampler descriptor and returns its descriptor index.
// Without dynamic resources, the sampler is created in the sampler descriptor heap.
// With dynamic resources, a slot is allocated from the sampler range of the shader-visible sampler heap.
static uint32_t CreateSampler(D3D12_SAMPLER_DESC& desc)
{
	if(rhi.useDynamicResources)
	{
		DynamicResources& dr = rhi.dynamicResources;
		const uint32_t slot = dr.samplerIndex.Allocate();
		D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = dr.samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
		const UINT stride = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
		cpuHandle.ptr += slot * stride;
		rhi.device->CreateSampler(&desc, cpuHandle);

		return slot;
	}

	return rhi.descHeapSamplers.CreateSampler(desc);
}
|
|
|
|
// Creates the SRV, CBV and UAV descriptors of a buffer based on its initial state flags
// and stores their indices in the buffer (InvalidDescriptorIndex for the views not created).
// Buffers with a structure byte count get structured views, the others get raw 32-bit views.
static void CreateBufferDescriptors(Buffer& buffer)
{
	const BufferDesc& rhiDesc = buffer.desc;
	ID3D12Resource* const resource = buffer.buffer;
	const bool structured = rhiDesc.structureByteCount > 0;

	// shader resource view: regular buffer view or acceleration structure view
	uint32_t srvIndex = InvalidDescriptorIndex;
	if(rhiDesc.initialState & ResourceStates::ShaderAccessBits)
	{
		D3D12_SHADER_RESOURCE_VIEW_DESC viewDesc = {};
		viewDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
		viewDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
		viewDesc.Format = structured ? DXGI_FORMAT_UNKNOWN : DXGI_FORMAT_R32_TYPELESS;
		viewDesc.Buffer.FirstElement = 0;
		viewDesc.Buffer.NumElements = structured ?
			rhiDesc.byteCount / rhiDesc.structureByteCount :
			rhiDesc.byteCount / 4;
		viewDesc.Buffer.StructureByteStride = structured ? rhiDesc.structureByteCount : 0;
		viewDesc.Buffer.Flags = structured ? D3D12_BUFFER_SRV_FLAG_NONE : D3D12_BUFFER_SRV_FLAG_RAW;
		srvIndex = CreateSRV(resource, viewDesc, rhiDesc.useSrvIndex0);
	}
	else if(rhiDesc.initialState & ResourceStates::RaytracingASBit)
	{
		D3D12_SHADER_RESOURCE_VIEW_DESC viewDesc = {};
		viewDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE;
		viewDesc.Format = DXGI_FORMAT_UNKNOWN;
		viewDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
		viewDesc.RaytracingAccelerationStructure.Location = buffer.gpuAddress;
		srvIndex = CreateSRV(resource, viewDesc, false);
	}

	// constant buffer view covering the whole buffer
	uint32_t cbvIndex = InvalidDescriptorIndex;
	if(rhiDesc.initialState & ResourceStates::ConstantBufferBit)
	{
		D3D12_CONSTANT_BUFFER_VIEW_DESC viewDesc = {};
		viewDesc.BufferLocation = resource->GetGPUVirtualAddress();
		viewDesc.SizeInBytes = rhiDesc.byteCount;
		cbvIndex = CreateCBV(viewDesc);
	}

	// unordered access view
	uint32_t uavIndex = InvalidDescriptorIndex;
	if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit)
	{
		D3D12_UNORDERED_ACCESS_VIEW_DESC viewDesc = {};
		viewDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
		viewDesc.Format = structured ? DXGI_FORMAT_UNKNOWN : DXGI_FORMAT_R32_TYPELESS;
		viewDesc.Buffer.CounterOffsetInBytes = 0;
		viewDesc.Buffer.FirstElement = 0;
		viewDesc.Buffer.Flags = structured ? D3D12_BUFFER_UAV_FLAG_NONE : D3D12_BUFFER_UAV_FLAG_RAW;
		viewDesc.Buffer.NumElements = structured ?
			rhiDesc.byteCount / rhiDesc.structureByteCount :
			rhiDesc.byteCount / 4;
		viewDesc.Buffer.StructureByteStride = structured ? rhiDesc.structureByteCount : 0;
		uavIndex = CreateUAV(resource, viewDesc);
	}

	buffer.cbvIndex = cbvIndex;
	buffer.uavIndex = uavIndex;
	buffer.srvIndex = srvIndex;
}
|
|
|
|
// Creates the descriptors of a texture based on its allowed state flags:
// - 1 SRV covering all mips, stored in texture.srvIndex
// - 1 UAV per mip, stored in texture.mips[m].uavIndex
// Views that aren't created get InvalidDescriptorIndex.
// Textures with a depth above 1 get 3D views, the others get 2D views.
static void CreateTextureDescriptors(Texture& texture)
{
	const TextureDesc& rhiDesc = texture.desc;
	ID3D12Resource* const resource = texture.texture;
	const bool is3D = rhiDesc.depth > 1;

	if(rhiDesc.allowedState & ResourceStates::ShaderAccessBits)
	{
		D3D12_SHADER_RESOURCE_VIEW_DESC viewDesc = {};
		viewDesc.Format = GetD3DFormat(rhiDesc.format);
		if(rhiDesc.format == TextureFormat::Depth24_Stencil8)
		{
			// the depth/stencil format is viewed through R24_UNORM_X8_TYPELESS
			viewDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
		}
		viewDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
		if(is3D)
		{
			viewDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
			viewDesc.Texture3D.MipLevels = rhiDesc.mipCount;
			viewDesc.Texture3D.MostDetailedMip = 0;
			viewDesc.Texture3D.ResourceMinLODClamp = 0.0f;
		}
		else
		{
			viewDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
			viewDesc.Texture2D.MipLevels = rhiDesc.mipCount;
			viewDesc.Texture2D.MostDetailedMip = 0;
			viewDesc.Texture2D.PlaneSlice = 0;
			viewDesc.Texture2D.ResourceMinLODClamp = 0.0f;
		}
		texture.srvIndex = CreateSRV(resource, viewDesc, false);
	}
	else
	{
		texture.srvIndex = InvalidDescriptorIndex;
	}

	const bool createUAVs = (rhiDesc.allowedState & ResourceStates::UnorderedAccessBit) != 0;
	for(uint32_t mip = 0; mip < rhiDesc.mipCount; ++mip)
	{
		if(!createUAVs)
		{
			texture.mips[mip].uavIndex = InvalidDescriptorIndex;
			continue;
		}

		D3D12_UNORDERED_ACCESS_VIEW_DESC viewDesc = {};
		viewDesc.Format = GetD3DFormat(rhiDesc.format);
		if(is3D)
		{
			viewDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
			viewDesc.Texture3D.MipSlice = mip;
			viewDesc.Texture3D.FirstWSlice = 0;
			viewDesc.Texture3D.WSize = UINT(~0); // get access to all slices
		}
		else
		{
			viewDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
			viewDesc.Texture2D.MipSlice = mip;
			viewDesc.Texture2D.PlaneSlice = 0;
		}
		texture.mips[mip].uavIndex = CreateUAV(resource, viewDesc);
	}
}
|
|
|
|
static void InitDynamicResourceDescriptorRanges()
|
|
{
|
|
const uint32_t uavStart = DynamicResources::MaxDescriptorsSRV;
|
|
const uint32_t cbvStart = uavStart + DynamicResources::MaxDescriptorsUAV;
|
|
DynamicResources& dr = rhi.dynamicResources;
|
|
dr.srvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, DynamicResources::MaxDescriptorsSRV);
|
|
dr.uavIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, uavStart, DynamicResources::MaxDescriptorsUAV);
|
|
dr.cbvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, cbvStart, DynamicResources::MaxDescriptorsCBV);
|
|
dr.samplerIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1024);
|
|
}
|
|
|
|
static void CreateDynamicResources()
|
|
{
|
|
if(!rhi.useDynamicResources)
|
|
{
|
|
return;
|
|
}
|
|
|
|
const uint32_t genericDescriptorCount = DynamicResources::MaxDescriptorsGeneric;
|
|
const uint32_t samplerDescriptorCount = DynamicResources::MaxDescriptorsSamplers;
|
|
DynamicResources& dr = rhi.dynamicResources;
|
|
InitDynamicResourceDescriptorRanges();
|
|
|
|
{
|
|
// keep 1.0 behavior for max. flexibility,
|
|
// effectively disabling optimizations
|
|
const D3D12_DESCRIPTOR_RANGE_FLAGS genericRangeFlags =
|
|
D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE |
|
|
D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
|
|
const D3D12_DESCRIPTOR_RANGE_FLAGS samplerRangeFlags =
|
|
D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;
|
|
|
|
const DynamicResources::DescriptorRange generic[3] =
|
|
{
|
|
dr.srvIndex,
|
|
dr.uavIndex,
|
|
dr.cbvIndex
|
|
};
|
|
D3D12_DESCRIPTOR_RANGE1 mainRanges[3] = {};
|
|
for(uint32_t i = 0; i < ARRAY_LEN(generic); ++i)
|
|
{
|
|
mainRanges[i].RangeType = generic[i].type;
|
|
mainRanges[i].BaseShaderRegister = generic[i].start;
|
|
mainRanges[i].NumDescriptors = generic[i].count;
|
|
mainRanges[i].RegisterSpace = 0;
|
|
mainRanges[i].OffsetInDescriptorsFromTableStart = generic[i].start;
|
|
mainRanges[i].Flags = genericRangeFlags;
|
|
}
|
|
D3D12_DESCRIPTOR_RANGE1 samplerRange = {};
|
|
samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
|
|
samplerRange.BaseShaderRegister = 0;
|
|
samplerRange.NumDescriptors = samplerDescriptorCount;
|
|
samplerRange.RegisterSpace = 0;
|
|
samplerRange.OffsetInDescriptorsFromTableStart = 0;
|
|
samplerRange.Flags = samplerRangeFlags;
|
|
|
|
D3D12_ROOT_PARAMETER1 rootParameters[3] = {};
|
|
rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[0].DescriptorTable.NumDescriptorRanges = ARRAY_LEN(mainRanges);
|
|
rootParameters[0].DescriptorTable.pDescriptorRanges = mainRanges;
|
|
rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
|
rootParameters[1].DescriptorTable.pDescriptorRanges = &samplerRange;
|
|
rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
|
rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
rootParameters[2].Constants.Num32BitValues = 64 - 2; // all the available space minus the 2 tables
|
|
rootParameters[2].Constants.ShaderRegister = 0; // access the RC at register b0
|
|
|
|
D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc;
|
|
desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
|
|
desc.Desc_1_1.Flags =
|
|
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
|
|
D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED |
|
|
D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED |
|
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
|
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
|
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
|
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
|
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
|
|
desc.Desc_1_1.NumParameters = ARRAY_LEN(rootParameters);
|
|
desc.Desc_1_1.pParameters = rootParameters;
|
|
desc.Desc_1_1.NumStaticSamplers = 0;
|
|
desc.Desc_1_1.pStaticSamplers = NULL;
|
|
|
|
ID3DBlob* blob;
|
|
ID3DBlob* errorBlob;
|
|
if(FAILED(D3D12SerializeVersionedRootSignature(&desc, &blob, &errorBlob)))
|
|
{
|
|
ri.Error(ERR_FATAL, "Root signature creation failed!\n%s\n", (const char*)errorBlob->GetBufferPointer());
|
|
}
|
|
COM_RELEASE(errorBlob);
|
|
|
|
ID3D12RootSignature* signature;
|
|
D3D(rhi.device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&signature)));
|
|
COM_RELEASE(blob);
|
|
SetDebugName(signature, "Uber", D3DResourceType::RootSignature);
|
|
dr.rootSignature = signature;
|
|
}
|
|
|
|
{
|
|
ID3D12DescriptorHeap* heap;
|
|
D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
|
|
heapDesc.NumDescriptors = genericDescriptorCount;
|
|
heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
|
heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
|
D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap)));
|
|
SetDebugName(heap, "Uber GPU generic", D3DResourceType::DescriptorHeap);
|
|
dr.genericDescriptorHeap = heap;
|
|
|
|
heapDesc.NumDescriptors = samplerDescriptorCount;
|
|
heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
|
|
D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap)));
|
|
SetDebugName(heap, "Uber GPU sampler", D3DResourceType::DescriptorHeap);
|
|
dr.samplerDescriptorHeap = heap;
|
|
|
|
heapDesc.NumDescriptors = genericDescriptorCount;
|
|
heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
|
heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
|
D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap)));
|
|
SetDebugName(heap, "Uber CPU generic", D3DResourceType::DescriptorHeap);
|
|
dr.genericCPUDescriptorHeap = heap;
|
|
}
|
|
}
|
|
|
|
static void DestroyDynamicResources()
|
|
{
|
|
DynamicResources& dr = rhi.dynamicResources;
|
|
COM_RELEASE(dr.rootSignature);
|
|
COM_RELEASE(dr.genericDescriptorHeap);
|
|
COM_RELEASE(dr.samplerDescriptorHeap);
|
|
COM_RELEASE(dr.genericCPUDescriptorHeap);
|
|
}
|
|
|
|
static void BindDynamicResources()
|
|
{
|
|
if(!rhi.useDynamicResources)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// @NOTE: Set*RootSignature must be called after SetDescriptorHeaps
|
|
DynamicResources& dr = rhi.dynamicResources;
|
|
ID3D12DescriptorHeap* heaps[] = { dr.genericDescriptorHeap, dr.samplerDescriptorHeap };
|
|
rhi.commandList->SetDescriptorHeaps(ARRAY_LEN(heaps), heaps);
|
|
if(rhi.commandList->GetType() == D3D12_COMMAND_LIST_TYPE_DIRECT)
|
|
{
|
|
rhi.commandList->SetGraphicsRootSignature(dr.rootSignature);
|
|
rhi.commandList->SetGraphicsRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart());
|
|
rhi.commandList->SetGraphicsRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart());
|
|
}
|
|
rhi.commandList->SetComputeRootSignature(dr.rootSignature);
|
|
rhi.commandList->SetComputeRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart());
|
|
rhi.commandList->SetComputeRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart());
|
|
}
|
|
|
|
static void UpdateDynamicResources()
|
|
{
|
|
if(!rhi.useDynamicResources)
|
|
{
|
|
return;
|
|
}
|
|
|
|
InitDynamicResourceDescriptorRanges();
|
|
|
|
Texture* texture;
|
|
HTexture htexture;
|
|
for(int i = 0; rhi.textures.FindNext(&texture, &htexture, &i);)
|
|
{
|
|
CreateTextureDescriptors(*texture);
|
|
}
|
|
|
|
Buffer* buffer;
|
|
HBuffer hbuffer;
|
|
for(int i = 0; rhi.buffers.FindNext(&buffer, &hbuffer, &i);)
|
|
{
|
|
CreateBufferDescriptors(*buffer);
|
|
}
|
|
}
|
|
|
|
// Uploads root constants to root parameter 2 of the current command list.
// 'byteOffset' and 'byteCount' must be multiples of 4 and
// the written range must fit in the 248 bytes left after the 2 descriptor tables.
// 'graphics' selects the graphics root signature, otherwise the compute one is used.
static void SetRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants, bool graphics)
{
	Q_assert(CanWriteCommands());
	Q_assert(byteCount > 0);
	Q_assert(byteCount % 4 == 0);
	Q_assert(byteOffset % 4 == 0);
	Q_assert(byteOffset + byteCount <= 256 - 2 * 4); // 8 bytes are used for the 2 descriptor tables
	Q_assert(constants);

	// parameter index is 2 because the 2 descriptor tables come first
	const UINT rootParameterIndex = 2;
	const UINT dwordCount = (UINT)byteCount / 4;
	const UINT dwordOffset = (UINT)byteOffset / 4;
	if(!graphics)
	{
		rhi.commandList->SetComputeRoot32BitConstants(rootParameterIndex, dwordCount, constants, dwordOffset);
		return;
	}

	rhi.commandList->SetGraphicsRoot32BitConstants(rootParameterIndex, dwordCount, constants, dwordOffset);
}
|
|
|
|
// Records the barriers needed to bring the given textures and buffers to their new states.
// Requests that need no barrier are skipped and
// nothing is recorded when no barrier is needed at all.
static void Barrier(uint32_t texCount, const TextureBarrier* textures, uint32_t buffCount, const BufferBarrier* buffers)
{
	Q_assert(CanWriteCommands());

	static D3D12_RESOURCE_BARRIER barriers[MAX_DRAWIMAGES * 2];
	Q_assert(buffCount + texCount <= ARRAY_LEN(barriers));

	// barriers are packed at the front of the array: textures first, buffers second
	UINT usedCount = 0;
	for(uint32_t t = 0; t < texCount; ++t)
	{
		const TextureBarrier& request = textures[t];
		Q_assert(!IsNullHandle(request.texture));
		Texture& texture = rhi.textures.Get(request.texture);
		const bool needed = SetBarrier(texture.currentState, barriers[usedCount], request.newState, texture.texture);
		if(needed)
		{
			++usedCount;
		}
	}
	for(uint32_t b = 0; b < buffCount; ++b)
	{
		const BufferBarrier& request = buffers[b];
		Q_assert(!IsNullHandle(request.buffer));
		Buffer& buffer = rhi.buffers.Get(request.buffer);
		const bool needed = SetBarrier(buffer.currentState, barriers[usedCount], request.newState, buffer.buffer);
		if(needed)
		{
			++usedCount;
		}
	}

	if(usedCount == 0)
	{
		return;
	}

	rhi.commandList->ResourceBarrier(usedCount, barriers);
}
|
|
|
|
// Makes sure 'hbuffer' refers to a buffer of at least 'byteCount' bytes.
// When the current buffer is missing or too small, it is queued for delayed destruction
// and replaced by a new short-lived buffer whose size is
// the largest of the requested size and twice the old size.
static void EnsureBufferIsThisLarge(HBuffer& hbuffer, const char* name, ResourceStates::Flags state, uint32_t byteCount)
{
	uint32_t previousByteCount = 0;
	if(!IsNullHandle(hbuffer))
	{
		const Buffer& current = rhi.buffers.Get(hbuffer);
		const bool largeEnough = current.desc.byteCount >= byteCount;
		if(largeEnough)
		{
			return;
		}

		previousByteCount = current.desc.byteCount;
	}

	// grow geometrically to limit the number of re-allocations
	const uint32_t newByteCount = max(byteCount, 2 * previousByteCount);
	DestroyBufferDelayed(hbuffer);
	BufferDesc newDesc(name, newByteCount, state);
	newDesc.shortLifeTime = true;
	hbuffer = CreateBuffer(newDesc);
}
|
|
|
|
static void UpdateGPUIndexRangeAndHelp()
|
|
{
|
|
Cvar_SetRange(r_gpuIndex->name, r_gpuIndex->type, "0", va("%d", rhi.gpuCount));
|
|
|
|
char values[256];
|
|
StringList stringList;
|
|
stringList.Init(values, sizeof(values));
|
|
stringList.Append("0");
|
|
stringList.Append("Default GPU");
|
|
stringList.Append("");
|
|
for(uint32_t i = 0; i < rhi.gpuCount; ++i)
|
|
{
|
|
stringList.Append(va("%d", (int)i + 1));
|
|
stringList.Append(rhi.gpus[i].name);
|
|
stringList.Append("");
|
|
}
|
|
stringList.Terminate();
|
|
Cvar_SetMenuData(r_gpuIndex->name, CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "GPU selection", "Choose the GPU to use", "", values);
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION)
// NVAPI raytracing validation callback: forwards the message to the debugger output.
// 'pUserData' is unused.
static void __stdcall NVAPI_RTValidationCallback(void* pUserData, NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY severity, const char* messageCode, const char* message, const char* messageDetails)
{
	const char* severityName;
	if(severity == NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_ERROR)
	{
		severityName = "error";
	}
	else if(severity == NVAPI_D3D12_RAYTRACING_VALIDATION_MESSAGE_SEVERITY_WARNING)
	{
		severityName = "warning";
	}
	else
	{
		severityName = "unknown";
	}

	OutputDebugStringA(va("NVAPI RT: %s: [%s] %s\n%s\n", severityName, messageCode, message, messageDetails));
}
#endif
|
|
|
|
static void DrawResourceUsage()
|
|
{
|
|
if(BeginTable("Handles", 3))
|
|
{
|
|
TableHeader(3, "Type", "Count", "Max");
|
|
|
|
#define ITEM(Name, Variable) TableRow(3, Name, va("%d", (int)Variable.CountUsedSlots()), va("%d", (int)Variable.size))
|
|
ITEM("Buffers", rhi.buffers);
|
|
ITEM("Textures", rhi.textures);
|
|
ITEM("Root Signatures", rhi.rootSignatures);
|
|
ITEM("Descriptor Tables", rhi.descriptorTables);
|
|
ITEM("Pipelines", rhi.pipelines);
|
|
ITEM("Shaders", rhi.shaders);
|
|
ITEM("Samplers", rhi.samplers);
|
|
#undef ITEM
|
|
TableRow(3, "Duration Queries",
|
|
va("%d", rhi.frameQueries[rhi.frameIndex].durationQueryCount),
|
|
va("%d", MaxDurationQueries));
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
|
|
ImGui::NewLine();
|
|
if(BeginTable("Descriptors", 3))
|
|
{
|
|
TableHeader(3, "Type", "Count", "Max");
|
|
|
|
#define ITEM(Name, Variable) TableRow(3, Name, va("%d", (int)Variable.allocatedItemCount), va("%d", (int)Variable.size))
|
|
#define DRITEM(Name, Index) TableRow(3, Name, va("%d", (int)(Index.index - Index.start)), va("%d", (int)Index.count));
|
|
if(rhi.useDynamicResources)
|
|
{
|
|
const DynamicResources& dr = rhi.dynamicResources;
|
|
DRITEM("GPU SRV", dr.srvIndex);
|
|
DRITEM("GPU UAV", dr.uavIndex);
|
|
DRITEM("GPU CBV", dr.cbvIndex);
|
|
DRITEM("GPU Samplers", dr.samplerIndex);
|
|
}
|
|
else
|
|
{
|
|
ITEM("CPU CBV/SRV/UAV", rhi.descHeapGeneric.freeList);
|
|
ITEM("CPU Samplers", rhi.descHeapSamplers.freeList);
|
|
}
|
|
ITEM("CPU RTV", rhi.descHeapRTVs.freeList);
|
|
ITEM("CPU DSV", rhi.descHeapDSVs.freeList);
|
|
#undef DRITEM
|
|
#undef ITEM
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
|
|
ImGui::NewLine();
|
|
if(BeginTable("Memory", 2))
|
|
{
|
|
D3D12MA::Budget budget;
|
|
rhi.allocator->GetBudget(&budget, NULL);
|
|
TableRow2("UMA", rhi.allocator->IsUMA());
|
|
TableRow2("Cache coherent UMA", rhi.allocator->IsCacheCoherentUMA());
|
|
TableRow(2, "Total", Com_FormatBytes(rhi.allocator->GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL)));
|
|
TableRow(2, "Budget", Com_FormatBytes(budget.BudgetBytes));
|
|
TableRow(2, "Usage", Com_FormatBytes(budget.UsageBytes));
|
|
TableRow(2, "Allocated", Com_FormatBytes(budget.Stats.BlockBytes));
|
|
TableRow(2, "Used", Com_FormatBytes(budget.Stats.AllocationBytes));
|
|
TableRow(2, "Block count", va("%d", budget.Stats.BlockCount));
|
|
TableRow(2, "Allocation count", va("%d", budget.Stats.AllocationCount));
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
}
|
|
|
|
static void DrawCaps()
|
|
{
|
|
if(BeginTable("Capabilities", 2))
|
|
{
|
|
TableRow(2, "Adapter", rhi.adapterName);
|
|
TableRow(2, "Driver version", rhi.umdVersionString);
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS options0 = { 0 };
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options0, sizeof(options0))))
|
|
{
|
|
const char* tier = "Unknown";
|
|
switch(options0.ResourceBindingTier)
|
|
{
|
|
case D3D12_RESOURCE_BINDING_TIER_1: tier = "1"; break;
|
|
case D3D12_RESOURCE_BINDING_TIER_2: tier = "2"; break;
|
|
case D3D12_RESOURCE_BINDING_TIER_3: tier = "3"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Resource binding tier", tier);
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = { 0 };
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2))))
|
|
{
|
|
TableRow2("Depth bounds test", options2.DepthBoundsTestSupported ? "YES" : "NO");
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_ARCHITECTURE arch0 = { 0 };
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &arch0, sizeof(arch0))))
|
|
{
|
|
TableRow2("Tile-based renderer", arch0.TileBasedRenderer ? "YES" : "NO");
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_ROOT_SIGNATURE root0 = {};
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &root0, sizeof(root0))))
|
|
{
|
|
const char* version = "Unknown";
|
|
switch(root0.HighestVersion)
|
|
{
|
|
case D3D_ROOT_SIGNATURE_VERSION_1_0: version = "1.0"; break;
|
|
case D3D_ROOT_SIGNATURE_VERSION_1_1: version = "1.1"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Root signature version", version);
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = { 0 };
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5))))
|
|
{
|
|
const char* tier = "Unknown";
|
|
switch(options5.RenderPassesTier)
|
|
{
|
|
case D3D12_RENDER_PASS_TIER_0: tier = "0"; break;
|
|
case D3D12_RENDER_PASS_TIER_1: tier = "1"; break;
|
|
case D3D12_RENDER_PASS_TIER_2: tier = "2"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Render passes tier", tier);
|
|
|
|
tier = "Unknown";
|
|
switch(options5.RaytracingTier)
|
|
{
|
|
case D3D12_RAYTRACING_TIER_NOT_SUPPORTED: tier = "Not supported"; break;
|
|
case D3D12_RAYTRACING_TIER_1_0: tier = "1.0"; break;
|
|
case D3D12_RAYTRACING_TIER_1_1: tier = "1.1"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Raytracing (DXR) tier", tier);
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = { 0 };
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6))))
|
|
{
|
|
const char* tier = "Unknown";
|
|
switch(options6.VariableShadingRateTier)
|
|
{
|
|
case D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED: tier = "N/A"; break;
|
|
case D3D12_VARIABLE_SHADING_RATE_TIER_1: tier = "1"; break;
|
|
case D3D12_VARIABLE_SHADING_RATE_TIER_2: tier = "2"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Variable-rate shading (VRS) tier", tier);
|
|
|
|
TableRow(2, "VRS: 2x4, 4x2, 4x4 support", options6.AdditionalShadingRatesSupported ? "YES" : "NO");
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {};
|
|
shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_8;
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel))))
|
|
{
|
|
const char* model = "Unknown";
|
|
switch(shaderModel.HighestShaderModel)
|
|
{
|
|
case D3D_SHADER_MODEL_6_0: model = "6.0"; break;
|
|
case D3D_SHADER_MODEL_6_1: model = "6.1"; break;
|
|
case D3D_SHADER_MODEL_6_2: model = "6.2"; break;
|
|
case D3D_SHADER_MODEL_6_3: model = "6.3"; break;
|
|
case D3D_SHADER_MODEL_6_4: model = "6.4"; break;
|
|
case D3D_SHADER_MODEL_6_5: model = "6.5"; break;
|
|
case D3D_SHADER_MODEL_6_6: model = "6.6"; break;
|
|
case D3D_SHADER_MODEL_6_7: model = "6.7"; break;
|
|
case D3D_SHADER_MODEL_6_8: model = "6.8"; break;
|
|
default: break;
|
|
}
|
|
TableRow(2, "Shader model", model);
|
|
}
|
|
|
|
// the validation layer reports live objects at shutdown when NvAPI_D3D12_QueryCpuVisibleVidmem is called
|
|
#if defined(RHI_ENABLE_NVAPI)
|
|
if(rhi.nvapiActive)
|
|
{
|
|
NvU64 cvvTotal, cvvFree;
|
|
if(NvAPI_D3D12_QueryCpuVisibleVidmem(rhi.device, &cvvTotal, &cvvFree) == NvAPI_Status::NVAPI_OK &&
|
|
cvvTotal > 0)
|
|
{
|
|
TableRow(2, "CPU Visible VRAM Total", Com_FormatBytes(cvvTotal));
|
|
TableRow(2, "CPU Visible VRAM Free", Com_FormatBytes(cvvFree));
|
|
}
|
|
else
|
|
{
|
|
TableRow(2, "CPU Visible VRAM", "N/A");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
}
|
|
|
|
static void DrawTextures()
|
|
{
|
|
static char filter[256];
|
|
if(ImGui::Button("Clear filter"))
|
|
{
|
|
filter[0] = '\0';
|
|
}
|
|
ImGui::SameLine();
|
|
ImGui::InputText(" ", filter, ARRAY_LEN(filter));
|
|
|
|
if(BeginTable("Textures", 4))
|
|
{
|
|
TableHeader(4, "Name", "State", "Size", "Format");
|
|
|
|
int i = 0;
|
|
Texture* texture;
|
|
HTexture htexture;
|
|
while(rhi.textures.FindNext(&texture, &htexture, &i))
|
|
{
|
|
if(filter[0] != '\0' && !Com_Filter(filter, texture->desc.name))
|
|
{
|
|
continue;
|
|
}
|
|
const D3D12_RESOURCE_DESC desc = texture->texture->GetDesc();
|
|
const uint64_t byteCount = texture->allocation != NULL ? texture->allocation->GetSize() : 0;
|
|
TableRow(4,
|
|
texture->desc.name,
|
|
GetNameForD3DResourceStates(texture->currentState),
|
|
Com_FormatBytes(byteCount),
|
|
GetNameForD3DFormat(desc.Format));
|
|
}
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
}
|
|
|
|
static void DrawBuffers()
|
|
{
|
|
static char filter[256];
|
|
if(ImGui::Button("Clear filter"))
|
|
{
|
|
filter[0] = '\0';
|
|
}
|
|
ImGui::SameLine();
|
|
ImGui::InputText(" ", filter, ARRAY_LEN(filter));
|
|
|
|
if(BeginTable("Buffers", 4))
|
|
{
|
|
TableHeader(4, "Buffer", "State", "Heap", "Size");
|
|
|
|
int i = 0;
|
|
Buffer* buffer;
|
|
HBuffer hbuffer;
|
|
while(rhi.buffers.FindNext(&buffer, &hbuffer, &i))
|
|
{
|
|
if(filter[0] != '\0' && !Com_Filter(filter, buffer->desc.name))
|
|
{
|
|
continue;
|
|
}
|
|
TableRow(4,
|
|
buffer->desc.name,
|
|
GetNameForD3DResourceStates(buffer->currentState),
|
|
GetResourceHeapName(buffer->buffer),
|
|
Com_FormatBytes(buffer->allocation->GetSize()));
|
|
}
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
}
|
|
|
|
typedef void (*UICallback)();
|
|
|
|
static void DrawSection(const char* name, UICallback callback)
|
|
{
|
|
if(ImGui::BeginTabItem(name))
|
|
{
|
|
(*callback)();
|
|
ImGui::EndTabItem();
|
|
}
|
|
}
|
|
|
|
static void DrawGUI()
|
|
{
|
|
static bool resourcesActive = false;
|
|
ToggleBooleanWithShortcut(resourcesActive, ImGuiKey_R);
|
|
GUI_AddMainMenuItem(GUI_MainMenu::Info, "RHI Resources", "Ctrl+R", &resourcesActive);
|
|
if(resourcesActive)
|
|
{
|
|
if(ImGui::Begin("Direct3D 12 RHI", &resourcesActive))
|
|
{
|
|
ImGui::BeginTabBar("Tabs#RHI");
|
|
DrawSection("Resources", &DrawResourceUsage);
|
|
DrawSection("Caps", &DrawCaps);
|
|
DrawSection("Textures", &DrawTextures);
|
|
DrawSection("Buffers", &DrawBuffers);
|
|
ImGui::EndTabBar();
|
|
}
|
|
ImGui::End();
|
|
}
|
|
}
|
|
|
|
bool Init(const InitDesc& initDesc)
|
|
{
|
|
Sys_V_Init();
|
|
|
|
if(rhi.device != NULL)
|
|
{
|
|
rhi.raytracingScratchBuffer = RHI_MAKE_NULL_HANDLE();
|
|
rhi.raytracingInstanceBuffer = RHI_MAKE_NULL_HANDLE();
|
|
|
|
DXGI_SWAP_CHAIN_DESC desc;
|
|
D3D(rhi.swapChain->GetDesc(&desc));
|
|
|
|
// V-Sync toggles require changing the swap chain flags,
|
|
// which means ResizeBuffers can't be used
|
|
const bool vsync = r_vsync->integer != 0;
|
|
rhi.renderFrameCount = vsync ? 1 : 2;
|
|
|
|
if(glInfo.winWidth != desc.BufferDesc.Width ||
|
|
glInfo.winHeight != desc.BufferDesc.Height ||
|
|
vsync != rhi.vsync)
|
|
{
|
|
WaitUntilDeviceIsIdle();
|
|
|
|
for(uint32_t f = 0; f < rhi.swapChainBufferCount; ++f)
|
|
{
|
|
DestroyTexture(rhi.renderTargets[f]);
|
|
}
|
|
|
|
const UINT flags = GetSwapChainFlags();
|
|
if(vsync == rhi.vsync)
|
|
{
|
|
D3D(rhi.swapChain->ResizeBuffers(desc.BufferCount, glInfo.winWidth, glInfo.winHeight, desc.BufferDesc.Format, flags));
|
|
}
|
|
else
|
|
{
|
|
if(rhi.frameLatencyWaitableObject != NULL)
|
|
{
|
|
CloseHandle(rhi.frameLatencyWaitableObject);
|
|
rhi.frameLatencyWaitableObject = NULL;
|
|
}
|
|
|
|
COM_RELEASE(rhi.swapChain);
|
|
|
|
IDXGISwapChain* dxgiSwapChain;
|
|
DXGI_SWAP_CHAIN_DESC swapChainDesc = { 0 };
|
|
swapChainDesc.BufferCount = rhi.swapChainBufferCount;
|
|
swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
swapChainDesc.BufferDesc.Width = glInfo.winWidth;
|
|
swapChainDesc.BufferDesc.Height = glInfo.winHeight;
|
|
swapChainDesc.BufferDesc.RefreshRate.Numerator = 0;
|
|
swapChainDesc.BufferDesc.RefreshRate.Denominator = 1;
|
|
swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
|
|
swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
|
|
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swapChainDesc.Flags = flags;
|
|
swapChainDesc.OutputWindow = GetActiveWindow();
|
|
swapChainDesc.SampleDesc.Count = 1;
|
|
swapChainDesc.SampleDesc.Quality = 0;
|
|
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
|
swapChainDesc.Windowed = TRUE;
|
|
D3D(rhi.factory->CreateSwapChain(rhi.mainCommandQueue, &swapChainDesc, &dxgiSwapChain));
|
|
|
|
D3D(dxgiSwapChain->QueryInterface(IID_PPV_ARGS(&rhi.swapChain)));
|
|
COM_RELEASE(dxgiSwapChain);
|
|
|
|
if(vsync)
|
|
{
|
|
rhi.frameLatencyWaitableObject = rhi.swapChain->GetFrameLatencyWaitableObject();
|
|
rhi.frameLatencyWaitNeeded = true;
|
|
D3D(rhi.swapChain->SetMaximumFrameLatency(1));
|
|
}
|
|
}
|
|
|
|
GrabSwapChainTextures();
|
|
|
|
rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex();
|
|
|
|
for(uint32_t f = 0; f < FrameCount; ++f)
|
|
{
|
|
rhi.mainFenceValues[f] = 0;
|
|
}
|
|
|
|
rhi.readback.ResizeIfNeeded();
|
|
}
|
|
|
|
GetMonitorRefreshRate();
|
|
|
|
rhi.tempStringAllocator.Clear();
|
|
|
|
rhi.vsync = vsync;
|
|
|
|
UpdateDynamicResources();
|
|
|
|
UpdateGPUIndexRangeAndHelp();
|
|
|
|
return false;
|
|
}
|
|
|
|
// @NOTE: we can't use memset because of the StaticPool members
|
|
new (&rhi) RHIPrivate();
|
|
rhi.useDynamicResources = initDesc.directDescriptorHeapIndexing;
|
|
|
|
// check for the presence of our 3 DLLs ASAP
|
|
{
|
|
HMODULE coreModule = LoadLibraryA("cnq3/D3D12Core.dll");
|
|
if(coreModule == NULL)
|
|
{
|
|
ri.Error(ERR_FATAL, "Failed to locate/open cnq3/D3D12Core.dll\n");
|
|
}
|
|
FreeLibrary(coreModule);
|
|
|
|
rhi.dxilModule = LoadLibraryA("cnq3/dxil.dll");
|
|
if(rhi.dxilModule == NULL)
|
|
{
|
|
ri.Error(ERR_FATAL, "Failed to locate/open cnq3/dxil.dll\n");
|
|
}
|
|
|
|
rhi.dxcModule = LoadLibraryA("cnq3/dxcompiler.dll");
|
|
if(rhi.dxcModule == NULL)
|
|
{
|
|
ri.Error(ERR_FATAL, "Failed to locate/open cnq3/dxcompiler.dll\n");
|
|
}
|
|
}
|
|
|
|
rhi.persStringAllocator.Init(rhi.persStringData, sizeof(rhi.persStringData));
|
|
rhi.tempStringAllocator.Init(rhi.tempStringData, sizeof(rhi.tempStringData));
|
|
|
|
#if defined(D3D_DEBUG)
|
|
if(SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&rhi.debug))))
|
|
{
|
|
// calling after device creation will remove the device
|
|
// if you hit this error:
|
|
// "D3D12 SDKLayers dll does not match the D3D12SDKVersion of D3D12 Core dll."
|
|
// make sure your D3D12SDKVersion and D3D12SDKPath are valid!
|
|
rhi.debug->EnableDebugLayer();
|
|
|
|
#if defined(D3D_GPU_BASED_VALIDATION)
|
|
ID3D12Debug1* debug1;
|
|
if(SUCCEEDED(rhi.debug->QueryInterface(IID_PPV_ARGS(&debug1))))
|
|
{
|
|
debug1->SetEnableGPUBasedValidation(TRUE);
|
|
debug1->SetEnableSynchronizedCommandQueueValidation(TRUE);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
UINT dxgiFactoryFlags = 0;
|
|
if(SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&rhi.dxgiInfoQueue))))
|
|
{
|
|
dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
|
|
rhi.dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE);
|
|
rhi.dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE);
|
|
}
|
|
#endif
|
|
|
|
#if defined(D3D_DEBUG)
|
|
D3D(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&rhi.factory)));
|
|
#else
|
|
D3D(CreateDXGIFactory1(IID_PPV_ARGS(&rhi.factory)));
|
|
#endif
|
|
|
|
CreateAdapterList();
|
|
if(r_gpuIndex->integer > 0)
|
|
{
|
|
rhi.adapter = GetAdapterAtIndex(r_gpuIndex->integer - 1);
|
|
}
|
|
if(rhi.adapter == NULL)
|
|
{
|
|
rhi.adapter = FindMostSuitableAdapter(rhi.factory, r_gpuPreference->integer);
|
|
}
|
|
{
|
|
char adapterName[256];
|
|
const char* adapterNamePtr = "unknown";
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
if(SUCCEEDED(rhi.adapter->GetDesc1(&desc)) &&
|
|
WideCharToMultiByte(CP_UTF8, 0, desc.Description, -1, adapterName, sizeof(adapterName), NULL, NULL) > 0)
|
|
{
|
|
adapterNamePtr = adapterName;
|
|
}
|
|
ri.Printf(PRINT_ALL, "Selected graphics adapter: %s\n", adapterNamePtr);
|
|
Q_strncpyz(rhi.adapterName, adapterNamePtr, sizeof(rhi.adapterName));
|
|
}
|
|
UpdateGPUIndexRangeAndHelp();
|
|
|
|
D3D(D3D12CreateDevice(rhi.adapter, FeatureLevel, IID_PPV_ARGS(&rhi.device)));
|
|
|
|
rhi.vendorId = VENDORID_INVALID;
|
|
{
|
|
DXGI_ADAPTER_DESC desc;
|
|
if(SUCCEEDED(rhi.adapter->GetDesc(&desc)))
|
|
{
|
|
rhi.vendorId = desc.VendorId;
|
|
}
|
|
}
|
|
|
|
{
|
|
LARGE_INTEGER driverVersion;
|
|
if(SUCCEEDED(rhi.adapter->CheckInterfaceSupport(__uuidof(IDXGIDevice), &driverVersion)))
|
|
{
|
|
rhi.umdVersionSplit[0] = driverVersion.QuadPart >> 48;
|
|
rhi.umdVersionSplit[1] = (driverVersion.QuadPart >> 32) & 0xFFFF;
|
|
rhi.umdVersionSplit[2] = (driverVersion.QuadPart >> 16) & 0xFFFF;
|
|
rhi.umdVersionSplit[3] = driverVersion.QuadPart & 0xFFFF;
|
|
rhi.umdVersion = driverVersion.QuadPart;
|
|
Com_sprintf(rhi.umdVersionString, sizeof(rhi.umdVersionString), "%d.%d.%d.%d",
|
|
(int)rhi.umdVersionSplit[0],
|
|
(int)rhi.umdVersionSplit[1],
|
|
(int)rhi.umdVersionSplit[2],
|
|
(int)rhi.umdVersionSplit[3]);
|
|
}
|
|
}
|
|
|
|
// with direct descriptor heap indexing on Intel,
|
|
// *Texture*.GetDimensions() calls return incorrect values
|
|
// UMD version 31.0.101.5382 (0x001F000000651506) is still broken
|
|
// UMD version 31.0.101.5444 (0X001F000000651544) is the first fixed version
|
|
if(initDesc.directDescriptorHeapIndexing &&
|
|
rhi.vendorId == VENDORID_INTEL &&
|
|
rhi.umdVersion < 0X001F000000651544)
|
|
{
|
|
ri.Error(ERR_FATAL,
|
|
"This Intel graphics driver is known to have incorrect behavior.\n"
|
|
"Version 31.0.101.5444 (April 19, 2024) or newer is required.\n");
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
rhi.aftermathActive = false;
|
|
if(rhi.vendorId == VENDORID_NVIDIA)
|
|
{
|
|
const GFSDK_Aftermath_FeatureFlags flags = GFSDK_Aftermath_FeatureFlags_Maximum;
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_DX12_Initialize(GFSDK_Aftermath_Version_API, flags, rhi.device);
|
|
if(result == GFSDK_Aftermath_Result_Success)
|
|
{
|
|
rhi.aftermathActive = true;
|
|
}
|
|
Q_assert(rhi.aftermathActive);
|
|
}
|
|
#endif
|
|
|
|
{
|
|
D3D12_INDIRECT_ARGUMENT_DESC argDesc = {};
|
|
argDesc.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH;
|
|
D3D12_COMMAND_SIGNATURE_DESC desc = {};
|
|
desc.ByteStride = 12;
|
|
desc.NodeMask = 0;
|
|
desc.NumArgumentDescs = 1;
|
|
desc.pArgumentDescs = &argDesc;
|
|
D3D(rhi.device->CreateCommandSignature(&desc, NULL, IID_PPV_ARGS(&rhi.indirectDispatchSignature)));
|
|
}
|
|
|
|
if(rhi.useDynamicResources)
|
|
{
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS options0 = {};
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options0, sizeof(options0))))
|
|
{
|
|
if(options0.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3)
|
|
{
|
|
ri.Error(ERR_FATAL, "The CRP requires Resource Binding Tier 3 capable hardware\n");
|
|
}
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {};
|
|
shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_6;
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel))))
|
|
{
|
|
if(shaderModel.HighestShaderModel < D3D_SHADER_MODEL_6_6)
|
|
{
|
|
ri.Error(ERR_FATAL, "The CRP requires Shader Model 6.6 capable hardware\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
D3D12MA::ALLOCATOR_DESC desc = {};
|
|
desc.pDevice = rhi.device;
|
|
desc.pAdapter = rhi.adapter;
|
|
desc.Flags = D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED;
|
|
D3D(D3D12MA::CreateAllocator(&desc, &rhi.allocator));
|
|
}
|
|
|
|
if(rhi.allocator->IsCacheCoherentUMA())
|
|
{
|
|
D3D12MA::POOL_DESC poolDesc = {};
|
|
poolDesc.HeapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
|
|
poolDesc.HeapProperties.CreationNodeMask = 0;
|
|
poolDesc.HeapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; // system
|
|
poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
|
poolDesc.HeapProperties.VisibleNodeMask = 0;
|
|
poolDesc.HeapFlags = D3D12_HEAP_FLAG_NONE;
|
|
poolDesc.Flags = D3D12MA::POOL_FLAG_NONE;
|
|
|
|
D3D12MA::Pool* pool;
|
|
if(SUCCEEDED(rhi.allocator->CreatePool(&poolDesc, &pool)))
|
|
{
|
|
rhi.umaPool = pool;
|
|
}
|
|
}
|
|
|
|
#if defined(D3D_DEBUG)
|
|
if(rhi.debug)
|
|
{
|
|
rhi.device->QueryInterface(IID_PPV_ARGS(&rhi.infoQueue));
|
|
if(rhi.infoQueue)
|
|
{
|
|
rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE);
|
|
rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
|
|
rhi.infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
|
|
|
|
D3D12_MESSAGE_ID filteredMessages[] =
|
|
{
|
|
// can't remember what this one is for...
|
|
//D3D12_MESSAGE_ID_SETPRIVATEDATA_CHANGINGPARAMS,
|
|
// clear color mismatch will happen when going through a teleporter
|
|
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE
|
|
};
|
|
D3D12_INFO_QUEUE_FILTER filter = { 0 };
|
|
filter.DenyList.NumIDs = ARRAY_LEN(filteredMessages);
|
|
filter.DenyList.pIDList = filteredMessages;
|
|
rhi.infoQueue->AddStorageFilterEntries(&filter);
|
|
}
|
|
}
|
|
|
|
if(rhi.debug)
|
|
{
|
|
ID3D12DebugDevice1* debugDevice1;
|
|
if(SUCCEEDED(rhi.device->QueryInterface(IID_PPV_ARGS(&debugDevice1))))
|
|
{
|
|
// defaults:
|
|
// D3D12_GPU_BASED_VALIDATION_SHADER_PATCH_MODE_UNGUARDED_VALIDATION
|
|
// 256
|
|
// D3D12_GPU_BASED_VALIDATION_PIPELINE_STATE_CREATE_FLAG_NONE
|
|
D3D12_DEBUG_DEVICE_GPU_BASED_VALIDATION_SETTINGS gbv = {};
|
|
gbv.DefaultShaderPatchMode = D3D12_GPU_BASED_VALIDATION_SHADER_PATCH_MODE_GUARDED_VALIDATION;
|
|
gbv.MaxMessagesPerCommandList = 1024; // defaults to 256
|
|
gbv.PipelineStateCreateFlags = D3D12_GPU_BASED_VALIDATION_PIPELINE_STATE_CREATE_FLAG_FRONT_LOAD_CREATE_GUARDED_VALIDATION_SHADERS;
|
|
debugDevice1->SetDebugParameter(D3D12_DEBUG_DEVICE_PARAMETER_GPU_BASED_VALIDATION_SETTINGS, &gbv, sizeof(gbv));
|
|
|
|
// default: D3D12_DEBUG_FEATURE_NONE
|
|
const D3D12_DEBUG_FEATURE features =
|
|
D3D12_DEBUG_FEATURE_ALLOW_BEHAVIOR_CHANGING_DEBUG_AIDS |
|
|
D3D12_DEBUG_FEATURE_CONSERVATIVE_RESOURCE_STATE_TRACKING;
|
|
debugDevice1->SetDebugParameter(D3D12_DEBUG_DEVICE_PARAMETER_FEATURE_FLAGS, &features, sizeof(features));
|
|
|
|
COM_RELEASE(debugDevice1);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
{
|
|
uint16_t* freeList = rhi.descriptorFreeListData;
|
|
rhi.descHeapGeneric.Create(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MaxCPUGenericDescriptors, freeList, "all-encompassing CBV SRV UAV");
|
|
freeList += MaxCPUGenericDescriptors;
|
|
rhi.descHeapSamplers.Create(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MaxCPUSamplerDescriptors, freeList, "all-encompassing sampler");
|
|
freeList += MaxCPUSamplerDescriptors;
|
|
rhi.descHeapRTVs.Create(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MaxCPURTVDescriptors, freeList, "all-encompassing RTV");
|
|
freeList += MaxCPURTVDescriptors;
|
|
rhi.descHeapDSVs.Create(D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MaxCPUDSVDescriptors, freeList, "all-encompassing DSV");
|
|
}
|
|
|
|
{
|
|
D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { 0 };
|
|
commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
|
commandQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
|
commandQueueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
|
|
commandQueueDesc.NodeMask = 0;
|
|
D3D(rhi.device->CreateCommandQueue(&commandQueueDesc, IID_PPV_ARGS(&rhi.mainCommandQueue)));
|
|
SetDebugName(rhi.mainCommandQueue, "main", D3DResourceType::CommandQueue);
|
|
|
|
commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
|
D3D(rhi.device->CreateCommandQueue(&commandQueueDesc, IID_PPV_ARGS(&rhi.computeCommandQueue)));
|
|
SetDebugName(rhi.computeCommandQueue, "compute", D3DResourceType::CommandQueue);
|
|
}
|
|
|
|
rhi.isTearingSupported = IsTearingSupported();
|
|
rhi.swapChainBufferCount = 2;
|
|
rhi.renderFrameCount = r_vsync->integer ? 1 : 2;
|
|
|
|
{
|
|
const UINT flags = GetSwapChainFlags();
|
|
|
|
IDXGISwapChain* dxgiSwapChain;
|
|
DXGI_SWAP_CHAIN_DESC swapChainDesc = { 0 };
|
|
swapChainDesc.BufferCount = rhi.swapChainBufferCount;
|
|
swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
swapChainDesc.BufferDesc.Width = glInfo.winWidth;
|
|
swapChainDesc.BufferDesc.Height = glInfo.winHeight;
|
|
swapChainDesc.BufferDesc.RefreshRate.Numerator = 0;
|
|
swapChainDesc.BufferDesc.RefreshRate.Denominator = 1;
|
|
swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
|
|
swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
|
|
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swapChainDesc.Flags = flags;
|
|
swapChainDesc.OutputWindow = GetActiveWindow();
|
|
swapChainDesc.SampleDesc.Count = 1;
|
|
swapChainDesc.SampleDesc.Quality = 0;
|
|
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
|
swapChainDesc.Windowed = TRUE;
|
|
D3D(rhi.factory->CreateSwapChain(rhi.mainCommandQueue, &swapChainDesc, &dxgiSwapChain));
|
|
rhi.vsync = r_vsync->integer != 0;
|
|
|
|
D3D(dxgiSwapChain->QueryInterface(IID_PPV_ARGS(&rhi.swapChain)));
|
|
rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex();
|
|
COM_RELEASE(dxgiSwapChain);
|
|
|
|
if(r_vsync->integer)
|
|
{
|
|
rhi.frameLatencyWaitableObject = rhi.swapChain->GetFrameLatencyWaitableObject();
|
|
rhi.frameLatencyWaitNeeded = true;
|
|
D3D(rhi.swapChain->SetMaximumFrameLatency(1));
|
|
}
|
|
|
|
GrabSwapChainTextures();
|
|
}
|
|
|
|
GetMonitorRefreshRate();
|
|
|
|
for(UINT f = 0; f < FrameCount; ++f)
|
|
{
|
|
D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&rhi.mainCommandAllocators[f])));
|
|
SetDebugName(rhi.mainCommandAllocators[f], va("main #%d", f + 1), D3DResourceType::CommandAllocator);
|
|
}
|
|
|
|
// get command list ready to use during init
|
|
D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, rhi.mainCommandAllocators[rhi.frameIndex], NULL, IID_PPV_ARGS(&rhi.mainCommandList)));
|
|
SetDebugName(rhi.mainCommandList, "main", D3DResourceType::CommandList);
|
|
|
|
D3D(rhi.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&rhi.tempCommandAllocator)));
|
|
SetDebugName(rhi.tempCommandAllocator, "temp", D3DResourceType::CommandAllocator);
|
|
|
|
// the temp command list is always left open for the user
|
|
D3D(rhi.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, rhi.tempCommandAllocator, NULL, IID_PPV_ARGS(&rhi.tempCommandList)));
|
|
SetDebugName(rhi.tempCommandList, "temp", D3DResourceType::CommandList);
|
|
rhi.tempCommandListOpen = true;
|
|
|
|
// the active/bound command list is the main one by default
|
|
rhi.commandList = rhi.mainCommandList;
|
|
|
|
rhi.mainFence.Create(rhi.mainFenceValues[rhi.frameIndex], "main command queue");
|
|
|
|
rhi.tempFence.Create(rhi.tempFenceValue, "temp command queue");
|
|
|
|
rhi.upload.Create();
|
|
|
|
rhi.readback.Create();
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
Q_assert(rhi.mainCommandList != NULL);
|
|
Q_assert(rhi.tempCommandList != NULL);
|
|
GFSDK_Aftermath_Result r = GFSDK_Aftermath_Result_Fail;
|
|
r = GFSDK_Aftermath_DX12_CreateContextHandle(rhi.mainCommandList, &rhi.aftermathMainCommandList);
|
|
Q_assert(r == GFSDK_Aftermath_Result_Success);
|
|
r = GFSDK_Aftermath_DX12_CreateContextHandle(rhi.tempCommandList, &rhi.aftermathTempCommandList);
|
|
Q_assert(r == GFSDK_Aftermath_Result_Success);
|
|
rhi.aftermathCommandList = rhi.aftermathMainCommandList;
|
|
}
|
|
#endif
|
|
|
|
for(uint32_t f = 0; f < FrameCount; ++f)
|
|
{
|
|
D3D12_QUERY_HEAP_DESC desc = { 0 };
|
|
desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
|
desc.Count = MaxDurationQueries * 2;
|
|
desc.NodeMask = 0;
|
|
D3D(rhi.device->CreateQueryHeap(&desc, IID_PPV_ARGS(&rhi.timeStampHeaps[f])));
|
|
SetDebugName(rhi.timeStampHeaps[f], va("timestamp #%d", f + 1), D3DResourceType::QueryHeap);
|
|
}
|
|
|
|
for(uint32_t f = 0; f < FrameCount; ++f)
|
|
{
|
|
const uint32_t byteCount = MaxDurationQueries * 2 * sizeof(UINT64);
|
|
BufferDesc desc(va("timestamp readback #%d", f + 1), byteCount, ResourceStates::CopySourceBit);
|
|
desc.memoryUsage = MemoryUsage::Readback;
|
|
rhi.timeStampBuffers[f] = CreateBuffer(desc);
|
|
}
|
|
|
|
CreateDynamicResources();
|
|
CreateNullResources();
|
|
|
|
// queue some actual work...
|
|
|
|
D3D(rhi.commandList->Close());
|
|
|
|
WaitUntilDeviceIsIdle();
|
|
|
|
#if defined(RHI_ENABLE_NVAPI)
|
|
rhi.nvapiActive = false;
|
|
if(rhi.vendorId == VENDORID_NVIDIA)
|
|
{
|
|
NvAPI_Status nr = NvAPI_Initialize();
|
|
if(nr == NvAPI_Status::NVAPI_OK)
|
|
{
|
|
NvAPI_ShortString version;
|
|
if(NvAPI_GetInterfaceVersionString(version) == NvAPI_Status::NVAPI_OK)
|
|
{
|
|
ri.Printf(PRINT_ALL, "Opened nvapi.dll (%s)\n", version);
|
|
}
|
|
else
|
|
{
|
|
ri.Printf(PRINT_ALL, "Opened nvapi.dll\n");
|
|
}
|
|
rhi.nvapiActive = true;
|
|
|
|
#if defined(RHI_ENABLE_NVAPI_RT_VALIDATION)
|
|
nr = NvAPI_D3D12_EnableRaytracingValidation(rhi.device, NVAPI_D3D12_RAYTRACING_VALIDATION_FLAG_NONE);
|
|
Q_assert(nr == NvAPI_Status::NVAPI_OK);
|
|
void* nvapiValidationCallbackHandle = NULL;
|
|
nr = NvAPI_D3D12_RegisterRaytracingValidationMessageCallback(rhi.device, &NVAPI_RTValidationCallback, NULL, &nvapiValidationCallbackHandle);
|
|
Q_assert(nr == NvAPI_Status::NVAPI_OK);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
NvAPI_ShortString desc;
|
|
if(NvAPI_GetErrorMessage(nr, desc) == NvAPI_Status::NVAPI_OK)
|
|
{
|
|
ri.Printf(PRINT_WARNING, "Failed to load nvapi.dll: %s\n", desc);
|
|
}
|
|
else
|
|
{
|
|
ri.Printf(PRINT_WARNING, "Failed to load nvapi.dll\n");
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
rhi.pix.module = LoadLibraryA("cnq3/WinPixEventRuntime.dll");
|
|
if(rhi.pix.module != NULL)
|
|
{
|
|
rhi.pix.BeginEventOnCommandList = (PIX::BeginEventOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXBeginEventOnCommandList");
|
|
rhi.pix.EndEventOnCommandList = (PIX::EndEventOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXEndEventOnCommandList");
|
|
rhi.pix.SetMarkerOnCommandList = (PIX::SetMarkerOnCommandListPtr)GetProcAddress(rhi.pix.module, "PIXSetMarkerOnCommandList");
|
|
rhi.pix.canBeginAndEnd = rhi.pix.BeginEventOnCommandList != NULL && rhi.pix.EndEventOnCommandList != NULL;
|
|
}
|
|
|
|
typedef HRESULT (__stdcall* DxcCreateInstancePtr)(REFCLSID, REFIID, LPVOID*);
|
|
DxcCreateInstancePtr dxcCreateInstance = (DxcCreateInstancePtr)GetProcAddress(rhi.dxcModule, "DxcCreateInstance");
|
|
if(dxcCreateInstance == NULL)
|
|
{
|
|
ri.Error(ERR_FATAL, "Failed to locate DxcCreateInstance in cnq3/dxcompiler.dll\n");
|
|
}
|
|
D3D(dxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&rhi.dxcUtils)));
|
|
D3D(dxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&rhi.dxcCompiler)));
|
|
|
|
{
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {};
|
|
if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6))))
|
|
{
|
|
rhi.baseVRSSupport = options6.VariableShadingRateTier != D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED;
|
|
rhi.extendedVRSSupport = rhi.baseVRSSupport && options6.AdditionalShadingRatesSupported;
|
|
}
|
|
|
|
const char* modeLists[] = { "1x1", "1x1 2x1 1x2 2x2", "1x1 2x1 1x2 2x2 4x2 2x4 4x4" };
|
|
const int listIndex = rhi.extendedVRSSupport ? 2 : (rhi.baseVRSSupport ? 1 : 0);
|
|
ri.Printf(PRINT_ALL, "Supported VRS modes: %s\n", modeLists[listIndex]);
|
|
}
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = {};
|
|
const bool hasInlineRaytracing =
|
|
SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5))) &&
|
|
options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1;
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};
|
|
const bool hasBarycentrics =
|
|
SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3))) &&
|
|
options3.BarycentricsSupported;
|
|
|
|
glInfo.maxTextureSize = MAX_TEXTURE_SIZE;
|
|
glInfo.maxAnisotropy = 16;
|
|
glInfo.depthFadeSupport = qtrue;
|
|
|
|
Q_strncpyz(glConfig.vendor_string, rhi.adapterName, sizeof(glConfig.vendor_string));
|
|
Q_strncpyz(glConfig.renderer_string, "Direct3D 12", sizeof(glConfig.renderer_string));
|
|
|
|
Q_strncpyz(rhiInfo.name, "Direct3D 12", sizeof(rhiInfo.name));
|
|
Q_strncpyz(rhiInfo.adapter, rhi.adapterName, sizeof(rhiInfo.adapter));
|
|
rhiInfo.hasTearing = rhi.isTearingSupported;
|
|
rhiInfo.hasBaseVRS = rhi.baseVRSSupport;
|
|
rhiInfo.hasExtendedVRS = rhi.extendedVRSSupport;
|
|
rhiInfo.isUMA = rhi.allocator->IsUMA();
|
|
rhiInfo.isCacheCoherentUMA = rhi.allocator->IsCacheCoherentUMA();
|
|
rhiInfo.hasInlineRaytracing = hasInlineRaytracing;
|
|
rhiInfo.hasBarycentrics = hasBarycentrics;
|
|
|
|
rhi.initialized = true;
|
|
|
|
return true;
|
|
}
|
|
|
|
void ShutDown(bool destroyWindow)
|
|
{
|
|
#define DESTROY_POOL(Name, Func) DestroyPool(rhi.Name, &Func, !!destroyWindow);
|
|
|
|
if(!destroyWindow &&
|
|
r_gpuPreference->latchedString != NULL &&
|
|
Q_stricmp(r_gpuPreference->latchedString, r_gpuPreference->string) != 0)
|
|
{
|
|
destroyWindow = true;
|
|
}
|
|
|
|
if(!destroyWindow &&
|
|
r_gpuIndex->latchedString != NULL &&
|
|
Q_stricmp(r_gpuIndex->latchedString, r_gpuIndex->string) != 0)
|
|
{
|
|
destroyWindow = true;
|
|
}
|
|
|
|
if(rhi.frameBegun)
|
|
{
|
|
backEnd.renderFrame = qfalse;
|
|
EndFrame();
|
|
backEnd.renderFrame = qtrue;
|
|
}
|
|
|
|
rhi.buffersToDelete.Clear();
|
|
|
|
if(!destroyWindow)
|
|
{
|
|
WaitUntilDeviceIsIdle();
|
|
|
|
rhi.texturesToTransition.Clear();
|
|
rhi.buffersToTransition.Clear();
|
|
|
|
DESTROY_POOL_LIST(DESTROY_POOL);
|
|
|
|
return;
|
|
}
|
|
|
|
rhi.initialized = false;
|
|
|
|
FreeLibrary(rhi.pix.module);
|
|
|
|
WaitUntilDeviceIsIdle();
|
|
|
|
if(destroyWindow)
|
|
{
|
|
DestroyDynamicResources();
|
|
}
|
|
|
|
if(rhi.frameLatencyWaitableObject != NULL)
|
|
{
|
|
CloseHandle(rhi.frameLatencyWaitableObject);
|
|
}
|
|
|
|
free(rhi.rtGeoDescs);
|
|
|
|
rhi.upload.Release();
|
|
rhi.readback.Release();
|
|
rhi.mainFence.Release();
|
|
rhi.tempFence.Release();
|
|
rhi.descHeapGeneric.Release();
|
|
rhi.descHeapSamplers.Release();
|
|
rhi.descHeapRTVs.Release();
|
|
rhi.descHeapDSVs.Release();
|
|
|
|
DESTROY_POOL_LIST(DESTROY_POOL);
|
|
|
|
COM_RELEASE(rhi.indirectDispatchSignature);
|
|
COM_RELEASE(rhi.dxcCompiler);
|
|
COM_RELEASE(rhi.dxcUtils);
|
|
COM_RELEASE_ARRAY(rhi.timeStampHeaps);
|
|
COM_RELEASE(rhi.mainCommandList);
|
|
COM_RELEASE_ARRAY(rhi.mainCommandAllocators);
|
|
COM_RELEASE(rhi.tempCommandList);
|
|
COM_RELEASE(rhi.tempCommandAllocator);
|
|
COM_RELEASE(rhi.swapChain);
|
|
COM_RELEASE(rhi.computeCommandQueue);
|
|
COM_RELEASE(rhi.mainCommandQueue);
|
|
COM_RELEASE(rhi.infoQueue);
|
|
COM_RELEASE(rhi.umaPool);
|
|
COM_RELEASE(rhi.allocator);
|
|
COM_RELEASE(rhi.device);
|
|
COM_RELEASE(rhi.adapter);
|
|
COM_RELEASE(rhi.factory);
|
|
#if defined(D3D_DEBUG)
|
|
COM_RELEASE(rhi.dxgiInfoQueue);
|
|
#endif
|
|
COM_RELEASE(rhi.debug);
|
|
|
|
FreeLibrary(rhi.dxilModule);
|
|
FreeLibrary(rhi.dxcModule);
|
|
|
|
#if defined(RHI_ENABLE_NVAPI)
|
|
if(rhi.nvapiActive)
|
|
{
|
|
NvAPI_Unload();
|
|
rhi.nvapiActive = false;
|
|
}
|
|
#endif
|
|
|
|
#if defined(D3D_DEBUG)
|
|
IDXGIDebug1* debug = NULL;
|
|
if(SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&debug))))
|
|
{
|
|
// DXGI_DEBUG_RLO_ALL is DXGI_DEBUG_RLO_SUMMARY | DXGI_DEBUG_RLO_DETAIL | DXGI_DEBUG_RLO_IGNORE_INTERNAL
|
|
OutputDebugStringA("**** >>>> CNQ3: calling ReportLiveObjects\n");
|
|
const HRESULT hr = debug->ReportLiveObjects(DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_ALL);
|
|
OutputDebugStringA(va("**** >>>> CNQ3: ReportLiveObjects returned 0x%08X (%s)\n", (unsigned int)hr, GetSystemErrorString(hr)));
|
|
debug->Release();
|
|
}
|
|
#endif
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathMainCommandList);
|
|
GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathTempCommandList);
|
|
GFSDK_Aftermath_ReleaseContextHandle(rhi.aftermathUploadCommandList);
|
|
rhi.aftermathMainCommandList = NULL;
|
|
rhi.aftermathTempCommandList = NULL;
|
|
rhi.aftermathUploadCommandList = NULL;
|
|
rhi.aftermathCommandList = NULL;
|
|
rhi.aftermathActive = false;
|
|
}
|
|
#endif
|
|
|
|
#undef DESTROY_POOL
|
|
}
|
|
|
|
void BeginFrame()
|
|
{
|
|
if(rhi.frameBegun)
|
|
{
|
|
Sys_DebugPrintf("BeginFrame already called!\n");
|
|
return;
|
|
}
|
|
rhi.frameBegun = true;
|
|
|
|
rhi.beginFrameCounter++;
|
|
|
|
rhi.beforeRenderingUS = Sys_Microseconds();
|
|
|
|
WaitForSwapChain();
|
|
|
|
{
|
|
const UINT64 currentFenceValue = rhi.mainFenceValues[rhi.frameIndex];
|
|
#if RHI_DEBUG_FENCE
|
|
Sys_DebugPrintf("Wait: %d (BeginFrame)\n", (int)currentFenceValue);
|
|
#endif
|
|
rhi.mainFence.WaitOnCPU(currentFenceValue);
|
|
rhi.frameIndex = (rhi.frameIndex + 1) % rhi.renderFrameCount;
|
|
rhi.mainFenceValues[rhi.frameIndex] = currentFenceValue + 1;
|
|
rhi.swapChainBufferIndex = rhi.swapChain->GetCurrentBackBufferIndex();
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_NVAPI) && defined(RHI_ENABLE_NVAPI_RT_VALIDATION)
|
|
if(rhi.nvapiActive)
|
|
{
|
|
const NvAPI_Status nr = NvAPI_D3D12_FlushRaytracingValidationMessages(rhi.device);
|
|
Q_assert(nr == NvAPI_Status::NVAPI_OK);
|
|
}
|
|
#endif
|
|
|
|
DrawGUI();
|
|
|
|
Q_assert(rhi.commandList == rhi.mainCommandList);
|
|
|
|
rhi.currentRootSignature = RHI_MAKE_NULL_HANDLE();
|
|
|
|
WaitForTempCommandList();
|
|
|
|
// wait for pending copies from the upload manager to be finished
|
|
rhi.upload.WaitToStartDrawing(rhi.mainCommandQueue);
|
|
|
|
rhie.inputToRenderUS = (uint32_t)(Sys_Microseconds() - rhi.beforeInputSamplingUS);
|
|
|
|
// reclaim used memory and start recording
|
|
D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset());
|
|
D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL));
|
|
BindDynamicResources();
|
|
|
|
rhi.frameDurationQueryIndex = CmdBeginDurationQuery();
|
|
|
|
rhi.commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
|
|
static TextureBarrier textureBarriers[MAX_DRAWIMAGES + 1];
|
|
static BufferBarrier bufferBarriers[64];
|
|
for(uint32_t t = 0; t < rhi.texturesToTransition.count; ++t)
|
|
{
|
|
const HTexture handle = rhi.texturesToTransition[t];
|
|
const Texture& texture = rhi.textures.Get(handle);
|
|
textureBarriers[t] = TextureBarrier(handle, texture.desc.initialState);
|
|
}
|
|
textureBarriers[rhi.texturesToTransition.count] = TextureBarrier(rhi.renderTargets[rhi.swapChainBufferIndex], ResourceStates::RenderTargetBit);
|
|
for(uint32_t b = 0; b < rhi.buffersToTransition.count; ++b)
|
|
{
|
|
const HBuffer handle = rhi.buffersToTransition[b];
|
|
const Buffer& buffer = rhi.buffers.Get(handle);
|
|
bufferBarriers[b] = BufferBarrier(handle, buffer.desc.initialState);
|
|
}
|
|
Barrier(rhi.texturesToTransition.count, textureBarriers, rhi.buffersToTransition.count, bufferBarriers);
|
|
rhi.texturesToTransition.Clear();
|
|
rhi.buffersToTransition.Clear();
|
|
|
|
for(uint32_t b = 0; b < rhi.buffersToDelete.count; )
|
|
{
|
|
if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter)
|
|
{
|
|
DestroyBuffer(rhi.buffersToDelete[b].buffer);
|
|
rhi.buffersToDelete.Remove(b);
|
|
}
|
|
else
|
|
{
|
|
b++;
|
|
}
|
|
}
|
|
|
|
CmdInsertDebugLabel("RHI::BeginFrame", 0.8f, 0.8f, 0.8f);
|
|
}
|
|
|
|
void EndFrame()
|
|
{
|
|
if(!rhi.frameBegun)
|
|
{
|
|
Sys_DebugPrintf("EndFrame already called!\n");
|
|
return;
|
|
}
|
|
rhi.frameBegun = false;
|
|
|
|
CmdInsertDebugLabel("RHI::EndFrame", 0.8f, 0.8f, 0.8f);
|
|
|
|
CmdBeginBarrier();
|
|
CmdTextureBarrier(rhi.renderTargets[rhi.swapChainBufferIndex], ResourceStates::PresentBit);
|
|
CmdEndBarrier();
|
|
|
|
CmdEndDurationQuery(rhi.frameDurationQueryIndex);
|
|
|
|
// needs to happens before the command list is closed
|
|
ResolveDurationQueries();
|
|
|
|
// stop recording
|
|
D3D(rhi.commandList->Close());
|
|
|
|
#if RHI_DEBUG_FENCE
|
|
Sys_DebugPrintf("Signal: %d (EndFrame)\n", rhi.mainFenceValues[rhi.frameIndex]);
|
|
#endif
|
|
rhi.mainFence.Signal(rhi.mainCommandQueue, rhi.mainFenceValues[rhi.frameIndex]);
|
|
|
|
const int64_t currentTimeUS = Sys_Microseconds();
|
|
rhie.inputToPresentUS = (uint32_t)(currentTimeUS - rhi.beforeInputSamplingUS);
|
|
rhie.renderToPresentUS = (uint32_t)(currentTimeUS - rhi.beforeRenderingUS);
|
|
|
|
if(backEnd.renderFrame)
|
|
{
|
|
ID3D12CommandList* commandListArray[] = { rhi.commandList };
|
|
rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray);
|
|
|
|
if(!rhi.vsync && com_nextTargetTimeUS > currentTimeUS)
|
|
{
|
|
const int64_t remainingUS = com_nextTargetTimeUS - currentTimeUS;
|
|
Sys_MicroSleep((int)remainingUS);
|
|
}
|
|
|
|
Present();
|
|
|
|
static int64_t prevTS = 0;
|
|
const int64_t currTS = Sys_Microseconds();
|
|
const int64_t us = currTS - prevTS;
|
|
prevTS = currTS;
|
|
rhie.presentToPresentUS = us;
|
|
}
|
|
else
|
|
{
|
|
rhie.presentToPresentUS = 0;
|
|
}
|
|
}
|
|
|
|
// Returns the RHI's current frame index (rhi.frameIndex).
uint32_t GetFrameIndex()
{
	const uint32_t currentFrameIndex = rhi.frameIndex;

	return currentFrameIndex;
}
|
|
|
|
// Returns the render target texture registered for the swap chain's current back buffer.
HTexture GetSwapChainTexture()
{
	const HTexture backBuffer = rhi.renderTargets[rhi.swapChainBufferIndex];

	return backBuffer;
}
|
|
|
|
// Creates a buffer resource through the D3D12MA allocator and registers it in the buffer pool.
// The heap type and the creation state are derived from the requested initial state and memory usage.
// Buffers created in the COMMON state are queued in rhi.buffersToTransition.
HBuffer CreateBuffer(const BufferDesc& rhiDesc)
{
	const bool wantsUAV = (rhiDesc.initialState & ResourceStates::UnorderedAccessBit) != 0;
	const bool wantsAS = (rhiDesc.initialState & ResourceStates::RaytracingASBit) != 0;
	const bool isAccelStruct = rhiDesc.initialState == ResourceStates::RaytracingASBit;
	const bool isCpuWritable = rhiDesc.memoryUsage == MemoryUsage::CPU || rhiDesc.memoryUsage == MemoryUsage::Upload;
	const bool isReadback = rhiDesc.memoryUsage == MemoryUsage::Readback;

	// alignment must be 64KB (D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT) or 0, which is effectively 64KB.
	// https://msdn.microsoft.com/en-us/library/windows/desktop/dn903813(v=vs.85).aspx
	D3D12_RESOURCE_DESC desc = { 0 };
	desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
	desc.Format = DXGI_FORMAT_UNKNOWN;
	desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
	desc.Alignment = 0; // D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT
	desc.Width = rhiDesc.byteCount;
	desc.Height = 1;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = 1;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;
	desc.Flags = D3D12_RESOURCE_FLAG_NONE;
	if(wantsUAV || wantsAS)
	{
		// @NOTE: acceleration structures also use the UAV flag
		// D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE is reserved for future use
		// and isn't the right one to use
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
	}

	D3D12MA::ALLOCATION_DESC allocDesc = { 0 };
	allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
	D3D12_RESOURCE_STATES resourceState = D3D12_RESOURCE_STATE_COMMON;
	bool transitionNeeded = false;
	if(isAccelStruct)
	{
		resourceState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
	}
	else if(isCpuWritable)
	{
		allocDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
		resourceState = D3D12_RESOURCE_STATE_GENERIC_READ; // mandated
	}
	else if(isReadback)
	{
		allocDesc.HeapType = D3D12_HEAP_TYPE_READBACK;
		resourceState = D3D12_RESOURCE_STATE_COPY_DEST; // mandated
		desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
	}
	else
	{
		// created in the COMMON state, so the buffer gets queued for a transition
		transitionNeeded = true;
	}

	if(rhi.umaPool != NULL && rhiDesc.memoryUsage == MemoryUsage::GPU)
	{
		// we only use the custom heap for buffers that are not supposed to be CPU-visible
		allocDesc.HeapType = D3D12_HEAP_TYPE_CUSTOM;
		allocDesc.CustomPool = rhi.umaPool;
	}

	allocDesc.Flags = rhiDesc.committedResource ?
		(D3D12MA::ALLOCATION_FLAGS)(D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY | D3D12MA::ALLOCATION_FLAG_COMMITTED) :
		D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY;

	D3D12MA::Allocation* allocation;
	ID3D12Resource* resource;
	D3D(rhi.allocator->CreateResource(&allocDesc, &desc, resourceState, NULL, &allocation, IID_PPV_ARGS(&resource)));
	AllocateAndFixName(rhiDesc);
	SetDebugName(resource, rhiDesc.name, D3DResourceType::Buffer);

	Buffer newBuffer = {};
	newBuffer.desc = rhiDesc;
	newBuffer.shortLifeTime = rhiDesc.shortLifeTime;
	newBuffer.allocation = allocation;
	newBuffer.buffer = resource;
	newBuffer.gpuAddress = resource->GetGPUVirtualAddress();
	newBuffer.currentState = resourceState;
	CreateBufferDescriptors(newBuffer);

	const HBuffer handle = rhi.buffers.Add(newBuffer);
	if(transitionNeeded)
	{
		rhi.buffersToTransition.Add(handle);
	}

	return handle;
}
|
|
|
|
// Destroys a buffer immediately: unmaps it if needed, frees its descriptors
// (when dynamic resources are not in use), releases the resource and its allocation
// and removes it from the buffer pool.
void DestroyBuffer(HBuffer handle)
{
	Buffer& buffer = rhi.buffers.Get(handle);
	if(buffer.mapped)
	{
		UnmapBuffer(handle);
	}

	if(!rhi.useDynamicResources)
	{
		// freed in SRV, UAV, CBV order
		const uint32_t descriptorIndices[] = { buffer.srvIndex, buffer.uavIndex, buffer.cbvIndex };
		for(uint32_t d = 0; d < ARRAY_LEN(descriptorIndices); ++d)
		{
			if(descriptorIndices[d] == InvalidDescriptorIndex)
			{
				continue;
			}

			rhi.descHeapGeneric.Free(descriptorIndices[d]);
		}
	}

	COM_RELEASE(buffer.buffer);
	COM_RELEASE(buffer.allocation);
	rhi.buffers.Remove(handle);
}
|
|
|
|
// Queues a buffer for destruction by a later BeginFrame call.
// The buffer becomes eligible once rhi.beginFrameCounter reaches the current value + FrameCount + 1.
// Null handles are ignored.
void DestroyBufferDelayed(HBuffer buffer)
{
	if(!IsNullHandle(buffer))
	{
		BufferToDelete entry = {};
		entry.buffer = buffer;
		entry.beginFrameCounter = rhi.beginFrameCounter + FrameCount + 1;
		rhi.buffersToDelete.Add(entry);
	}
}
|
|
|
|
// Maps sub-resource 0 of the buffer and returns the CPU pointer.
// Mapping a buffer that is already mapped raises a fatal error.
uint8_t* MapBuffer(HBuffer handle)
{
	Buffer& target = rhi.buffers.Get(handle);
	if(!target.mapped)
	{
		void* cpuAddress = NULL;
		D3D(target.buffer->Map(0, NULL, &cpuAddress));
		target.mapped = true;
		Q_assert(cpuAddress != NULL);

		return (uint8_t*)cpuAddress;
	}

	ri.Error(ERR_FATAL, "Attempted to map buffer '%s' that is already mapped!\n", target.desc.name);
	return NULL;
}
|
|
|
|
// Unmaps sub-resource 0 of the buffer.
// Unmapping a buffer that isn't mapped raises a fatal error.
void UnmapBuffer(HBuffer handle)
{
	Buffer& target = rhi.buffers.Get(handle);
	if(target.mapped)
	{
		target.buffer->Unmap(0, NULL);
		target.mapped = false;
	}
	else
	{
		ri.Error(ERR_FATAL, "Attempted to unmap buffer '%s' that isn't mapped!\n", target.desc.name);
		return;
	}
}
|
|
|
|
// Creates a 2D or 3D texture (or wraps rhiDesc.nativeResource when one is supplied),
// creates its RTV/DSV and shader descriptors and registers it in the texture pool.
// Textures allocated here start in the COPY_DEST state and are queued in rhi.texturesToTransition.
HTexture CreateTexture(const TextureDesc& rhiDesc)
{
	Q_assert(rhiDesc.width > 0);
	Q_assert(rhiDesc.height > 0);
	Q_assert(rhiDesc.sampleCount > 0);
	Q_assert(rhiDesc.mipCount > 0);
	Q_assert(rhiDesc.mipCount <= MaxTextureMips);

	const bool isNative = rhiDesc.nativeResource != NULL;
	const bool isD24S8 = rhiDesc.format == TextureFormat::Depth24_Stencil8;

	// Alignment 0 is the same as specifying D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT
	D3D12_RESOURCE_DESC desc = { 0 };
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	desc.Format = GetD3DFormat(rhiDesc.format);
	desc.Alignment = 0;
	desc.Width = rhiDesc.width;
	desc.Height = rhiDesc.height;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = rhiDesc.mipCount;
	desc.SampleDesc.Count = rhiDesc.sampleCount;
	desc.SampleDesc.Quality = 0;
	if(rhiDesc.depth > 1)
	{
		// more than 1 slice makes it a volume texture
		desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
		desc.DepthOrArraySize = rhiDesc.depth;
	}

	// resource flags derived from the allowed states
	desc.Flags = D3D12_RESOURCE_FLAG_NONE;
	if(rhiDesc.allowedState & ResourceStates::UnorderedAccessBit)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
	}
	if(rhiDesc.allowedState & ResourceStates::RenderTargetBit)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
	}
	if(rhiDesc.allowedState & ResourceStates::DepthAccessBits)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
	}
	if((rhiDesc.allowedState & ResourceStates::ShaderAccessBits) == 0)
	{
		desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
	}

	D3D12MA::ALLOCATION_DESC allocDesc = { 0 };
	allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
	allocDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_NONE;
	allocDesc.Flags = rhiDesc.committedResource ?
		(D3D12MA::ALLOCATION_FLAGS)(D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY | D3D12MA::ALLOCATION_FLAG_COMMITTED) :
		D3D12MA::ALLOCATION_FLAG_STRATEGY_MIN_MEMORY;

	// the clear value uses the format returned by GetD3DFormat,
	// i.e. before the resource format is replaced by its typeless variant below
	D3D12_CLEAR_VALUE optimizedClear = {};
	const D3D12_CLEAR_VALUE* optimizedClearPtr = NULL;
	if(rhiDesc.usePreferredClearValue)
	{
		optimizedClear.Format = desc.Format;
		if(IsD3DDepthFormat(optimizedClear.Format))
		{
			optimizedClear.DepthStencil.Depth = rhiDesc.clearDepth;
			optimizedClear.DepthStencil.Stencil = rhiDesc.clearStencil;
		}
		else
		{
			memcpy(optimizedClear.Color, rhiDesc.clearColor, sizeof(optimizedClear.Color));
		}
		optimizedClearPtr = &optimizedClear;
	}

	if(isD24S8)
	{
		desc.Format = DXGI_FORMAT_R24G8_TYPELESS;
	}

	// native resources are used as is and have no D3D12MA allocation
	D3D12MA::Allocation* allocation = NULL;
	ID3D12Resource* resource;
	if(isNative)
	{
		resource = (ID3D12Resource*)rhiDesc.nativeResource;
	}
	else
	{
		D3D(rhi.allocator->CreateResource(&allocDesc, &desc, D3D12_RESOURCE_STATE_COPY_DEST, optimizedClearPtr, &allocation, IID_PPV_ARGS(&resource)));
	}
	AllocateAndFixName(rhiDesc);
	SetDebugName(resource, rhiDesc.name, D3DResourceType::Texture);

	// render target view of mip 0
	uint32_t rtvIndex = InvalidDescriptorIndex;
	if(rhiDesc.allowedState & ResourceStates::RenderTargetBit)
	{
		D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = { 0 };
		rtvDesc.Format = desc.Format;
		rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
		rtvDesc.Texture2D.MipSlice = 0;
		rtvDesc.Texture2D.PlaneSlice = 0;
		rtvIndex = rhi.descHeapRTVs.CreateRTV(resource, rtvDesc);
	}

	// depth-stencil view of mip 0
	uint32_t dsvIndex = InvalidDescriptorIndex;
	if(rhiDesc.allowedState & ResourceStates::DepthWriteBit)
	{
		D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = { 0 };
		// the resource is typeless for D24S8, the view uses the depth-stencil format
		dsvDesc.Format = isD24S8 ? DXGI_FORMAT_D24_UNORM_S8_UINT : desc.Format;
		dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
		dsvDesc.Flags = D3D12_DSV_FLAG_NONE;
		dsvDesc.Texture2D.MipSlice = 0;
		dsvIndex = rhi.descHeapDSVs.CreateDSV(resource, dsvDesc);
	}

	Texture newTexture = {};
	newTexture.desc = rhiDesc;
	newTexture.shortLifeTime = rhiDesc.shortLifeTime;
	newTexture.allocation = allocation;
	newTexture.texture = resource;
	newTexture.currentState = D3D12_RESOURCE_STATE_COPY_DEST;
	newTexture.rtvIndex = rtvIndex;
	newTexture.dsvIndex = dsvIndex;
	CreateTextureDescriptors(newTexture);

	const HTexture htexture = rhi.textures.Add(newTexture);
	if(!isNative)
	{
		rhi.texturesToTransition.Add(htexture);
	}

	return htexture;
}
|
|
|
|
// Destroys a texture: frees its RTV/DSV descriptors, frees its SRV and per-mip UAV
// descriptors (when dynamic resources are not in use), releases the resource and
// its allocation and removes it from the texture pool.
void DestroyTexture(HTexture handle)
{
	Texture& doomed = rhi.textures.Get(handle);

	const uint32_t rtvIndex = doomed.rtvIndex;
	if(rtvIndex != InvalidDescriptorIndex)
	{
		rhi.descHeapRTVs.Free(rtvIndex);
	}

	const uint32_t dsvIndex = doomed.dsvIndex;
	if(dsvIndex != InvalidDescriptorIndex)
	{
		rhi.descHeapDSVs.Free(dsvIndex);
	}

	if(!rhi.useDynamicResources)
	{
		if(doomed.srvIndex != InvalidDescriptorIndex)
		{
			rhi.descHeapGeneric.Free(doomed.srvIndex);
		}

		// 1 UAV descriptor per mip level
		const uint32_t mipCount = doomed.desc.mipCount;
		for(uint32_t mip = 0; mip < mipCount; ++mip)
		{
			const uint32_t mipUAV = doomed.mips[mip].uavIndex;
			if(mipUAV == InvalidDescriptorIndex)
			{
				continue;
			}

			rhi.descHeapGeneric.Free(mipUAV);
		}
	}

	COM_RELEASE(doomed.texture);
	COM_RELEASE(doomed.allocation);
	rhi.textures.Remove(handle);
}
|
|
|
|
// Creates a sampler descriptor from the RHI description and registers it in the sampler pool.
// Anisotropic filtering falls back to tri-linear filtering when r_ext_max_anisotropy is 1 or less.
HSampler CreateSampler(const SamplerDesc& rhiDesc)
{
	const D3D12_TEXTURE_ADDRESS_MODE wrap = GetD3DTextureAddressMode(rhiDesc.wrapMode);
	D3D12_FILTER d3dFilter = GetD3DFilter(rhiDesc.filterMode);
	UINT anisotropy = r_ext_max_anisotropy->integer;
	if(d3dFilter == D3D12_FILTER_ANISOTROPIC && anisotropy <= 1)
	{
		d3dFilter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
	}
	if(d3dFilter != D3D12_FILTER_ANISOTROPIC)
	{
		// the anisotropy level is forced to 1 for all the other filters
		anisotropy = 1;
	}

	D3D12_SAMPLER_DESC desc = { 0 };
	desc.Filter = d3dFilter;
	desc.MaxAnisotropy = anisotropy;
	desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NONE;
	desc.AddressU = wrap;
	desc.AddressV = wrap;
	desc.AddressW = wrap;
	desc.MinLOD = rhiDesc.minLOD;
	desc.MaxLOD = 666.0f;
	desc.MipLODBias = rhiDesc.mipLODBias;
	const uint32_t descriptorIndex = CreateSampler(desc);

	Sampler newSampler;
	newSampler.desc = rhiDesc;
	newSampler.shortLifeTime = rhiDesc.shortLifeTime;
	newSampler.heapIndex = descriptorIndex;

	return rhi.samplers.Add(newSampler);
}
|
|
|
|
// Removes a sampler from the sampler pool.
// Its descriptor is freed from rhi.descHeapSamplers when dynamic resources are not in use.
void DestroySampler(HSampler hsampler)
{
	const uint32_t descriptorIndex = rhi.samplers.Get(hsampler).heapIndex;
	if(!rhi.useDynamicResources)
	{
		rhi.descHeapSamplers.Free(descriptorIndex);
	}

	rhi.samplers.Remove(hsampler);
}
|
|
|
|
// Flags the shader stages that can access a root parameter of visibility 'inVis'.
// Entries of outVis are only ever set to true, so successive calls accumulate.
// D3D12_SHADER_VISIBILITY_ALL flags every stage: leaving the flags untouched would make
// the caller deny root access to stages that do use the parameter.
static void AddShaderVisibility(bool outVis[ShaderStage::Count], D3D12_SHADER_VISIBILITY inVis)
{
	switch(inVis)
	{
		case D3D12_SHADER_VISIBILITY_ALL:
			for(int s = 0; s < ShaderStage::Count; ++s)
			{
				outVis[s] = true;
			}
			break;
		case D3D12_SHADER_VISIBILITY_VERTEX: outVis[ShaderStage::Vertex] = true; break;
		case D3D12_SHADER_VISIBILITY_PIXEL: outVis[ShaderStage::Pixel] = true; break;
		default: break;
	}
}
|
|
|
|
// Creates a root signature made of, in this order:
// - 1 root constants parameter per shader stage that declares a non-zero byte count
// - 1 CBV SRV UAV descriptor table when generic ranges are declared
// - 1 sampler descriptor table when samplers are declared
// Root access is denied to the vertex/pixel stages that none of the parameters are visible to.
// A serialization failure raises a fatal error.
// Only valid when dynamic resources are disabled.
HRootSignature CreateRootSignature(const RootSignatureDesc& rhiDesc)
{
	ASSERT_DR_DISABLED();

	RootSignature rhiSignature = {};
	rhiSignature.genericTableIndex = UINT32_MAX;
	rhiSignature.samplerTableIndex = UINT32_MAX;
	rhiSignature.genericDescCount = 0;
	rhiSignature.samplerDescCount = rhiDesc.samplerCount;

	bool shaderVis[ShaderStage::Count] = {};

	//
	// root constants
	//
	int parameterCount = 0;
	D3D12_ROOT_PARAMETER parameters[16];
	for(int s = 0; s < ShaderStage::Count; ++s)
	{
		if(rhiDesc.constants[s].byteCount > 0)
		{
			rhiSignature.constants[s].parameterIndex = parameterCount;

			D3D12_ROOT_PARAMETER& p = parameters[parameterCount];
			p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
			// the byte count is rounded up to a whole number of 32-bit values
			p.Constants.Num32BitValues = AlignUp<UINT>(rhiDesc.constants[s].byteCount, 4) / 4;
			p.Constants.RegisterSpace = 0;
			p.Constants.ShaderRegister = 0;
			p.ShaderVisibility = GetD3DVisibility((ShaderStage::Id)s);
			AddShaderVisibility(shaderVis, p.ShaderVisibility);

			parameterCount++;
		}
	}
	Q_assert(parameterCount <= ShaderStage::Count);

	//
	// CBV SRV UAV table
	//
	uint32_t onePastLastTableIndex = 0;
	D3D12_DESCRIPTOR_RANGE genericRanges[ARRAY_LEN(rhiDesc.genericRanges)] = {};
	// more ranges than the description can hold would overflow the local array
	Q_assert(rhiDesc.genericRangeCount <= ARRAY_LEN(genericRanges));
	for(uint32_t rangeIndex = 0; rangeIndex < rhiDesc.genericRangeCount; ++rangeIndex)
	{
		D3D12_DESCRIPTOR_RANGE& r = genericRanges[rangeIndex];
		const RootSignatureDesc::DescriptorRange& rIn = rhiDesc.genericRanges[rangeIndex];
		Q_assert(rIn.count > 0);
		r.BaseShaderRegister = 0;
		r.NumDescriptors = rIn.count;
		r.OffsetInDescriptorsFromTableStart = rIn.firstIndex;
		r.RangeType = GetD3DDescriptorRangeType(rIn.type);
		r.RegisterSpace = 0;
		if(rIn.type == DescriptorType::Buffer)
		{
			// @TODO: or bump up BaseShaderRegister, or let the user decide
			r.RegisterSpace = 1;
		}
		rhiSignature.genericDescCount += rIn.count;
		onePastLastTableIndex = max(onePastLastTableIndex, rIn.firstIndex + rIn.count);
	}

	// if you trigger this, you have at least 1 gap in your root signature
	Q_assert(onePastLastTableIndex == rhiSignature.genericDescCount);
	rhiSignature.genericDescCount = onePastLastTableIndex; // safety net...

	if(rhiSignature.genericDescCount > 0)
	{
		rhiSignature.genericTableIndex = parameterCount;

		D3D12_ROOT_PARAMETER& p = parameters[parameterCount++];
		p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
		p.DescriptorTable.NumDescriptorRanges = rhiDesc.genericRangeCount;
		p.DescriptorTable.pDescriptorRanges = genericRanges;
		p.ShaderVisibility = GetD3DVisibility(rhiDesc.genericVisibility);
		AddShaderVisibility(shaderVis, p.ShaderVisibility);
	}

	//
	// sampler table
	//
	D3D12_DESCRIPTOR_RANGE samplerRange = {};
	if(rhiDesc.samplerCount > 0)
	{
		rhiSignature.samplerTableIndex = parameterCount;

		D3D12_DESCRIPTOR_RANGE& r = samplerRange;
		r.BaseShaderRegister = 0;
		r.NumDescriptors = rhiDesc.samplerCount;
		r.OffsetInDescriptorsFromTableStart = 0;
		r.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
		r.RegisterSpace = 0;

		D3D12_ROOT_PARAMETER& p = parameters[parameterCount++];
		p.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
		p.DescriptorTable.NumDescriptorRanges = 1;
		p.DescriptorTable.pDescriptorRanges = &samplerRange;
		p.ShaderVisibility = GetD3DVisibility(rhiDesc.samplerVisibility);
		AddShaderVisibility(shaderVis, p.ShaderVisibility);
	}

	D3D12_ROOT_SIGNATURE_DESC desc = { 0 };
	desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE |
		D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
		D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
		D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
		D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
		D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
	if(!shaderVis[ShaderStage::Vertex])
	{
		desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
	}
	if(!shaderVis[ShaderStage::Pixel])
	{
		desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
	}
	if(rhiDesc.usingVertexBuffers)
	{
		desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
	}
	desc.NumParameters = parameterCount;
	desc.pParameters = parameters;
	desc.NumStaticSamplers = 0;
	desc.pStaticSamplers = NULL;

	// both blobs start out as NULL so that a failed call can't leave garbage pointers behind
	ID3DBlob* blob = NULL;
	ID3DBlob* errorBlob = NULL;
	if(FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &errorBlob)))
	{
		// the error blob isn't guaranteed to be there
		const char* const errorMessage = errorBlob != NULL ?
			(const char*)errorBlob->GetBufferPointer() :
			"no error message available";
		ri.Error(ERR_FATAL, "Root signature creation failed!\n%s\n", errorMessage);
	}
	COM_RELEASE(errorBlob);

	ID3D12RootSignature* signature;
	D3D(rhi.device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&signature)));
	COM_RELEASE(blob);
	AllocateAndFixName(rhiDesc);
	SetDebugName(signature, rhiDesc.name, D3DResourceType::RootSignature);

	rhiSignature.desc = rhiDesc;
	rhiSignature.signature = signature;
	rhiSignature.shortLifeTime = rhiDesc.shortLifeTime;

	return rhi.rootSignatures.Add(rhiSignature);
}
|
|
|
|
// Releases the D3D12 root signature object and removes the entry from the root signature pool.
// Only valid when dynamic resources are disabled.
void DestroyRootSignature(HRootSignature signature)
{
	ASSERT_DR_DISABLED();

	RootSignature& rhiSignature = rhi.rootSignatures.Get(signature);
	COM_RELEASE(rhiSignature.signature);

	rhi.rootSignatures.Remove(signature);
}
|
|
|
|
// Creates the GPU-visible descriptor heaps (CBV SRV UAV and sampler) matching a root signature
// and fills every slot with the descriptor of the corresponding null resource.
// Only valid when dynamic resources are disabled.
HDescriptorTable CreateDescriptorTable(const DescriptorTableDesc& desc)
{
	ASSERT_DR_DISABLED();

	const RootSignature& sig = rhi.rootSignatures.Get(desc.rootSignature);

	const char* srvName = AllocateName(va("%s GPU-visible CBV SRV UAV", desc.name), desc.shortLifeTime);
	const char* samName = AllocateName(va("%s GPU-visible sampler", desc.name), desc.shortLifeTime);

	DescriptorTable table = { 0 };
	table.genericHeap = CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, sig.genericDescCount, true, srvName);
	table.samplerHeap = CreateDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, sig.samplerDescCount, true, samName);
	table.shortLifeTime = desc.shortLifeTime;

	const Texture& nullTex = rhi.textures.Get(rhi.nullTexture);
	const Texture& nullRWTex = rhi.textures.Get(rhi.nullRWTexture);
	const Buffer& nullBuffer = rhi.buffers.Get(rhi.nullBuffer);
	const Buffer& nullRWBuffer = rhi.buffers.Get(rhi.nullRWBuffer);

	// bind null CBV SRV UAV resources
	for(uint32_t r = 0; r < sig.desc.genericRangeCount; ++r)
	{
		const RootSignatureDesc::DescriptorRange& range = sig.desc.genericRanges[r];

		uint32_t index;
		switch(range.type)
		{
			case DescriptorType::Texture: index = nullTex.srvIndex; break;
			case DescriptorType::RWTexture: index = nullRWTex.mips[0].uavIndex; break;
			case DescriptorType::Buffer: index = nullBuffer.srvIndex; break;
			case DescriptorType::RWBuffer: index = nullRWBuffer.uavIndex; break;
			default: Q_assert(!"Unsupported descriptor type"); continue;
		}

		for(uint32_t i = 0; i < range.count; ++i)
		{
			CopyDescriptor(table.genericHeap, range.firstIndex + i, rhi.descHeapGeneric, index);
		}
	}

	// bind null samplers
	if(sig.desc.samplerCount > 0)
	{
		// the source descriptor lives at the sampler's heap index,
		// which is the index DestroySampler frees from rhi.descHeapSamplers
		const uint32_t nullSamplerIndex = rhi.samplers.Get(rhi.nullSampler).heapIndex;
		Q_assert(nullSamplerIndex != InvalidDescriptorIndex);
		for(uint32_t d = 0; d < sig.desc.samplerCount; ++d)
		{
			CopyDescriptor(table.samplerHeap, d, rhi.descHeapSamplers, nullSamplerIndex);
		}
	}

	return rhi.descriptorTables.Add(table);
}
|
|
|
|
// Copies the descriptors of the specified resources into a descriptor table,
// starting at slot update.firstIndex of the matching GPU-visible heap.
// - Texture / Buffer: 1 SRV per resource
// - RWBuffer: 1 UAV per resource
// - RWTexture: either the UAVs of the entire mip chain or the UAV of mip update.uavMipSlice
// - Sampler: 1 sampler descriptor per resource
// Any other descriptor type, or a table lacking the required heap, raises a fatal error.
// Only valid when dynamic resources are disabled.
void UpdateDescriptorTable(HDescriptorTable htable, const DescriptorTableUpdate& update)
{
	ASSERT_DR_DISABLED();

	Q_assert(update.textures != NULL);

	DescriptorTable& table = rhi.descriptorTables.Get(htable);

	if(update.type == DescriptorType::Texture && table.genericHeap)
	{
		for(uint32_t i = 0; i < update.resourceCount; ++i)
		{
			const Texture& texture = rhi.textures.Get(update.textures[i]);
			Q_assert(texture.srvIndex != InvalidDescriptorIndex);
			CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex);
		}
	}
	else if(update.type == DescriptorType::Buffer && table.genericHeap)
	{
		// read-only buffers are bound through their SRVs
		for(uint32_t i = 0; i < update.resourceCount; ++i)
		{
			const Buffer& buffer = rhi.buffers.Get(update.buffers[i]);
			Q_assert(buffer.srvIndex != InvalidDescriptorIndex);
			CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.srvIndex);
		}
	}
	else if(update.type == DescriptorType::RWBuffer && table.genericHeap)
	{
		for(uint32_t i = 0; i < update.resourceCount; ++i)
		{
			const Buffer& buffer = rhi.buffers.Get(update.buffers[i]);
			Q_assert(buffer.uavIndex != InvalidDescriptorIndex);
			CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.uavIndex);
		}
	}
	else if(update.type == DescriptorType::RWTexture && table.genericHeap)
	{
		// a texture can take up several consecutive slots (1 per mip),
		// so the destination index is tracked separately from the resource index
		uint32_t destIndex = update.firstIndex;
		for(uint32_t i = 0; i < update.resourceCount; ++i)
		{
			const Texture& texture = rhi.textures.Get(update.textures[i]);
			uint32_t start;
			uint32_t end;
			if(update.uavMipChain)
			{
				start = 0;
				end = texture.desc.mipCount;
			}
			else
			{
				Q_assert(update.uavMipSlice < texture.desc.mipCount);
				start = update.uavMipSlice;
				end = start + 1;
			}

			for(uint32_t m = start; m < end; ++m)
			{
				Q_assert(texture.mips[m].uavIndex != InvalidDescriptorIndex);
				CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex);
			}
		}
	}
	else if(update.type == DescriptorType::Sampler && table.samplerHeap)
	{
		for(uint32_t i = 0; i < update.resourceCount; ++i)
		{
			// the source descriptor lives at the sampler's heap index,
			// which is the index DestroySampler frees from rhi.descHeapSamplers
			const Sampler& sampler = rhi.samplers.Get(update.samplers[i]);
			Q_assert(sampler.heapIndex != InvalidDescriptorIndex);
			CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, sampler.heapIndex);
		}
	}
	else
	{
		ri.Error(ERR_FATAL, "UpdateDescriptorTable: unsupported descriptor type\n");
	}
}
|
|
|
|
// Releases both descriptor heaps of a descriptor table and removes it from the pool.
// Only valid when dynamic resources are disabled.
void DestroyDescriptorTable(HDescriptorTable handle)
{
	ASSERT_DR_DISABLED();

	DescriptorTable& doomed = rhi.descriptorTables.Get(handle);

	// generic (CBV SRV UAV) heap first, sampler heap second
	COM_RELEASE(doomed.genericHeap);
	COM_RELEASE(doomed.samplerHeap);

	rhi.descriptorTables.Remove(handle);
}
|
|
|
|
// Creates a graphics pipeline state object from the RHI description
// and registers it in the pipeline pool.
// The root signature is the dynamic resources one when that mode is active,
// the one referenced by the description otherwise.
HPipeline CreateGraphicsPipeline(const GraphicsPipelineDesc& rhiDesc)
{
	if(!rhi.useDynamicResources)
	{
		Q_assert(!IsNullHandle(rhiDesc.rootSignature));
		Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Graphics);
	}

	D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { 0 };
	desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far
	desc.pRootSignature = rhi.useDynamicResources ?
		rhi.dynamicResources.rootSignature :
		rhi.rootSignatures.Get(rhiDesc.rootSignature).signature;
	desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
	desc.SampleDesc.Count = 1;
	desc.SampleMask = UINT_MAX;

	//
	// input layout
	//
	// attributes sharing a semantic get increasing semantic indices in declaration order
	UINT nextSemanticIndex[ShaderSemantic::Count] = { 0 };
	D3D12_INPUT_ELEMENT_DESC inputElements[MaxVertexAttributes];
	for(int a = 0; a < rhiDesc.vertexLayout.attributeCount; ++a)
	{
		const VertexAttribute& attrib = rhiDesc.vertexLayout.attributes[a];
		D3D12_INPUT_ELEMENT_DESC& element = inputElements[a];
		element.SemanticName = GetD3DSemanticName(attrib.semantic);
		element.SemanticIndex = nextSemanticIndex[attrib.semantic]++;
		element.Format = GetD3DFormat(attrib.dataType, attrib.vectorLength);
		element.InputSlot = attrib.vertexBufferIndex;
		element.AlignedByteOffset = attrib.structByteOffset;
		element.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
		element.InstanceDataStepRate = 0;
	}
	desc.InputLayout.pInputElementDescs = inputElements;
	desc.InputLayout.NumElements = rhiDesc.vertexLayout.attributeCount;

	//
	// render targets and blend states
	//
	desc.NumRenderTargets = rhiDesc.renderTargetCount;
	for(int t = 0; t < rhiDesc.renderTargetCount; ++t)
	{
		const GraphicsPipelineDesc::RenderTarget& source = rhiDesc.renderTargets[t];
		D3D12_RENDER_TARGET_BLEND_DESC& blend = desc.BlendState.RenderTarget[t];
		blend.SrcBlend = GetD3DSourceBlend(source.q3BlendMode);
		blend.DestBlend = GetD3DDestBlend(source.q3BlendMode);
		blend.SrcBlendAlpha = GetAlphaBlendFromColorBlend(blend.SrcBlend);
		blend.DestBlendAlpha = GetAlphaBlendFromColorBlend(blend.DestBlend);
		blend.BlendOp = D3D12_BLEND_OP_ADD;
		blend.BlendOpAlpha = D3D12_BLEND_OP_ADD;
		blend.LogicOpEnable = FALSE;
		blend.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; // RGBA
		// source * 1 + destination * 0 means blending can be turned off
		const bool isOpaque = blend.SrcBlend == D3D12_BLEND_ONE && blend.DestBlend == D3D12_BLEND_ZERO;
		blend.BlendEnable = isOpaque ? FALSE : TRUE;
		desc.RTVFormats[t] = GetD3DFormat(source.format);
	}

	//
	// depth and stencil
	//
	D3D12_DEPTH_STENCIL_DESC& depthStencil = desc.DepthStencilState;
	depthStencil.DepthEnable = rhiDesc.depthStencil.enableDepthTest ? TRUE : FALSE;
	depthStencil.DepthFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.depthComparison);
	depthStencil.DepthWriteMask = rhiDesc.depthStencil.enableDepthWrites ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
	depthStencil.StencilEnable = rhiDesc.depthStencil.enableStencil;
	depthStencil.StencilReadMask = rhiDesc.depthStencil.stencilReadMask;
	depthStencil.StencilWriteMask = rhiDesc.depthStencil.stencilWriteMask;

	D3D12_DEPTH_STENCILOP_DESC& backFace = depthStencil.BackFace;
	backFace.StencilFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.backFace.comparison);
	backFace.StencilPassOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.passOp);
	backFace.StencilFailOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.failOp);
	backFace.StencilDepthFailOp = GetD3DStencilOp(rhiDesc.depthStencil.backFace.depthFailOp);

	D3D12_DEPTH_STENCILOP_DESC& frontFace = depthStencil.FrontFace;
	frontFace.StencilFunc = GetD3DComparisonFunction(rhiDesc.depthStencil.frontFace.comparison);
	frontFace.StencilPassOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.passOp);
	frontFace.StencilFailOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.failOp);
	frontFace.StencilDepthFailOp = GetD3DStencilOp(rhiDesc.depthStencil.frontFace.depthFailOp);

	desc.DSVFormat = GetD3DFormat(rhiDesc.depthStencil.depthStencilFormat);

	//
	// shaders
	//
	desc.VS.pShaderBytecode = rhiDesc.vertexShader.data;
	desc.VS.BytecodeLength = rhiDesc.vertexShader.byteCount;
	desc.PS.pShaderBytecode = rhiDesc.pixelShader.data;
	desc.PS.BytecodeLength = rhiDesc.pixelShader.byteCount;

	//
	// rasterizer
	//
	D3D12_RASTERIZER_DESC& raster = desc.RasterizerState;
	raster.FillMode = rhiDesc.rasterizer.wireFrame ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;
	raster.CullMode = GetD3DCullMode(rhiDesc.rasterizer.cullMode);
	raster.FrontCounterClockwise = TRUE;
	raster.DepthBias = rhiDesc.rasterizer.polygonOffset ? 1 : 0;
	raster.DepthBiasClamp = 0.0f;
	raster.SlopeScaledDepthBias = rhiDesc.rasterizer.polygonOffset ? 1.0f : 0.0f;
	// depth clamping is achieved by disabling depth clipping
	raster.DepthClipEnable = rhiDesc.rasterizer.clampDepth ? FALSE : TRUE;
	raster.MultisampleEnable = FALSE;
	raster.AntialiasedLineEnable = FALSE;
	raster.ForcedSampleCount = 0;
	raster.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;

	ID3D12PipelineState* pipelineState;
	D3D(rhi.device->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pipelineState)));
	AllocateAndFixName(rhiDesc);
	SetDebugName(pipelineState, rhiDesc.name, D3DResourceType::PipelineState);

	Pipeline newPipeline;
	newPipeline.type = PipelineType::Graphics;
	newPipeline.graphicsDesc = rhiDesc;
	newPipeline.pso = pipelineState;
	newPipeline.shortLifeTime = rhiDesc.shortLifeTime;

	return rhi.pipelines.Add(newPipeline);
}
|
|
|
|
// Creates a compute pipeline state object from the RHI description
// and registers it in the pipeline pool.
// The root signature is the dynamic resources one when that mode is active,
// the one referenced by the description otherwise.
HPipeline CreateComputePipeline(const ComputePipelineDesc& rhiDesc)
{
	if(!rhi.useDynamicResources)
	{
		Q_assert(!IsNullHandle(rhiDesc.rootSignature));
		Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Compute);
	}

	D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { 0 };
	desc.CS.pShaderBytecode = rhiDesc.shader.data;
	desc.CS.BytecodeLength = rhiDesc.shader.byteCount;
	desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far
	if(rhi.useDynamicResources)
	{
		desc.pRootSignature = rhi.dynamicResources.rootSignature;
	}
	else
	{
		desc.pRootSignature = rhi.rootSignatures.Get(rhiDesc.rootSignature).signature;
	}

	ID3D12PipelineState* pipelineState;
	D3D(rhi.device->CreateComputePipelineState(&desc, IID_PPV_ARGS(&pipelineState)));
	AllocateAndFixName(rhiDesc);
	SetDebugName(pipelineState, rhiDesc.name, D3DResourceType::PipelineState);

	Pipeline newPipeline;
	newPipeline.type = PipelineType::Compute;
	newPipeline.computeDesc = rhiDesc;
	newPipeline.pso = pipelineState;
	newPipeline.shortLifeTime = rhiDesc.shortLifeTime;

	return rhi.pipelines.Add(newPipeline);
}
|
|
|
|
// Releases the pipeline state object owned by the pool entry, then removes the entry from the pool.
void DestroyPipeline(HPipeline pipeline)
{
	Pipeline& entry = rhi.pipelines.Get(pipeline);
	COM_RELEASE(entry.pso);
	rhi.pipelines.Remove(pipeline);
}
|
|
|
|
// Compiles HLSL source with DXC (shader model 6.0) and stores the compiled blob in the shader pool.
// desc.stage selects the target profile (vs/ps/cs) and the VERTEX_SHADER/PIXEL_SHADER/COMPUTE_SHADER defines.
// desc.entryPoint is the entry point name; desc.macros adds up to 16 extra "name=value" defines.
// Any compilation failure is reported through ri.Error(ERR_FATAL, ...).
// Returns the handle of the new shader (a null handle after a reported failure).
HShader CreateShader(const ShaderDesc& desc)
{
	struct MacroW
	{
		wchar_t macro[256];
	};
	MacroW macros[16];
	Q_assert(desc.macroCount <= ARRAY_LEN(macros));
	if(desc.macroCount > ARRAY_LEN(macros))
	{
		// don't rely on the assert alone: too many macros would overflow both macros[] and arguments[]
		ri.Error(ERR_FATAL, "Too many shader macros: %d (max: %d)\n", (int)desc.macroCount, (int)ARRAY_LEN(macros));
		return RHI_MAKE_NULL_HANDLE();
	}

	IDxcBlobEncoding* blobEncoding;
	D3D(rhi.dxcUtils->CreateBlob(desc.source, desc.sourceLength, CP_ACP, &blobEncoding));

	LPCWSTR targetW = L"???";
	LPCSTR targetName = "???";
	switch(desc.stage)
	{
		case ShaderStage::Vertex: targetW = L"vs_6_0"; targetName = "vs"; break;
		case ShaderStage::Pixel: targetW = L"ps_6_0"; targetName = "ps"; break;
		case ShaderStage::Compute: targetW = L"cs_6_0"; targetName = "cs"; break;
		default: Q_assert(0); break;
	}

	wchar_t entryPointW[256];
	MultiByteToWideChar(CP_ACP, 0, desc.entryPoint, -1, entryPointW, ARRAY_LEN(entryPointW));

	// worst case: 6 fixed + 5 debug + 6 stage defines + 2 per macro (16 max) = 49 entries
	LPCWSTR arguments[64];
	UINT32 argumentCount = 0;
#define PushArg(Arg) arguments[argumentCount++] = Arg
	// NOTE(review): positional (non-option) argument; presumably taken by DXC as the source name - confirm it is intended
	PushArg(L"E");
	PushArg(L"-E");
	PushArg(entryPointW);
	PushArg(L"-T");
	PushArg(targetW);
	PushArg(DXC_ARG_WARNINGS_ARE_ERRORS); // -WX
#if defined(D3D_DEBUG)
	PushArg(DXC_ARG_DEBUG); // -Zi embeds debug info
	PushArg(DXC_ARG_SKIP_OPTIMIZATIONS); // -Od disables optimizations
	PushArg(DXC_ARG_ENABLE_STRICTNESS); // -Ges enables strict mode
	PushArg(DXC_ARG_IEEE_STRICTNESS); // -Gis forces IEEE strictness
	PushArg(L"-Qembed_debug"); // -Qembed_debug embeds debug info in shader container
#else
	PushArg(L"-Qstrip_debug");
	PushArg(L"-Qstrip_reflect");
	PushArg(DXC_ARG_OPTIMIZATION_LEVEL3); // -O3
#endif
	PushArg(L"-D");
	PushArg(desc.stage == ShaderStage::Vertex ? L"VERTEX_SHADER=1" : L"VERTEX_SHADER=0");
	PushArg(L"-D");
	PushArg(desc.stage == ShaderStage::Pixel ? L"PIXEL_SHADER=1" : L"PIXEL_SHADER=0");
	PushArg(L"-D");
	PushArg(desc.stage == ShaderStage::Compute ? L"COMPUTE_SHADER=1" : L"COMPUTE_SHADER=0");
	for(uint32_t m = 0; m < desc.macroCount; ++m)
	{
		// each macro gets its own wide-string storage because arguments[] only holds pointers
		const char* input = va("%s=%s", desc.macros[m].name, desc.macros[m].value);
		MacroW& output = macros[m];
		MultiByteToWideChar(CP_ACP, 0, input, -1, output.macro, ARRAY_LEN(output.macro));
		PushArg(L"-D");
		PushArg(output.macro);
	}
#undef PushArg
	Q_assert(argumentCount <= ARRAY_LEN(arguments));

	DxcBuffer sourceBuffer = {};
	sourceBuffer.Ptr = blobEncoding->GetBufferPointer();
	sourceBuffer.Size = blobEncoding->GetBufferSize();
	sourceBuffer.Encoding = 0;

	// failure is either the Compile call itself, the status query, or the reported compilation status
	IDxcResult* result = NULL;
	HRESULT hr = S_OK;
	if(FAILED(rhi.dxcCompiler->Compile(&sourceBuffer, arguments, argumentCount, NULL, IID_PPV_ARGS(&result))) ||
		FAILED(result->GetStatus(&hr)) ||
		FAILED(hr))
	{
		IDxcBlobUtf8* errors;
		if(result != NULL && SUCCEEDED(result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), NULL)) &&
			errors->GetStringLength() > 0)
		{
			ri.Error(ERR_FATAL, "Shader (%s) compilation failed:\n%s\n", targetName, (const char*)errors->GetBufferPointer());
		}
		else
		{
			ri.Error(ERR_FATAL, "Shader (%s) compilation failed:\n", targetName);
		}
		return RHI_MAKE_NULL_HANDLE();
	}

	// keep only the compiled object; the source blob and the result container are no longer needed
	IDxcBlob* shaderBlob;
	D3D(result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&shaderBlob), NULL));
	blobEncoding->Release();
	result->Release();

	Shader shader;
	shader.blob = shaderBlob;

	return rhi.shaders.Add(shader);
}
|
|
|
|
// Returns a view (pointer + size) of the compiled byte code held by the shader's blob.
// The returned pointer refers to the blob's own buffer; no copy is made.
ShaderByteCode GetShaderByteCode(HShader shader)
{
	IDxcBlob* const compiledBlob = rhi.shaders.Get(shader).blob;

	ShaderByteCode result;
	result.data = compiledBlob->GetBufferPointer();
	result.byteCount = compiledBlob->GetBufferSize();
	return result;
}
|
|
|
|
// Releases the compiled blob owned by the pool entry, then removes the entry from the shader pool.
void DestroyShader(HShader shader)
{
	Shader& entry = rhi.shaders.Get(shader);
	COM_RELEASE(entry.blob);
	rhi.shaders.Remove(shader);
}
|
|
|
|
void CmdBindRenderTargets(uint32_t colorCount, const HTexture* colorTargets, const HTexture* depthStencilTarget)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(colorCount > 0 || colorTargets == NULL);
|
|
|
|
D3D12_CPU_DESCRIPTOR_HANDLE rtvHandles[MaxRenderTargets] = {};
|
|
for(uint32_t t = 0; t < colorCount; ++t)
|
|
{
|
|
const uint32_t rtvIndex = rhi.textures.Get(colorTargets[t]).rtvIndex;
|
|
rtvHandles[t] = rhi.descHeapRTVs.GetCPUHandle(rtvIndex);
|
|
}
|
|
|
|
D3D12_CPU_DESCRIPTOR_HANDLE* dsvHandlePtr = NULL;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle;
|
|
if(depthStencilTarget != NULL)
|
|
{
|
|
const Texture& depthStencil = rhi.textures.Get(*depthStencilTarget);
|
|
dsvHandle = rhi.descHeapDSVs.GetCPUHandle(depthStencil.dsvIndex);
|
|
dsvHandlePtr = &dsvHandle;
|
|
}
|
|
|
|
rhi.commandList->OMSetRenderTargets(colorCount, rtvHandles, FALSE, dsvHandlePtr);
|
|
}
|
|
|
|
// Binds a root signature on the current command list.
// Graphics signatures are only re-bound when they differ from rhi.currentRootSignature;
// compute signatures are bound unconditionally and don't update that cache.
void CmdBindRootSignature(HRootSignature rootSignature)
{
	Q_assert(CanWriteCommands());

	const RootSignature& sig = rhi.rootSignatures.Get(rootSignature);
	if(sig.desc.pipelineType == PipelineType::Compute)
	{
		rhi.commandList->SetComputeRootSignature(sig.signature);
		return;
	}

	const bool isGraphics = sig.desc.pipelineType == PipelineType::Graphics;
	if(isGraphics && rootSignature != rhi.currentRootSignature)
	{
		rhi.currentRootSignature = rootSignature;
		rhi.commandList->SetGraphicsRootSignature(sig.signature);
	}
}
|
|
|
|
// Sets the descriptor heaps of a descriptor table and binds them to the root signature's table slots.
// A table index of UINT32_MAX in the root signature means that table (generic or sampler) is unused.
void CmdBindDescriptorTable(HRootSignature sigHandle, HDescriptorTable handle)
{
	Q_assert(CanWriteCommands());

	const DescriptorTable& table = rhi.descriptorTables.Get(handle);
	const RootSignature& sig = rhi.rootSignatures.Get(sigHandle);
	const bool hasGenericTable = sig.genericTableIndex != UINT32_MAX;
	const bool hasSamplerTable = sig.samplerTableIndex != UINT32_MAX;
	const bool isGraphics = sig.desc.pipelineType == PipelineType::Graphics;
	const bool isCompute = sig.desc.pipelineType == PipelineType::Compute;

	// the heaps must be set before any root descriptor table referencing them
	ID3D12DescriptorHeap* heaps[2];
	UINT heapCount = 0;
	if(hasGenericTable)
	{
		heaps[heapCount++] = table.genericHeap;
	}
	if(hasSamplerTable)
	{
		heaps[heapCount++] = table.samplerHeap;
	}
	rhi.commandList->SetDescriptorHeaps(heapCount, heaps);

	if(hasGenericTable)
	{
		if(isGraphics)
		{
			rhi.commandList->SetGraphicsRootDescriptorTable(sig.genericTableIndex, table.genericHeap->GetGPUDescriptorHandleForHeapStart());
		}
		else if(isCompute)
		{
			rhi.commandList->SetComputeRootDescriptorTable(sig.genericTableIndex, table.genericHeap->GetGPUDescriptorHandleForHeapStart());
		}
	}

	if(hasSamplerTable)
	{
		if(isGraphics)
		{
			rhi.commandList->SetGraphicsRootDescriptorTable(sig.samplerTableIndex, table.samplerHeap->GetGPUDescriptorHandleForHeapStart());
		}
		else if(isCompute)
		{
			rhi.commandList->SetComputeRootDescriptorTable(sig.samplerTableIndex, table.samplerHeap->GetGPUDescriptorHandleForHeapStart());
		}
	}
}
|
|
|
|
// Sets the pipeline state object of the given pipeline on the current command list.
void CmdBindPipeline(HPipeline pipeline)
{
	Q_assert(CanWriteCommands());

	ID3D12PipelineState* const pso = rhi.pipelines.Get(pipeline).pso;
	rhi.commandList->SetPipelineState(pso);
}
|
|
|
|
void CmdBindVertexBuffers(uint32_t count, const HBuffer* vertexBuffers, const uint32_t* byteStrides, const uint32_t* startByteOffsets)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(count <= MaxVertexBuffers);
|
|
|
|
count = min(count, MaxVertexBuffers);
|
|
|
|
D3D12_VERTEX_BUFFER_VIEW views[MaxVertexBuffers];
|
|
for(uint32_t v = 0; v < count; ++v)
|
|
{
|
|
const Buffer& buffer = rhi.buffers.Get(vertexBuffers[v]);
|
|
const uint32_t offset = startByteOffsets ? startByteOffsets[v] : 0;
|
|
views[v].BufferLocation = buffer.gpuAddress + offset;
|
|
views[v].SizeInBytes = buffer.desc.byteCount - offset;
|
|
views[v].StrideInBytes = byteStrides[v];
|
|
}
|
|
rhi.commandList->IASetVertexBuffers(0, count, views);
|
|
}
|
|
|
|
// Binds an index buffer on the current command list.
// The view starts startByteOffset bytes into the buffer and extends to the buffer's end.
void CmdBindIndexBuffer(HBuffer indexBuffer, IndexType::Id type, uint32_t startByteOffset)
{
	Q_assert(CanWriteCommands());

	const Buffer& buffer = rhi.buffers.Get(indexBuffer);

	D3D12_INDEX_BUFFER_VIEW indexView = {};
	indexView.Format = GetD3DIndexFormat(type);
	indexView.BufferLocation = buffer.gpuAddress + startByteOffset;
	indexView.SizeInBytes = (UINT)(buffer.desc.byteCount - startByteOffset);
	rhi.commandList->IASetIndexBuffer(&indexView);
}
|
|
|
|
void CmdSetViewport(uint32_t x, uint32_t y, uint32_t w, uint32_t h, float minDepth, float maxDepth)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
D3D12_VIEWPORT viewport;
|
|
viewport.TopLeftX = x;
|
|
viewport.TopLeftY = y;
|
|
viewport.Width = w;
|
|
viewport.Height = h;
|
|
viewport.MinDepth = minDepth;
|
|
viewport.MaxDepth = maxDepth;
|
|
rhi.commandList->RSSetViewports(1, &viewport);
|
|
}
|
|
|
|
void CmdSetScissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
D3D12_RECT rect;
|
|
rect.left = x;
|
|
rect.top = y;
|
|
rect.right = x + w;
|
|
rect.bottom = y + h;
|
|
rhi.commandList->RSSetScissorRects(1, &rect);
|
|
}
|
|
|
|
// Uploads the root constants of one shader stage for the given root signature (non-dynamic-resources path).
// The root signature is bound first; the number of 32-bit values comes from the signature's
// per-stage byte count. 'constants' must point to at least that many bytes.
void CmdSetRootConstants(HRootSignature rootSignature, ShaderStage::Id shaderType, const void* constants)
{
	ASSERT_DR_DISABLED();

	Q_assert(CanWriteCommands());
	Q_assert(constants);

	const RootSignature& sig = rhi.rootSignatures.Get(rootSignature);
	const UINT rootParameter = sig.constants[shaderType].parameterIndex;
	const UINT dwordCount = sig.desc.constants[shaderType].byteCount / 4;
	const bool isGraphics = sig.desc.pipelineType == PipelineType::Graphics;
	const bool isCompute = sig.desc.pipelineType == PipelineType::Compute;

	CmdBindRootSignature(rootSignature);

	if(isGraphics)
	{
		rhi.commandList->SetGraphicsRoot32BitConstants(rootParameter, dwordCount, constants, 0);
	}
	else if(isCompute)
	{
		rhi.commandList->SetComputeRoot32BitConstants(rootParameter, dwordCount, constants, 0);
	}
}
|
|
|
|
void CmdSetGraphicsRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants)
|
|
{
|
|
ASSERT_DR_ENABLED();
|
|
Q_assert(CanWriteCommands());
|
|
|
|
SetRootConstants(byteOffset, byteCount, constants, true);
|
|
}
|
|
|
|
void CmdSetComputeRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants)
|
|
{
|
|
ASSERT_DR_ENABLED();
|
|
Q_assert(CanWriteCommands());
|
|
|
|
SetRootConstants(byteOffset, byteCount, constants, false);
|
|
}
|
|
|
|
void CmdDraw(uint32_t vertexCount, uint32_t firstVertex)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
rhi.commandList->DrawInstanced(vertexCount, 1, firstVertex, 0);
|
|
}
|
|
|
|
void CmdDrawIndexed(uint32_t indexCount, uint32_t firstIndex, uint32_t firstVertex)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
rhi.commandList->DrawIndexedInstanced(indexCount, 1, firstIndex, firstVertex, 0);
|
|
}
|
|
|
|
void CmdDispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
rhi.commandList->Dispatch(groupCountX, groupCountY, groupCountZ);
|
|
}
|
|
|
|
// Records one indirect compute dispatch whose arguments are read from 'hbuffer' at 'byteOffset'.
// The offset must be a multiple of 4 bytes.
void CmdDispatchIndirect(HBuffer hbuffer, uint32_t byteOffset)
{
	Q_assert(CanWriteCommands());
	Q_assert(byteOffset % 4 == 0);

	ID3D12Resource* const argumentBuffer = rhi.buffers.Get(hbuffer).buffer;
	const UINT maxCommandCount = 1;

	// no count buffer: exactly maxCommandCount commands get executed
	rhi.commandList->ExecuteIndirect(rhi.indirectDispatchSignature, maxCommandCount, argumentBuffer, (UINT64)byteOffset, NULL, 0);
}
|
|
|
|
uint32_t CmdBeginDurationQuery()
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
FrameQueries& fq = rhi.frameQueries[rhi.frameIndex];
|
|
Q_assert(fq.durationQueryCount < MaxDurationQueries);
|
|
if(fq.durationQueryCount >= MaxDurationQueries)
|
|
{
|
|
return UINT32_MAX;
|
|
}
|
|
|
|
const uint32_t durationIndex = fq.durationQueryCount;
|
|
const UINT timeStampBeginIndex = durationIndex * 2;
|
|
rhi.commandList->EndQuery(rhi.timeStampHeaps[rhi.frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, timeStampBeginIndex);
|
|
|
|
DurationQuery& query = fq.durationQueries[durationIndex];
|
|
if(backEnd.renderFrame)
|
|
{
|
|
Q_assert(query.state == QueryState::Free);
|
|
}
|
|
query.state = QueryState::Begun;
|
|
|
|
fq.durationQueryCount++;
|
|
|
|
return durationIndex;
|
|
}
|
|
|
|
void CmdEndDurationQuery(uint32_t durationIndex)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
FrameQueries& fq = rhi.frameQueries[rhi.frameIndex];
|
|
Q_assert(durationIndex < fq.durationQueryCount);
|
|
if(durationIndex >= fq.durationQueryCount)
|
|
{
|
|
return;
|
|
}
|
|
|
|
DurationQuery& query = fq.durationQueries[durationIndex];
|
|
Q_assert(query.state == QueryState::Begun);
|
|
const UINT timeStampEndIndex = durationIndex * 2 + 1;
|
|
rhi.commandList->EndQuery(rhi.timeStampHeaps[rhi.frameIndex], D3D12_QUERY_TYPE_TIMESTAMP, timeStampEndIndex);
|
|
query.state = QueryState::Ended;
|
|
}
|
|
|
|
void CmdBeginBarrier()
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(rhi.textureBarrierCount == 0);
|
|
Q_assert(rhi.bufferBarrierCount == 0);
|
|
Q_assert(!rhi.barrierOpen);
|
|
Q_assert(rhi.barrierCommandList == NULL);
|
|
|
|
rhi.barrierCommandList = rhi.commandList;
|
|
rhi.textureBarrierCount = 0;
|
|
rhi.bufferBarrierCount = 0;
|
|
rhi.barrierOpen = true;
|
|
}
|
|
|
|
// Queues a texture state transition in the currently open barrier batch.
// Running out of queue storage is a fatal error.
void CmdTextureBarrier(HTexture texture, ResourceStates::Flags newState)
{
	Q_assert(CanWriteCommands());
	Q_assert(rhi.barrierOpen);
	Q_assert(rhi.commandList == rhi.barrierCommandList);
	Q_assert(rhi.textureBarrierCount < ARRAY_LEN(rhi.textureBarriers));

	if(rhi.textureBarrierCount >= ARRAY_LEN(rhi.textureBarriers))
	{
		ri.Error(ERR_FATAL, "Not enough texture barrier storage!\n");
		return;
	}

	TextureBarrier& queued = rhi.textureBarriers[rhi.textureBarrierCount];
	queued.texture = texture;
	queued.newState = newState;
	rhi.textureBarrierCount++;
}
|
|
|
|
// Queues a buffer state transition in the currently open barrier batch.
// Running out of queue storage is a fatal error.
void CmdBufferBarrier(HBuffer buffer, ResourceStates::Flags newState)
{
	Q_assert(CanWriteCommands());
	Q_assert(rhi.barrierOpen);
	Q_assert(rhi.commandList == rhi.barrierCommandList);
	Q_assert(rhi.bufferBarrierCount < ARRAY_LEN(rhi.bufferBarriers));

	if(rhi.bufferBarrierCount >= ARRAY_LEN(rhi.bufferBarriers))
	{
		ri.Error(ERR_FATAL, "Not enough buffer barrier storage!\n");
		return;
	}

	BufferBarrier& queued = rhi.bufferBarriers[rhi.bufferBarrierCount];
	queued.buffer = buffer;
	queued.newState = newState;
	rhi.bufferBarrierCount++;
}
|
|
|
|
void CmdEndBarrier()
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(rhi.barrierOpen);
|
|
Q_assert(rhi.commandList == rhi.barrierCommandList);
|
|
|
|
if(rhi.textureBarrierCount > 0 || rhi.bufferBarrierCount > 0)
|
|
{
|
|
Barrier(rhi.textureBarrierCount, rhi.textureBarriers, rhi.bufferBarrierCount, rhi.bufferBarriers);
|
|
}
|
|
|
|
rhi.barrierCommandList = NULL;
|
|
rhi.textureBarrierCount = 0;
|
|
rhi.bufferBarrierCount = 0;
|
|
rhi.barrierOpen = false;
|
|
}
|
|
|
|
// Clears a color render target to clearColor.
// rect: optional sub-rectangle to clear; NULL clears the entire target.
void CmdClearColorTarget(HTexture texture, const vec4_t clearColor, const Rect* rect)
{
	Q_assert(CanWriteCommands());

	// with no rectangle, D3D12 gets a count of 0 and a NULL pointer: the whole view is cleared
	D3D12_RECT clearRect = {};
	const bool hasRect = rect != NULL;
	if(hasRect)
	{
		clearRect.left = rect->x;
		clearRect.top = rect->y;
		clearRect.right = rect->x + rect->w;
		clearRect.bottom = rect->y + rect->h;
	}
	const UINT clearRectCount = hasRect ? 1 : 0;
	D3D12_RECT* const clearRectPtr = hasRect ? &clearRect : NULL;

	const Texture& renderTarget = rhi.textures.Get(texture);
	const D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = rhi.descHeapRTVs.GetCPUHandle(renderTarget.rtvIndex);
	rhi.commandList->ClearRenderTargetView(rtvHandle, clearColor, clearRectCount, clearRectPtr);
}
|
|
|
|
// Clears the depth and/or stencil planes of a depth/stencil target.
// At least one of clearDepth/clearStencil must be set; otherwise nothing is recorded.
// rect: optional sub-rectangle to clear; NULL clears the entire target.
void CmdClearDepthStencilTarget(HTexture texture, bool clearDepth, float depth, bool clearStencil, uint8_t stencil, const Rect* rect)
{
	Q_assert(CanWriteCommands());

	const bool anythingToClear = clearDepth || clearStencil;
	Q_assert(anythingToClear);
	if(!anythingToClear)
	{
		return;
	}

	// with no rectangle, D3D12 gets a count of 0 and a NULL pointer: the whole view is cleared
	D3D12_RECT clearRect = {};
	const bool hasRect = rect != NULL;
	if(hasRect)
	{
		clearRect.left = rect->x;
		clearRect.top = rect->y;
		clearRect.right = rect->x + rect->w;
		clearRect.bottom = rect->y + rect->h;
	}
	const UINT clearRectCount = hasRect ? 1 : 0;
	D3D12_RECT* const clearRectPtr = hasRect ? &clearRect : NULL;

	D3D12_CLEAR_FLAGS clearFlags = (D3D12_CLEAR_FLAGS)0;
	if(clearDepth)
	{
		clearFlags |= D3D12_CLEAR_FLAG_DEPTH;
	}
	if(clearStencil)
	{
		clearFlags |= D3D12_CLEAR_FLAG_STENCIL;
	}

	const Texture& depthStencil = rhi.textures.Get(texture);
	const D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle = rhi.descHeapDSVs.GetCPUHandle(depthStencil.dsvIndex);
	rhi.commandList->ClearDepthStencilView(dsvHandle, clearFlags, depth, stencil, clearRectCount, clearRectPtr);
}
|
|
|
|
// Dynamic-resources path: clears one mip level of a texture through its UAV.
// values: the unsigned integers forwarded to ClearUnorderedAccessViewUint, which takes 4 of them.
void CmdClearTextureUAV(HTexture htexture, uint32_t mipIndex, const uint32_t* values)
{
	ASSERT_DR_ENABLED();
	Q_assert(CanWriteCommands());
	Q_assert(values);
	static_assert(sizeof(UINT) == 4, "sizeof(UINT) isn't 4 as expected");

	const Texture& texture = rhi.textures.Get(htexture);
	Q_assert(mipIndex < texture.desc.mipCount);

	// the same descriptor index addresses the UAV in both the CPU-side heap and the GPU-side heap
	const uint32_t descIndex = texture.mips[mipIndex].uavIndex;
	const UINT descSize = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
	const UINT handleOffset = descIndex * descSize;

	D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.dynamicResources.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
	cpuHandle.ptr += handleOffset;
	D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = rhi.dynamicResources.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart();
	gpuHandle.ptr += handleOffset;

	rhi.commandList->ClearUnorderedAccessViewUint(gpuHandle, cpuHandle, texture.texture, values, 0, NULL);
}
|
|
|
|
// Dynamic-resources path: clears a buffer through its UAV, using 'value' for all 4 clear components.
void CmdClearBufferUAV(HBuffer hbuffer, uint32_t value)
{
	ASSERT_DR_ENABLED();
	Q_assert(CanWriteCommands());
	static_assert(sizeof(UINT) == 4, "sizeof(UINT) isn't 4 as expected");

	const Buffer& buffer = rhi.buffers.Get(hbuffer);

	// the same descriptor index addresses the UAV in both the CPU-side heap and the GPU-side heap
	const uint32_t descIndex = buffer.uavIndex;
	const UINT descSize = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
	const UINT handleOffset = descIndex * descSize;

	D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.dynamicResources.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart();
	cpuHandle.ptr += handleOffset;
	D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = rhi.dynamicResources.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart();
	gpuHandle.ptr += handleOffset;

	const UINT clearValues[4] = { value, value, value, value };
	rhi.commandList->ClearUnorderedAccessViewUint(gpuHandle, cpuHandle, buffer.buffer, clearValues, 0, NULL);
}
|
|
|
|
void CmdInsertDebugLabel(const char* name, float r, float g, float b)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(name);
|
|
|
|
if(rhi.pix.SetMarkerOnCommandList != NULL)
|
|
{
|
|
rhi.pix.SetMarkerOnCommandList(rhi.commandList, BGRAUIntFromFloat(r, g, b), name);
|
|
}
|
|
else
|
|
{
|
|
rhi.commandList->SetMarker(1, name, strlen(name) + 1);
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, name, strlen(name) + 1);
|
|
Q_assert(result == GFSDK_Aftermath_Result_Success);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void CmdBeginDebugLabel(const char* name, float r, float g, float b)
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
Q_assert(name);
|
|
Q_assert(name[0] != '\0');
|
|
|
|
if(rhi.pix.canBeginAndEnd)
|
|
{
|
|
rhi.pix.BeginEventOnCommandList(rhi.commandList, BGRAUIntFromFloat(r, g, b), name);
|
|
}
|
|
else
|
|
{
|
|
rhi.commandList->BeginEvent(1, name, strlen(name) + 1);
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
const char* const markerString = va("Begin: %s", name);
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, markerString, strlen(markerString) + 1);
|
|
Q_assert(result == GFSDK_Aftermath_Result_Success);
|
|
Q_assert(rhi.aftermathMarkerDepth < ARRAY_LEN(rhi.aftermathMarkers));
|
|
RHIPrivate::AftermathMarker& marker = rhi.aftermathMarkers[rhi.aftermathMarkerDepth++];
|
|
Q_strncpyz(marker.string, name, sizeof(marker.string));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void CmdEndDebugLabel()
|
|
{
|
|
Q_assert(CanWriteCommands());
|
|
|
|
if(rhi.pix.canBeginAndEnd)
|
|
{
|
|
rhi.pix.EndEventOnCommandList(rhi.commandList);
|
|
}
|
|
else
|
|
{
|
|
rhi.commandList->EndEvent();
|
|
}
|
|
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
Q_assert(rhi.aftermathMarkerDepth > 0);
|
|
const RHIPrivate::AftermathMarker& marker = rhi.aftermathMarkers[rhi.aftermathMarkerDepth - 1];
|
|
Q_assert(marker.string[0] != '\0');
|
|
const char* const markerString = va("End: %s", marker.string);
|
|
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathCommandList, markerString, strlen(markerString) + 1);
|
|
Q_assert(result == GFSDK_Aftermath_Result_Success);
|
|
rhi.aftermathMarkerDepth--;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void CmdSetStencilReference(uint8_t stencilRef)
|
|
{
|
|
rhi.commandList->OMSetStencilRef((UINT)stencilRef);
|
|
}
|
|
|
|
// Copies from the start of 'source' to the start of 'dest'.
// The number of bytes copied is the size of the smaller of the two buffers.
void CmdCopyBuffer(HBuffer dest, HBuffer source)
{
	Q_assert(CanWriteCommands());

	const Buffer& dst = rhi.buffers.Get(dest);
	const Buffer& src = rhi.buffers.Get(source);

	// never read or write past the end of either buffer
	const UINT64 copySize = (src.desc.byteCount < dst.desc.byteCount) ? src.desc.byteCount : dst.desc.byteCount;
	rhi.commandList->CopyBufferRegion(dst.buffer, 0, src.buffer, 0, copySize);
}
|
|
|
|
// Copies 'byteCount' bytes from 'source' (starting at sourceOffset) to 'dest' (starting at destOffset).
// Both ranges must lie entirely within their respective buffers.
void CmdCopyBuffer(HBuffer dest, uint32_t destOffset, HBuffer source, uint32_t sourceOffset, uint32_t byteCount)
{
	Q_assert(CanWriteCommands());

	const Buffer& dst = rhi.buffers.Get(dest);
	const Buffer& src = rhi.buffers.Get(source);

	// compute the range ends in 64 bits: a 32-bit sum can wrap around and hide an out-of-range copy
	const UINT64 destEnd = (UINT64)destOffset + (UINT64)byteCount;
	const UINT64 sourceEnd = (UINT64)sourceOffset + (UINT64)byteCount;
	Q_assert(destEnd <= (UINT64)dst.desc.byteCount);
	Q_assert(sourceEnd <= (UINT64)src.desc.byteCount);

	rhi.commandList->CopyBufferRegion(dst.buffer, destOffset, src.buffer, sourceOffset, byteCount);
}
|
|
|
|
// Copies an entire texture into another one with ID3D12GraphicsCommandList::CopyResource.
// The two textures must be distinct and have the same dimensions, mip count and format.
void CmdCopyTexture(HTexture dest, HTexture source)
{
	Q_assert(CanWriteCommands());
	Q_assert(dest != source);

	const Texture& dst = rhi.textures.Get(dest);
	const Texture& src = rhi.textures.Get(source);

	// both descriptions must agree on everything asserted below
	const TextureDesc& dstDesc = dst.desc;
	const TextureDesc& srcDesc = src.desc;
	Q_assert(dstDesc.width == srcDesc.width);
	Q_assert(dstDesc.height == srcDesc.height);
	Q_assert(dstDesc.depth == srcDesc.depth);
	Q_assert(dstDesc.mipCount == srcDesc.mipCount);
	Q_assert(dstDesc.format == srcDesc.format);

	rhi.commandList->CopyResource(dst.texture, src.texture);
}
|
|
|
|
// Sets the per-draw variable shading rate on the current command list.
// Does nothing when base VRS support is missing.
// Without extended VRS support, the 2x4, 4x2 and 4x4 rates are replaced by 2x2.
void CmdSetShadingRate(ShadingRate::Id shadingRate)
{
	Q_assert(CanWriteCommands());

	if(!rhi.baseVRSSupport)
	{
		return;
	}

	if(!rhi.extendedVRSSupport)
	{
		const bool needsExtendedSupport =
			shadingRate == ShadingRate::SR_2x4 ||
			shadingRate == ShadingRate::SR_4x2 ||
			shadingRate == ShadingRate::SR_4x4;
		if(needsExtendedSupport)
		{
			shadingRate = ShadingRate::SR_2x2;
		}
	}

	// NULL combiners: no combiner array is supplied
	rhi.commandList->RSSetShadingRate(GetD3DShadingRate(shadingRate), NULL);
}
|
|
|
|
uint32_t GetDurationCount()
|
|
{
|
|
return rhi.resolvedQueries.durationQueryCount;
|
|
}
|
|
|
|
void GetDurations(uint32_t* gpuMicroSeconds)
|
|
{
|
|
memcpy(gpuMicroSeconds, rhi.resolvedQueries.gpuMicroSeconds, rhi.resolvedQueries.durationQueryCount * sizeof(uint32_t));
|
|
}
|
|
|
|
// Starts an upload to 'buffer' through the upload manager and returns the memory to write into.
// Must be paired with EndBufferUpload.
uint8_t* BeginBufferUpload(HBuffer buffer)
{
	uint8_t* const mappedData = rhi.upload.BeginBufferUpload(buffer);
	return mappedData;
}
|
|
|
|
// Finishes the upload started with BeginBufferUpload for 'buffer' (forwarded to the upload manager).
void EndBufferUpload(HBuffer buffer)
{
	rhi.upload.EndBufferUpload(buffer);
}
|
|
|
|
// Starts an upload to 'texture' through the upload manager; mappedTexture is filled in by the
// upload manager. Must be paired with EndTextureUpload.
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture)
{
	rhi.upload.BeginTextureUpload(mappedTexture, texture);
}
|
|
|
|
void EndTextureUpload()
|
|
{
|
|
rhi.upload.EndTextureUpload();
|
|
}
|
|
|
|
void BeginTempCommandList()
|
|
{
|
|
Q_assert(!rhi.frameBegun);
|
|
Q_assert(rhi.commandList == rhi.mainCommandList);
|
|
rhi.commandList = rhi.tempCommandList;
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
rhi.aftermathCommandList = rhi.aftermathTempCommandList;
|
|
}
|
|
#endif
|
|
|
|
// CPU wait for the temp command list to be done executing on the GPU
|
|
WaitForTempCommandList();
|
|
|
|
// GPU wait for the copy queue to be done executing on the GPU
|
|
rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue);
|
|
|
|
BindDynamicResources();
|
|
}
|
|
|
|
void EndTempCommandList()
|
|
{
|
|
Q_assert(!rhi.frameBegun);
|
|
Q_assert(rhi.commandList == rhi.tempCommandList);
|
|
rhi.commandList = rhi.mainCommandList;
|
|
#if defined(RHI_ENABLE_AFTERMATH)
|
|
if(rhi.aftermathActive)
|
|
{
|
|
rhi.aftermathCommandList = rhi.aftermathMainCommandList;
|
|
}
|
|
#endif
|
|
|
|
// execute and wait on the temporary command list
|
|
ID3D12CommandQueue* const queue = rhi.computeCommandQueue;
|
|
rhi.tempCommandList->Close();
|
|
ID3D12CommandList* tempCommandListArray[] = { rhi.tempCommandList };
|
|
queue->ExecuteCommandLists(ARRAY_LEN(tempCommandListArray), tempCommandListArray);
|
|
rhi.tempFenceValue++;
|
|
rhi.tempFence.Signal(queue, rhi.tempFenceValue);
|
|
rhi.tempCommandListOpen = false;
|
|
}
|
|
|
|
void WaitForTempCommandList()
|
|
{
|
|
rhi.tempFence.WaitOnCPU(rhi.tempFenceValue);
|
|
if(rhi.tempCommandListOpen)
|
|
{
|
|
rhi.tempCommandList->Close();
|
|
}
|
|
D3D(rhi.tempCommandAllocator->Reset());
|
|
D3D(rhi.tempCommandList->Reset(rhi.tempCommandAllocator, NULL));
|
|
rhi.tempCommandListOpen = true;
|
|
}
|
|
|
|
// Starts a read-back of 'htexture' through the read-back manager; mappedTexture is filled in
// by the read-back manager. Must be paired with EndTextureReadback.
void BeginTextureReadback(MappedTexture& mappedTexture, HTexture htexture)
{
	rhi.readback.BeginTextureReadback(mappedTexture, htexture);
}
|
|
|
|
void EndTextureReadback()
|
|
{
|
|
rhi.readback.EndTextureReadback();
|
|
}
|
|
|
|
void WaitUntilDeviceIsIdle()
|
|
{
|
|
// direct queue
|
|
rhi.mainFenceValues[rhi.frameIndex]++;
|
|
#if RHI_DEBUG_FENCE
|
|
Sys_DebugPrintf("Signal: %d (WaitUntilDeviceIsIdle)\n", (int)rhi.mainFenceValues[rhi.frameIndex]);
|
|
Sys_DebugPrintf("Wait: %d (WaitUntilDeviceIsIdle)\n", (int)rhi.mainFenceValues[rhi.frameIndex]);
|
|
#endif
|
|
rhi.mainFence.Signal(rhi.mainCommandQueue, rhi.mainFenceValues[rhi.frameIndex]);
|
|
rhi.mainFence.WaitOnCPU(rhi.mainFenceValues[rhi.frameIndex]);
|
|
|
|
// compute queue
|
|
rhi.tempFence.WaitOnCPU(rhi.tempFenceValue);
|
|
|
|
// upload queue
|
|
rhi.upload.fence.WaitOnCPU(rhi.upload.fenceValue);
|
|
}
|
|
|
|
void SubmitAndContinue()
|
|
{
|
|
ASSERT_DR_ENABLED();
|
|
Q_assert(rhi.commandList == rhi.mainCommandList);
|
|
|
|
CmdInsertDebugLabel("RHI::SubmitAndWaitOnCPU", 0.8f, 0.8f, 0.8f);
|
|
|
|
rhi.frameBegun = false;
|
|
D3D(rhi.commandList->Close());
|
|
ID3D12CommandList* commandListArray[] = { rhi.commandList };
|
|
rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray);
|
|
const UINT64 currentFenceValue = rhi.mainFenceValues[rhi.frameIndex];
|
|
#if RHI_DEBUG_FENCE
|
|
Sys_DebugPrintf("Signal: %d (SubmitAndWaitOnCPU)\n", (int)currentFenceValue);
|
|
Sys_DebugPrintf("Wait: %d (SubmitAndWaitOnCPU)\n", (int)currentFenceValue);
|
|
#endif
|
|
rhi.mainFence.Signal(rhi.mainCommandQueue, currentFenceValue);
|
|
rhi.mainFence.WaitOnCPU(currentFenceValue);
|
|
rhi.mainFenceValues[rhi.frameIndex] = currentFenceValue + 1;
|
|
D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset());
|
|
D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL));
|
|
BindDynamicResources();
|
|
rhi.commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
rhi.currentRootSignature = RHI_MAKE_NULL_HANDLE();
|
|
rhi.frameBegun = true;
|
|
}
|
|
|
|
// Dynamic-resources path: returns the SRV descriptor index stored for the texture.
uint32_t GetTextureIndexSRV(HTexture htexture)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(htexture));

	return rhi.textures.Get(htexture).srvIndex;
}
|
|
|
|
// Dynamic-resources path: returns the UAV descriptor index stored for one mip level of the texture.
// mipIndex must be lower than the texture's mip count.
uint32_t GetTextureIndexUAV(HTexture htexture, uint32_t mipIndex)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(htexture));

	const Texture& entry = rhi.textures.Get(htexture);
	Q_assert(mipIndex < entry.desc.mipCount);

	const uint32_t uavIndex = entry.mips[mipIndex].uavIndex;
	return uavIndex;
}
|
|
|
|
// Dynamic-resources path: returns the SRV descriptor index stored for the buffer.
uint32_t GetBufferIndexSRV(HBuffer hbuffer)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(hbuffer));

	return rhi.buffers.Get(hbuffer).srvIndex;
}
|
|
|
|
// Dynamic-resources path: returns the UAV descriptor index stored for the buffer.
uint32_t GetBufferIndexUAV(HBuffer hbuffer)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(hbuffer));

	return rhi.buffers.Get(hbuffer).uavIndex;
}
|
|
|
|
// Dynamic-resources path: returns the CBV descriptor index stored for the buffer.
uint32_t GetBufferIndexCBV(HBuffer hbuffer)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(hbuffer));

	return rhi.buffers.Get(hbuffer).cbvIndex;
}
|
|
|
|
// Dynamic-resources path: returns the heap index stored for the sampler.
uint32_t GetSamplerIndex(HSampler hsampler)
{
	ASSERT_DR_ENABLED();
	Q_assert(!IsNullHandle(hsampler));

	return rhi.samplers.Get(hsampler).heapIndex;
}
|
|
|
|
void CmdBarrierUAV()
|
|
{
|
|
ASSERT_DR_ENABLED();
|
|
Q_assert(CanWriteCommands());
|
|
|
|
D3D12_RESOURCE_BARRIER barrier = {};
|
|
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
|
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
barrier.UAV.pResource = NULL;
|
|
rhi.commandList->ResourceBarrier(1, &barrier);
|
|
}
|
|
|
|
void PrintGPUList()
|
|
{
|
|
CreateAdapterList();
|
|
|
|
ri.Printf(PRINT_ALL, "%s0^7. Default\n", S_COLOR_VAL);
|
|
for(uint32_t i = 0; i < rhi.gpuCount; ++i)
|
|
{
|
|
ri.Printf(PRINT_ALL, "%s%d^7. %s\n", S_COLOR_VAL, (int)i + 1, rhi.gpus[i].name);
|
|
}
|
|
}
|
|
|
|
// Dynamic-resources path: records the build of a bottom-level acceleration structure (BLAS)
// on the temporary command list.
// blasBuffer: in/out handle of the buffer receiving the BLAS; it is passed to EnsureBufferIsThisLarge
//             together with the size reported by the prebuild info query.
// rhiDesc: the vertex/index buffers and the list of meshes (index/vertex ranges) to include.
// Vertices are read as tightly packed vec3_t positions and indices as 32-bit unsigned integers.
// Structures needing 4 GB or more of result or scratch memory are a fatal error.
void CmdCreateBLAS(HBuffer* blasBuffer, const BLASDesc& rhiDesc)
{
	ASSERT_DR_ENABLED();
	Q_assert(rhi.commandList == rhi.tempCommandList);
	Q_assert(rhi.tempCommandListOpen);
	Q_assert(blasBuffer);
	Q_assert(!IsNullHandle(rhiDesc.vertexBuffer));
	Q_assert(!IsNullHandle(rhiDesc.indexBuffer));
	Q_assert(rhiDesc.meshCount > 0);
	Q_assert(rhiDesc.meshes);

	const D3D12_GPU_VIRTUAL_ADDRESS baseVertexAddress = rhi.buffers.Get(rhiDesc.vertexBuffer).gpuAddress;
	const D3D12_GPU_VIRTUAL_ADDRESS baseIndexAddress = rhi.buffers.Get(rhiDesc.indexBuffer).gpuAddress;

	// rhi.rtGeoDescs is a persistent scratch array that only ever grows
	if(rhiDesc.meshCount > rhi.rtGeoDescCount)
	{
		// grow to at least twice the previous capacity to limit the number of reallocations
		const uint32_t newCapacity = max(rhiDesc.meshCount, 2 * rhi.rtGeoDescCount);
		const size_t byteCount = newCapacity * sizeof(D3D12_RAYTRACING_GEOMETRY_DESC);
		rhi.rtGeoDescs = (D3D12_RAYTRACING_GEOMETRY_DESC*)realloc(rhi.rtGeoDescs, byteCount);
		if(rhi.rtGeoDescs == NULL)
		{
			ri.Error(ERR_FATAL, "Failed to allocate %d D3D12_RAYTRACING_GEOMETRY_DESC instances\n", (int)newCapacity);
		}
		// record what was actually allocated, not just what this call needs
		rhi.rtGeoDescCount = newCapacity;
	}

	// one triangle geometry description per mesh
	for(uint32_t i = 0; i < rhiDesc.meshCount; ++i)
	{
		const BLASMeshDesc& mesh = rhiDesc.meshes[i];
		D3D12_RAYTRACING_GEOMETRY_DESC& geoDesc = rhi.rtGeoDescs[i];
		geoDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES;
		geoDesc.Flags = mesh.isFullyOpaque ?
			D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE :
			D3D12_RAYTRACING_GEOMETRY_FLAG_NONE;
		geoDesc.Triangles.IndexFormat = DXGI_FORMAT_R32_UINT;
		geoDesc.Triangles.IndexCount = mesh.indexCount;
		geoDesc.Triangles.IndexBuffer = baseIndexAddress + mesh.firstIndex * sizeof(uint32_t);
		geoDesc.Triangles.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT;
		geoDesc.Triangles.VertexCount = mesh.vertexCount;
		geoDesc.Triangles.VertexBuffer.StartAddress = baseVertexAddress + mesh.firstVertex * sizeof(vec3_t);
		geoDesc.Triangles.VertexBuffer.StrideInBytes = sizeof(vec3_t);
		geoDesc.Triangles.Transform3x4 = NULL;
	}

	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
	inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL;
	inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE;
	inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
	inputs.NumDescs = rhiDesc.meshCount;
	inputs.pGeometryDescs = rhi.rtGeoDescs;

	// the sizes are narrowed to 32 bits below, hence the 4 GB limit
	D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {};
	rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info);
	if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) ||
		info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull))
	{
		ri.Error(ERR_FATAL, "Attempted to create a BLAS larger than 4 GB!\n");
	}

	EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch",
		ResourceStates::UnorderedAccessBit, (uint32_t)info.ScratchDataSizeInBytes);
	EnsureBufferIsThisLarge(*blasBuffer, rhiDesc.name,
		ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes);

	// dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT
	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {};
	rtasDesc.SourceAccelerationStructureData = 0;
	rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*blasBuffer).gpuAddress;
	rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress;
	rtasDesc.Inputs = inputs;
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("BLAS: Before build");
#endif
	rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL);
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("BLAS: After build");
#endif

	// NOTE(review): presumably synchronizes the build's writes before the BLAS gets used - confirm against Barrier()
	CmdBeginBarrier();
	CmdBufferBarrier(*blasBuffer, ResourceStates::UnorderedAccessBit);
	CmdEndBarrier();
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("BLAS: After barrier");
#endif
}
|
|
|
|
// Records the build of a top-level acceleration structure (TLAS) on the temporary command list.
//
// tlasBuffer: handle of the buffer that receives the TLAS; must not be NULL.
//             It is passed to EnsureBufferIsThisLarge with the size reported by the driver.
// rhiDesc:    the instances to reference; 'instances' must be set and 'instanceCount' > 0.
//
// Steps: fill and upload one D3D12_RAYTRACING_INSTANCE_DESC per instance,
// query the prebuild sizes, size the scratch/result buffers, record the build,
// then record a barrier on the result buffer.
// Raises a fatal error when the result or scratch size reaches 4 GB.
void CmdCreateTLAS(HBuffer* tlasBuffer, const TLASDesc& rhiDesc)
{
	ASSERT_DR_ENABLED();
	Q_assert(rhi.commandList == rhi.tempCommandList);
	Q_assert(rhi.tempCommandListOpen);
	Q_assert(tlasBuffer != NULL);
	Q_assert(rhiDesc.instances);
	Q_assert(rhiDesc.instanceCount > 0);

	EnsureBufferIsThisLarge(rhi.raytracingInstanceBuffer, "RT TLAS instance",
		ResourceStates::Common, rhiDesc.instanceCount * sizeof(D3D12_RAYTRACING_INSTANCE_DESC));

	D3D12_RAYTRACING_INSTANCE_DESC* const instanceDescs =
		(D3D12_RAYTRACING_INSTANCE_DESC*)BeginBufferUpload(rhi.raytracingInstanceBuffer);
	for(uint32_t i = 0; i < rhiDesc.instanceCount; ++i)
	{
		const TLASInstanceDesc& rhiInstDesc = rhiDesc.instances[i];

		// the descriptor is built in a local and copied over in one go
		// so that the upload memory is only ever written to, never read from
		D3D12_RAYTRACING_INSTANCE_DESC instDesc = {};
		instDesc.AccelerationStructure = rhi.buffers.Get(rhiInstDesc.blasBuffer).gpuAddress;
		switch(rhiInstDesc.cullMode)
		{
			case CT_FRONT_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE; break;
			case CT_BACK_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE; break;
			default: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE; break;
		}
		instDesc.InstanceContributionToHitGroupIndex = 0; // @TODO: do we care for this?
		instDesc.InstanceID = rhiInstDesc.instanceId;
		instDesc.InstanceMask = rhiInstDesc.instanceMask;

		// D3D12 wants Transform[row][column] (3 rows, 4 columns)
		// the 9 source floats are consumed as 3 consecutive columns
		// and the translation fills the 4th column
		instDesc.Transform[0][0] = rhiInstDesc.transform[0]; // @TODO: confirm order
		instDesc.Transform[1][0] = rhiInstDesc.transform[1];
		instDesc.Transform[2][0] = rhiInstDesc.transform[2];
		instDesc.Transform[0][1] = rhiInstDesc.transform[3];
		instDesc.Transform[1][1] = rhiInstDesc.transform[4];
		instDesc.Transform[2][1] = rhiInstDesc.transform[5];
		instDesc.Transform[0][2] = rhiInstDesc.transform[6];
		instDesc.Transform[1][2] = rhiInstDesc.transform[7];
		instDesc.Transform[2][2] = rhiInstDesc.transform[8];
		instDesc.Transform[0][3] = rhiInstDesc.translation[0];
		instDesc.Transform[1][3] = rhiInstDesc.translation[1];
		instDesc.Transform[2][3] = rhiInstDesc.translation[2];
		memcpy(&instanceDescs[i], &instDesc, sizeof(D3D12_RAYTRACING_INSTANCE_DESC));
	}
	EndBufferUpload(rhi.raytracingInstanceBuffer);

	// GPU wait for the copy queue to be done executing on the GPU
	rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue);

	// InstanceDescs: D3D12_RAYTRACING_INSTANCE_DESC_BYTE_ALIGNMENT
	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {};
	inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
	inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE;
	inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
	inputs.NumDescs = rhiDesc.instanceCount;
	inputs.InstanceDescs = rhi.buffers.Get(rhi.raytracingInstanceBuffer).gpuAddress;

	// the sizes get truncated to 32 bits below, hence the 4 GB limit
	D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {};
	rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info);
	if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) ||
		info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull))
	{
		ri.Error(ERR_FATAL, "Attempted to create a TLAS larger than 4 GB!\n");
	}

	EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch",
		ResourceStates::UnorderedAccessBit, (uint32_t)info.ScratchDataSizeInBytes);
	EnsureBufferIsThisLarge(*tlasBuffer, "RT TLAS",
		ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes);

	// dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT
	// the source address is left at 0 by the zero-initialization
	D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {};
	rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*tlasBuffer).gpuAddress;
	rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress;
	rtasDesc.Inputs = inputs;
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("TLAS: Before build");
#endif
	rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL);
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("TLAS: After build");
#endif

	// NOTE(review): presumably makes the build's writes visible to subsequent work - confirm
	CmdBeginBarrier();
	CmdBufferBarrier(*tlasBuffer, ResourceStates::UnorderedAccessBit);
	CmdEndBarrier();
#if defined(RHI_ENABLE_AFTERMATH)
	CmdInsertDebugLabel("TLAS: After barrier");
#endif
}
|
|
}
|
|
|
|
void R_WaitBeforeInputSampling()
|
|
{
|
|
RHI::WaitForSwapChain();
|
|
RHI::rhi.beforeInputSamplingUS = Sys_Microseconds();
|
|
}
|
|
|
|
/*
|
|
PIX CAPTURE API WOES
|
|
|
|
Never got the PIX programmable capture API to work
|
|
PIXBeginCapture returns "not implemented"
|
|
|
|
// before include
|
|
#define USE_PIX 1
|
|
|
|
// before creating the device
|
|
PIXLoadLatestWinPixGpuCapturerLibrary();
|
|
HRESULT hr = PIXSetTargetWindow(GetActiveWindow());
|
|
Check(hr, "PIXSetTargetWindow");
|
|
|
|
// whenever...
|
|
PIXCaptureParameters params = {};
|
|
params.GpuCaptureParameters.FileName = L"temp.wpix";
|
|
HRESULT hr = PIXBeginCapture(0, &params);
|
|
Check(hr, "PIXBeginCapture");
|
|
|
|
The legacy API fails as well
|
|
DXGIGetDebugInterface1 returns "no such interface supported"
|
|
|
|
#include <DXProgrammableCapture.h>
|
|
|
|
IDXGraphicsAnalysis* graphicsAnalysis;
|
|
D3D(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&graphicsAnalysis)));
|
|
*/
|