Optimized perf by fixing several issues with the GPU<->CPU syncing

This commit is contained in:
Robert Beckebans 2022-03-23 20:22:05 +01:00
parent d4f1d99bd7
commit 62429d8f46
12 changed files with 144 additions and 138 deletions

View file

@ -38,11 +38,7 @@ extern DeviceManager* deviceManager;
// Handler meant to list the framebuffers (presumably registered as a console
// command, judging by the idCmdArgs signature - TODO confirm).
// The listing itself is not implemented yet: the function only prints a notice
// when glConfig reports that framebuffer objects are unavailable. 'args' is unused.
static void R_ListFramebuffers_f( const idCmdArgs& args )
{
// nothing can be listed without framebuffer object support, so say so and leave
if( !glConfig.framebufferObjectAvailable )
{
common->Printf( "GL_EXT_framebuffer_object is not available.\n" );
return;
}
// TODO
}
Framebuffer::Framebuffer( const char* name, int w, int h )

View file

@ -357,6 +357,7 @@ void idImage::AllocImage()
uint scaledWidth = originalWidth;
uint scaledHeight = originalHeight;
#if 0
uint maxTextureSize = 0;
if( maxTextureSize > 0 &&
@ -375,6 +376,7 @@ void idImage::AllocImage()
scaledHeight = maxTextureSize;
}
}
#endif
auto textureDesc = nvrhi::TextureDesc()
.setDebugName( GetName() )

View file

@ -61,6 +61,7 @@ public:
int currentImageParm = 0;
idArray< idImage*, MAX_IMAGE_PARMS > imageParms;
idScreenRect scissor; // set by GL_Scissor
//nvrhi::GraphicsPipelineHandle pipeline;
//bool fullscreen = false;
};
@ -172,9 +173,6 @@ idRenderBackend::DrawElementsWithCounters
*/
void idRenderBackend::DrawElementsWithCounters( const drawSurf_t* surf )
{
// Only update the constant buffer if it was updated at all.
renderProgManager.CommitConstantBuffer( commandList );
// Get vertex buffer
const vertCacheHandle_t vbHandle = surf->ambientCache;
idVertexBuffer* vertexBuffer;
@ -265,6 +263,9 @@ void idRenderBackend::DrawElementsWithCounters( const drawSurf_t* surf )
changeState = true;
}
// TODO: Only update the constant buffer if it was updated at all.
renderProgManager.CommitConstantBuffer( commandList );
if( changeState )
{
nvrhi::GraphicsState state;
@ -285,11 +286,17 @@ void idRenderBackend::DrawElementsWithCounters( const drawSurf_t* surf )
( float )currentViewport.y2,
currentViewport.zmin,
currentViewport.zmax };
state.viewport.addViewportAndScissorRect( viewport );
state.viewport.addViewport( viewport );
if( !currentScissor.IsEmpty() )
#if 0
if( !context.scissor.IsEmpty() )
{
state.viewport.addScissorRect( nvrhi::Rect( currentScissor.x1, currentScissor.x2, currentScissor.y1, currentScissor.y2 ) );
state.viewport.addScissorRect( nvrhi::Rect( context.scissor.x1, context.scissor.x2, context.scissor.y1, context.scissor.y2 ) );
}
else
#endif
{
state.viewport.addScissorRect( nvrhi::Rect( viewport ) );
}
commandList->setGraphicsState( state );
@ -677,28 +684,6 @@ void idRenderBackend::GL_EndFrame()
commandList->close();
deviceManager->GetDevice()->executeCommandList( commandList );
// Make sure that all frames have finished rendering
deviceManager->GetDevice()->waitForIdle();
// Release all in-flight references to the render targets
deviceManager->GetDevice()->runGarbageCollection();
// Present to the swap chain.
deviceManager->Present();
}
// Finishes the current render pass.
// Only does work when USE_NVRHI is defined; otherwise the body is empty.
void idRenderBackend::GL_EndRenderPass()
{
#if defined( USE_NVRHI )
// close the command list and hand it to the device for execution
commandList->close();
deviceManager->GetDevice()->executeCommandList( commandList );
// let the device run its garbage collection before recording continues
deviceManager->GetDevice()->runGarbageCollection();
// reopen the same command list so later calls can keep recording into it
commandList->open();
#endif
}
/*
@ -712,6 +697,27 @@ void idRenderBackend::GL_BlockingSwapBuffers()
{
// Make sure that all frames have finished rendering
deviceManager->GetDevice()->waitForIdle();
// Release all in-flight references to the render targets
deviceManager->GetDevice()->runGarbageCollection();
// Present to the swap chain.
deviceManager->Present();
renderLog.EndFrame();
}
// Finishes the current render pass.
// The whole body is compiled out with '#if 0', so this function is currently a no-op.
void idRenderBackend::GL_EndRenderPass()
{
#if 0
// disabled: close and execute the command list, run the device garbage
// collection, then reopen the command list for further recording
commandList->close();
deviceManager->GetDevice()->executeCommandList( commandList );
deviceManager->GetDevice()->runGarbageCollection();
commandList->open();
#endif
}
/*
@ -773,9 +779,9 @@ idRenderBackend::GL_Scissor
void idRenderBackend::GL_Scissor( int x /* left*/, int y /* bottom */, int w, int h )
{
	// TODO Check if this is right.

	// corner opposite to ( x, y ), derived from the requested width and height
	const int oppositeX = x + w;
	const int oppositeY = y + h;

	// reset the scissor stored in the context, then add both corner points to it
	context.scissor.Clear();
	context.scissor.AddPoint( x, y );
	context.scissor.AddPoint( oppositeX, oppositeY );
}
/*
@ -921,23 +927,6 @@ void idRenderBackend::CheckCVars()
}
}*/
if( r_useSeamlessCubeMap.IsModified() )
{
r_useSeamlessCubeMap.ClearModified();
if( glConfig.seamlessCubeMapAvailable )
{
if( r_useSeamlessCubeMap.GetBool() )
{
//glEnable( GL_TEXTURE_CUBE_MAP_SEAMLESS );
}
else
{
//glDisable( GL_TEXTURE_CUBE_MAP_SEAMLESS );
}
}
}
// SRS - Enable SDL-driven vsync changes without restart for UNIX-like OSs
#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
extern idCVar r_swapInterval;
@ -958,6 +947,7 @@ void idRenderBackend::CheckCVars()
}
#endif
// SRS end
if( r_antiAliasing.IsModified() )
{
switch( r_antiAliasing.GetInteger() )
@ -977,6 +967,7 @@ void idRenderBackend::CheckCVars()
}
}
/*
if( r_usePBR.IsModified() ||
r_useHDR.IsModified() ||
r_useHalfLambertLighting.IsModified() ||
@ -1002,20 +993,7 @@ void idRenderBackend::CheckCVars()
renderProgManager.KillAllShaders();
renderProgManager.LoadAllShaders();
}
// RB: turn off shadow mapping for OpenGL drivers that are too slow
switch( glConfig.driverType )
{
case GLDRV_OPENGL_ES2:
case GLDRV_OPENGL_ES3:
//case GLDRV_OPENGL_MESA:
r_useShadowMapping.SetInteger( 0 );
break;
default:
break;
}
// RB end
*/
}
/*
@ -1324,6 +1302,7 @@ idRenderBackend::idRenderBackend()
memset( &glConfig, 0, sizeof( glConfig ) );
//glConfig.gpuSkinningAvailable = true;
glConfig.timerQueryAvailable = true;
}
/*

View file

@ -314,4 +314,6 @@ idRenderProgManager::CommitConstantBuffer
void idRenderProgManager::CommitConstantBuffer( nvrhi::ICommandList* commandList )
{
	// CPU-side uniform storage: its data pointer and the size it reports as allocated
	const auto* uniformData = uniforms.Ptr();
	const auto uniformSize = uniforms.Allocated();

	// write the uniforms into the constant buffer through the given command list
	commandList->writeBuffer( constantBuffer, uniformData, uniformSize );

	// disabled alternative:
	//commandList->setPushConstants( uniforms.Ptr(), uniforms.Allocated() );
}

View file

@ -5910,12 +5910,12 @@ void idRenderBackend::ExecuteBackEndCommands( const emptyCommand_t* cmds )
if( renderSystem->GetStereo3DMode() != STEREO3D_OFF )
{
StereoRenderExecuteBackEndCommands( cmds );
renderLog.EndFrame();
//renderLog.EndFrame();
return;
}
renderLog.StartFrame();
GL_StartFrame();
renderLog.StartFrame( commandList );
void* textureId = globalImages->hierarchicalZbufferImage->GetTextureID();
globalImages->LoadDeferredImages( commandList );
@ -6030,7 +6030,7 @@ void idRenderBackend::ExecuteBackEndCommands( const emptyCommand_t* cmds )
pc.c_copyFrameBuffer = 0;
}
renderLog.EndFrame();
//renderLog.EndFrame();
}

View file

@ -497,6 +497,7 @@ private:
idRenderMatrix shadowV[6]; // shadow depth view matrix
idRenderMatrix shadowP[6]; // shadow depth projection matrix
// TODO remove
float hdrAverageLuminance;
float hdrMaxLuminance;
float hdrTime;

View file

@ -106,14 +106,20 @@ PC_BeginNamedEvent
FIXME: this is not thread safe on the PC
========================
*/
void PC_BeginNamedEvent( const char* szName, const idVec4& color )
void PC_BeginNamedEvent( const char* szName, const idVec4& color, nvrhi::ICommandList* commandList )
{
if( r_logLevel.GetInteger() <= 0 )
{
return;
}
#if defined( USE_VULKAN )
#if defined( USE_NVRHI )
if( commandList )
{
commandList->beginMarker( szName );
}
#elif defined( USE_VULKAN )
// start an annotated group of calls under this name
// SRS - Prefer VK_EXT_debug_utils over VK_EXT_debug_marker/VK_EXT_debug_report (deprecated by VK_EXT_debug_utils)
@ -141,8 +147,6 @@ void PC_BeginNamedEvent( const char* szName, const idVec4& color )
qvkCmdDebugMarkerBeginEXT( vkcontext.commandBuffer[ vkcontext.frameParity ], &label );
}
#elif defined(USE_NVRHI)
// SP: TODO
#else
// RB: colors are not supported in OpenGL
@ -196,14 +200,20 @@ void PC_BeginNamedEvent( const char* szName, const idVec4& color )
PC_EndNamedEvent
========================
*/
void PC_EndNamedEvent()
void PC_EndNamedEvent( nvrhi::ICommandList* commandList )
{
if( r_logLevel.GetInteger() <= 0 )
{
return;
}
#if defined( USE_VULKAN )
#if defined( USE_NVRHI )
if( commandList )
{
commandList->endMarker();
}
#elif defined( USE_VULKAN )
// SRS - Prefer VK_EXT_debug_utils over VK_EXT_debug_marker/VK_EXT_debug_report (deprecated by VK_EXT_debug_utils)
if( vkcontext.debugUtilsSupportAvailable )
{
@ -300,8 +310,6 @@ idRenderLog
idRenderLog renderLog;
// RB begin
/*
========================
idRenderLog::idRenderLog
@ -309,9 +317,22 @@ idRenderLog::idRenderLog
*/
// Starts the frame bookkeeping at zero: no frames counted yet, parity 0.
idRenderLog::idRenderLog() :
	frameCounter( 0 ),
	frameParity( 0 )
{
}
#if 1
// Stores the command list passed in for this frame in the commandList member.
void idRenderLog::StartFrame( nvrhi::ICommandList* _commandList )
{
	this->commandList = _commandList;
}
// Advances the frame counter by one and recomputes the parity as the
// counter modulo NUM_FRAME_DATA.
void idRenderLog::EndFrame()
{
	frameCounter += 1;
	frameParity = frameCounter % NUM_FRAME_DATA;
}
/*
========================
@ -329,7 +350,11 @@ void idRenderLog::OpenMainBlock( renderLogMainBlock_t block, nvrhi::ICommandList
{
mainBlock = block;
#if defined( USE_VULKAN )
#if defined( USE_NVRHI )
// SP: use nvrhi timer queries
#elif defined( USE_VULKAN )
if( vkcontext.queryIndex[ vkcontext.frameParity ] >= ( NUM_TIMESTAMP_QUERIES - 1 ) )
{
return;
@ -341,10 +366,6 @@ void idRenderLog::OpenMainBlock( renderLogMainBlock_t block, nvrhi::ICommandList
uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ mainBlock * 2 + 0 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++;
vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, queryPool, queryIndex );
#elif defined(USE_NVRHI)
// SP: use nvrhi timer queries
#elif defined(__APPLE__)
// SRS - For OSX use elapsed time query for Apple OpenGL 4.1 using GL_TIME_ELAPSED vs GL_TIMESTAMP (which is not implemented on OSX)
// SRS - OSX AMD drivers have a rendering bug (flashing colours) with an elapsed time query when Shadow Mapping is on - turn off query for that case unless r_skipAMDWorkarounds is set
@ -413,7 +434,6 @@ void idRenderLog::CloseMainBlock()
}
}
#endif
/*
========================
@ -422,14 +442,7 @@ idRenderLog::OpenBlock
*/
// Opens a named debug block.
//   label - name passed on as the marker / event name
//   color - passed through to PC_BeginNamedEvent
//
// All of the work is delegated to PC_BeginNamedEvent, which already checks
// r_logLevel and, when built with USE_NVRHI, begins a marker on the command
// list if one is set. Emitting the marker here as well would open every
// block twice, so a single call is made.
void idRenderLog::OpenBlock( const char* label, const idVec4& color )
{
	PC_BeginNamedEvent( label, color, commandList );
}
/*
@ -439,13 +452,6 @@ idRenderLog::CloseBlock
*/
// Closes the debug block most recently opened with OpenBlock.
//
// All of the work is delegated to PC_EndNamedEvent, which already checks
// r_logLevel and, when built with USE_NVRHI, ends the marker on the command
// list if one is set. Ending the marker here as well would close every
// block twice, so a single call is made.
void idRenderLog::CloseBlock()
{
	PC_EndNamedEvent( commandList );
}
// RB end

View file

@ -89,13 +89,16 @@ private:
#if defined( USE_NVRHI )
nvrhi::CommandListHandle commandList;
uint64 frameCounter;
uint32 frameParity;
#endif
public:
idRenderLog();
void StartFrame() {}
void EndFrame() {}
void StartFrame( nvrhi::ICommandList* _commandList );
void EndFrame();
void Close() {}
int Active()
{

View file

@ -31,13 +31,14 @@ If you have questions concerning this license or the applicable additional terms
#include "precompiled.h"
#pragma hdrstop
#if !defined( USE_NVRHI )
#include "RenderCommon.h"
#include "RenderProgs_embedded.h"
idCVar r_skipStripDeadCode( "r_skipStripDeadCode", "0", CVAR_BOOL, "Skip stripping dead code" );
struct idCGBlock
{
idStr prefix; // tokens that comes before the name
@ -1680,7 +1681,7 @@ const char* idRenderProgManager::GetGLSLMacroName( shaderFeature_t sf ) const
return GLSLMacroNames[ sf ];
}
#if !defined( USE_NVRHI )
/*
================================================================================================
idRenderProgManager::FindGLSLProgram

View file

@ -173,16 +173,11 @@ struct backEndCounters_t
struct glconfig_t
{
graphicsVendor_t vendor;
graphicsDriverType_t driverType;
const char* renderer_string;
const char* vendor_string;
const char* version_string;
const char* extensions_string;
const char* wgl_extensions_string;
const char* shading_language_string;
float glVersion; // atof( version_string )
int maxTextureSize; // queried from GL
int maxTextureCoords;
@ -190,6 +185,36 @@ struct glconfig_t
int uniformBufferOffsetAlignment;
float maxTextureAnisotropy;
bool timerQueryAvailable;
bool gpuSkinningAvailable;
stereo3DMode_t stereo3Dmode;
int nativeScreenWidth; // this is the native screen width resolution of the renderer
int nativeScreenHeight; // this is the native screen height resolution of the renderer
int displayFrequency;
int isFullscreen; // monitor number
bool isStereoPixelFormat;
bool stereoPixelFormatAvailable;
int multisamples;
// Screen separation for stereoscopic rendering is set based on this.
// PC vid code sets this, converting from diagonals / inches / whatever as needed.
// If the value can't be determined, set something reasonable, like 50cm.
float physicalScreenWidthInCentimeters;
float pixelAspect;
#if !defined(USE_NVRHI)
graphicsDriverType_t driverType;
const char* wgl_extensions_string;
const char* shading_language_string;
float glVersion; // atof( version_string )
int colorBits;
int depthBits;
int stencilBits;
@ -210,12 +235,11 @@ struct glconfig_t
bool twoSidedStencilAvailable;
bool depthBoundsTestAvailable;
bool syncAvailable;
bool timerQueryAvailable;
bool occlusionQueryAvailable;
bool debugOutputAvailable;
bool swapControlTearAvailable;
// RB begin
bool gremedyStringMarkerAvailable;
bool khronosDebugAvailable;
bool vertexHalfFloatAvailable;
@ -226,31 +250,11 @@ struct glconfig_t
// bool framebufferPackedDepthStencilAvailable;
bool framebufferBlitAvailable;
// only true with uniform buffer support and an OpenGL driver that supports GLSL >= 1.50
bool gpuSkinningAvailable;
// RB end
stereo3DMode_t stereo3Dmode;
int nativeScreenWidth; // this is the native screen width resolution of the renderer
int nativeScreenHeight; // this is the native screen height resolution of the renderer
int displayFrequency;
int isFullscreen; // monitor number
bool isStereoPixelFormat;
bool stereoPixelFormatAvailable;
int multisamples;
// Screen separation for stereoscopic rendering is set based on this.
// PC vid code sets this, converting from diagonals / inches / whatever as needed.
// If the value can't be determined, set something reasonable, like 50cm.
float physicalScreenWidthInCentimeters;
float pixelAspect;
#if !defined(USE_NVRHI) && !defined(USE_VULKAN)
#if !defined(USE_VULKAN)
GLuint global_vao;
#endif
#endif
};

View file

@ -1510,6 +1510,7 @@ void GfxInfo_f( const idCmdArgs& args )
common->Printf( "GL_RENDERER: %s\n", glConfig.renderer_string );
common->Printf( "GL_VERSION: %s\n", glConfig.version_string );
common->Printf( "GL_EXTENSIONS: %s\n", glConfig.extensions_string );
#if !defined( USE_NVRHI )
if( glConfig.wgl_extensions_string )
{
common->Printf( "WGL_EXTENSIONS: %s\n", glConfig.wgl_extensions_string );
@ -1517,12 +1518,13 @@ void GfxInfo_f( const idCmdArgs& args )
common->Printf( "GL_MAX_TEXTURE_SIZE: %d\n", glConfig.maxTextureSize );
common->Printf( "GL_MAX_TEXTURE_COORDS_ARB: %d\n", glConfig.maxTextureCoords );
common->Printf( "GL_MAX_TEXTURE_IMAGE_UNITS_ARB: %d\n", glConfig.maxTextureImageUnits );
#endif
// print all the display adapters, monitors, and video modes
//void DumpAllDisplayDevices();
//DumpAllDisplayDevices();
common->Printf( "\nPIXELFORMAT: color(%d-bits) Z(%d-bit) stencil(%d-bits)\n", glConfig.colorBits, glConfig.depthBits, glConfig.stencilBits );
//common->Printf( "\nPIXELFORMAT: color(%d-bits) Z(%d-bit) stencil(%d-bits)\n", glConfig.colorBits, glConfig.depthBits, glConfig.stencilBits );
common->Printf( "MODE: %d, %d x %d %s hz:", r_vidMode.GetInteger(), renderSystem->GetWidth(), renderSystem->GetHeight(), fsstrings[r_fullscreen.GetBool()] );
if( glConfig.displayFrequency )
{

View file

@ -600,7 +600,17 @@ void DeviceManager_DX12::Present()
auto bufferIndex = m_SwapChain->GetCurrentBackBufferIndex();
UINT presentFlags = 0;
if( !deviceParms.vsyncEnabled && !glConfig.isFullscreen && glConfig.swapControlTearAvailable )
if( r_swapInterval.GetInteger() == 1 )
{
SetVsyncEnabled( false );
}
else if( r_swapInterval.GetInteger() == 2 )
{
SetVsyncEnabled( true );
}
if( !deviceParms.vsyncEnabled && !glConfig.isFullscreen && m_TearingSupported && r_swapInterval.GetInteger() == 0 )
{
presentFlags |= DXGI_PRESENT_ALLOW_TEARING;
}