From 81e32ecc72970c5662a0835b5005deb238bbd4a2 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Tue, 30 Apr 2019 22:55:35 +0200
Subject: [PATCH] - implement stat gpu on vulkan and fix it on opengl

---
 .../gl/renderer/gl_renderbuffers.cpp          | 12 ++-
 src/rendering/gl/renderer/gl_renderbuffers.h  |  3 +
 src/rendering/gl/renderer/gl_scene.cpp        |  4 -
 src/rendering/gl/system/gl_debug.cpp          | 13 +---
 src/rendering/gl/system/gl_framebuffer.cpp    |  2 -
 .../postprocessing/hw_postprocess.cpp         | 50 ++++++++++++
 .../postprocessing/hw_postprocess.h           |  3 +
 .../vulkan/renderer/vk_postprocess.cpp        | 10 +++
 .../vulkan/renderer/vk_postprocess.h          |  3 +
 .../vulkan/system/vk_framebuffer.cpp          | 76 +++++++++++++++++++
 src/rendering/vulkan/system/vk_framebuffer.h  | 17 +++++
 src/rendering/vulkan/system/vk_objects.h      |  2 +-
 12 files changed, 176 insertions(+), 19 deletions(-)

diff --git a/src/rendering/gl/renderer/gl_renderbuffers.cpp b/src/rendering/gl/renderer/gl_renderbuffers.cpp
index 2ab12e44a..fafd6b794 100644
--- a/src/rendering/gl/renderer/gl_renderbuffers.cpp
+++ b/src/rendering/gl/renderer/gl_renderbuffers.cpp
@@ -857,8 +857,6 @@ FShaderProgram *GLPPRenderState::GetGLShader(PPShader *shader)
 
 void GLPPRenderState::Draw()
 {
-	//FGLDebug::PushGroup(name.GetChars());
-
 	FGLPostProcessState savedState;
 
 	// Bind input textures
@@ -971,8 +969,16 @@ void GLPPRenderState::Draw()
 		buffers->NextTexture();
 
 	glViewport(screen->mScreenViewport.left, screen->mScreenViewport.top, screen->mScreenViewport.width, screen->mScreenViewport.height);
+}
 
-	//FGLDebug::PopGroup();
+void GLPPRenderState::PushGroup(const FString &name)
+{
+	FGLDebug::PushGroup(name.GetChars()); // OpenGL override: the group name is passed straight on to FGLDebug::PushGroup
+}
+
+void GLPPRenderState::PopGroup()
+{
+	FGLDebug::PopGroup(); // OpenGL override: simply forwards to FGLDebug::PopGroup
 }
 
 
diff --git a/src/rendering/gl/renderer/gl_renderbuffers.h b/src/rendering/gl/renderer/gl_renderbuffers.h
index bb8e6e02d..4e1fd977c 100644
--- a/src/rendering/gl/renderer/gl_renderbuffers.h
+++ b/src/rendering/gl/renderer/gl_renderbuffers.h
@@ -88,6 +88,9 @@ class GLPPRenderState : public PPRenderState
 {
 public:
 	GLPPRenderState(FGLRenderBuffers *buffers) : buffers(buffers) { }
+
+	void PushGroup(const FString &name) override;
+	void PopGroup() override;
 	void Draw() override;
 
 private:
diff --git a/src/rendering/gl/renderer/gl_scene.cpp b/src/rendering/gl/renderer/gl_scene.cpp
index 906d58dfb..5102b3a0a 100644
--- a/src/rendering/gl/renderer/gl_scene.cpp
+++ b/src/rendering/gl/renderer/gl_scene.cpp
@@ -172,8 +172,6 @@ sector_t * FGLRenderer::RenderViewpoint (FRenderViewpoint &mainvp, AActor * came
 
 		if (mainview) // Bind the scene frame buffer and turn on draw buffers used by ssao
 		{
-			FGLDebug::PushGroup("MainView");
-
 			bool useSSAO = (gl_ssao != 0);
 			mBuffers->BindSceneFB(useSSAO);
 			gl_RenderState.SetPassType(useSSAO ? GBUFFER_PASS : NORMAL_PASS);
@@ -214,8 +212,6 @@ sector_t * FGLRenderer::RenderViewpoint (FRenderViewpoint &mainvp, AActor * came
 
 			mBuffers->BlitSceneToTexture(); // Copy the resulting scene to the current post process texture
 
-			FGLDebug::PopGroup(); // MainView
-
 			PostProcessScene(cm, [&]() { di->DrawEndScene2D(mainvp.sector, gl_RenderState); });
 			PostProcess.Unclock();
 		}
diff --git a/src/rendering/gl/system/gl_debug.cpp b/src/rendering/gl/system/gl_debug.cpp
index 56dcf3727..2cf781a28 100644
--- a/src/rendering/gl/system/gl_debug.cpp
+++ b/src/rendering/gl/system/gl_debug.cpp
@@ -43,21 +43,16 @@ CUSTOM_CVAR(Int, gl_debug_level, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOIN
 
 CVAR(Bool, gl_debug_breakpoint, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
 
+extern bool gpuStatActive;
+extern bool keepGpuStatActive;
+extern FString gpuStatOutput;
+
 namespace OpenGLRenderer
 {
 
 namespace
 {
-	bool gpuStatActive = false;
-	bool keepGpuStatActive = false;
 	std::vector<std::pair<FString, GLuint>> timeElapsedQueries;
-	FString gpuStatOutput;
-}
-
-ADD_STAT(gpu)
-{
-	keepGpuStatActive = true;
-	return gpuStatOutput;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/src/rendering/gl/system/gl_framebuffer.cpp b/src/rendering/gl/system/gl_framebuffer.cpp
index d70f071fc..5c2afa931 100644
--- a/src/rendering/gl/system/gl_framebuffer.cpp
+++ b/src/rendering/gl/system/gl_framebuffer.cpp
@@ -445,10 +445,8 @@ void OpenGLFrameBuffer::Draw2D()
 {
 	if (GLRenderer != nullptr)
 	{
-		FGLDebug::PushGroup("Draw2D");
 		GLRenderer->mBuffers->BindCurrentFB();
 		::Draw2D(&m2DDrawer, gl_RenderState);
-		FGLDebug::PopGroup();
 	}
 }
 
diff --git a/src/rendering/hwrenderer/postprocessing/hw_postprocess.cpp b/src/rendering/hwrenderer/postprocessing/hw_postprocess.cpp
index b982a6035..80e43e4cc 100644
--- a/src/rendering/hwrenderer/postprocessing/hw_postprocess.cpp
+++ b/src/rendering/hwrenderer/postprocessing/hw_postprocess.cpp
@@ -10,6 +10,16 @@ Postprocess hw_postprocess;
 
 PPResource *PPResource::First = nullptr;
 
+bool gpuStatActive = false; // while false, VulkanFrameBuffer::PushGroup/PopGroup return without recording anything
+bool keepGpuStatActive = false; // set by the gpu stat below; VulkanFrameBuffer::UpdateGpuStats copies it into gpuStatActive and clears it
+FString gpuStatOutput; // text returned by the gpu stat; VulkanFrameBuffer::UpdateGpuStats rebuilds it
+
+ADD_STAT(gpu)
+{
+	keepGpuStatActive = true; // every call requests that timing stays enabled past the next stats update
+	return gpuStatOutput;
+}
+
 /////////////////////////////////////////////////////////////////////////////
 
 void PPBloom::UpdateTextures(int width, int height)
@@ -46,6 +56,8 @@ void PPBloom::RenderBloom(PPRenderState *renderstate, int sceneWidth, int sceneH
 		return;
 	}
 
+	renderstate->PushGroup("bloom");
+
 	UpdateTextures(sceneWidth, sceneHeight);
 
 	ExtractUniforms extractUniforms;
@@ -121,6 +133,8 @@ void PPBloom::RenderBloom(PPRenderState *renderstate, int sceneWidth, int sceneH
 	renderstate->SetOutputCurrent();
 	renderstate->SetAdditiveBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 void PPBloom::RenderBlur(PPRenderState *renderstate, int sceneWidth, int sceneHeight, float gameinfobluramount)
@@ -144,6 +158,8 @@ void PPBloom::RenderBlur(PPRenderState *renderstate, int sceneWidth, int sceneHe
 		return;
 	}
 
+	renderstate->PushGroup("blur");
+
 	int numLevels = 3;
 	assert(numLevels <= NumBloomLevels);
 
@@ -214,6 +230,8 @@ void PPBloom::RenderBlur(PPRenderState *renderstate, int sceneWidth, int sceneHe
 	renderstate->SetOutputCurrent();
 	renderstate->SetNoBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 void PPBloom::BlurStep(PPRenderState *renderstate, const BlurUniforms &blurUniforms, PPTexture &input, PPTexture &output, PPViewport viewport, bool vertical)
@@ -295,6 +313,8 @@ void PPLensDistort::Render(PPRenderState *renderstate)
 	uniforms.LensDistortionCoefficient = k;
 	uniforms.CubicDistortionValue = kcube;
 
+	renderstate->PushGroup("lens");
+
 	renderstate->Clear();
 	renderstate->Shader = &Lens;
 	renderstate->Uniforms.Set(uniforms);
@@ -303,6 +323,8 @@ void PPLensDistort::Render(PPRenderState *renderstate)
 	renderstate->SetOutputNext();
 	renderstate->SetNoBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -319,6 +341,8 @@ void PPFXAA::Render(PPRenderState *renderstate)
 	FXAAUniforms uniforms;
 	uniforms.ReciprocalResolution = { 1.0f / screen->mScreenViewport.width, 1.0f / screen->mScreenViewport.height };
 
+	renderstate->PushGroup("fxaa");
+
 	renderstate->Clear();
 	renderstate->Shader = &FXAALuma;
 	renderstate->Uniforms.Clear();
@@ -332,6 +356,8 @@ void PPFXAA::Render(PPRenderState *renderstate)
 	renderstate->Uniforms.Set(uniforms);
 	renderstate->SetInputCurrent(0, PPFilterMode::Linear);
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 int PPFXAA::GetMaxVersion()
@@ -385,6 +411,8 @@ void PPCameraExposure::Render(PPRenderState *renderstate, int sceneWidth, int sc
 		return;
 	}
 
+	renderstate->PushGroup("exposure");
+
 	UpdateTextures(sceneWidth, sceneHeight);
 
 	ExposureExtractUniforms extractUniforms;
@@ -439,6 +467,8 @@ void PPCameraExposure::Render(PPRenderState *renderstate, int sceneWidth, int sc
 		renderstate->SetNoBlend();
 	renderstate->Draw();
 
+	renderstate->PopGroup();
+
 	FirstExposureFrame = false;
 }
 
@@ -492,6 +522,8 @@ void PPColormap::Render(PPRenderState *renderstate, int fixedcm)
 	uniforms.MapStart = { scm->ColorizeStart[0], scm->ColorizeStart[1], scm->ColorizeStart[2], 0.f };
 	uniforms.MapRange = m;
 
+	renderstate->PushGroup("colormap");
+
 	renderstate->Clear();
 	renderstate->Shader = &Colormap;
 	renderstate->Uniforms.Set(uniforms);
@@ -500,6 +532,8 @@ void PPColormap::Render(PPRenderState *renderstate, int fixedcm)
 	renderstate->SetOutputNext();
 	renderstate->SetNoBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -552,6 +586,8 @@ void PPTonemap::Render(PPRenderState *renderstate)
 	case Palette:		shader = &PaletteShader; break;
 	}
 
+	renderstate->PushGroup("tonemap");
+
 	renderstate->Clear();
 	renderstate->Shader = shader;
 	renderstate->Viewport = screen->mScreenViewport;
@@ -561,6 +597,8 @@ void PPTonemap::Render(PPRenderState *renderstate)
 	renderstate->SetOutputNext();
 	renderstate->SetNoBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -713,6 +751,8 @@ void PPAmbientOcclusion::Render(PPRenderState *renderstate, float m5, int sceneW
 	ambientViewport.width = AmbientWidth;
 	ambientViewport.height = AmbientHeight;
 
+	renderstate->PushGroup("ssao");
+
 	// Calculate linear depth values
 	renderstate->Clear();
 	renderstate->Shader = gl_multisample > 1 ? &LinearDepthMS : &LinearDepth;
@@ -774,6 +814,8 @@ void PPAmbientOcclusion::Render(PPRenderState *renderstate, float m5, int sceneW
 	else
 		renderstate->SetAlphaBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -804,6 +846,8 @@ void PPShadowMap::Update(PPRenderState *renderstate)
 	ShadowMapUniforms uniforms;
 	uniforms.ShadowmapQuality = (float)gl_shadowmap_quality;
 
+	renderstate->PushGroup("shadowmap");
+
 	renderstate->Clear();
 	renderstate->Shader = &ShadowMap;
 	renderstate->Uniforms.Set(uniforms);
@@ -812,6 +856,8 @@ void PPShadowMap::Update(PPRenderState *renderstate)
 	renderstate->SetOutputShadowMap();
 	renderstate->SetNoBlend();
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -905,6 +951,8 @@ PPCustomShaderInstance::PPCustomShaderInstance(PostProcessShader *desc) : Desc(d
 
 void PPCustomShaderInstance::Run(PPRenderState *renderstate)
 {
+	renderstate->PushGroup(Desc->Name);
+
 	renderstate->Clear();
 	renderstate->Shader = &Shader;
 	renderstate->Viewport = screen->mScreenViewport;
@@ -916,6 +964,8 @@ void PPCustomShaderInstance::Run(PPRenderState *renderstate)
 	SetUniforms(renderstate);
 
 	renderstate->Draw();
+
+	renderstate->PopGroup();
 }
 
 void PPCustomShaderInstance::SetTextures(PPRenderState *renderstate)
diff --git a/src/rendering/hwrenderer/postprocessing/hw_postprocess.h b/src/rendering/hwrenderer/postprocessing/hw_postprocess.h
index 7a70ac484..902810c7c 100644
--- a/src/rendering/hwrenderer/postprocessing/hw_postprocess.h
+++ b/src/rendering/hwrenderer/postprocessing/hw_postprocess.h
@@ -78,6 +78,9 @@ class PPRenderState
 public:
 	virtual ~PPRenderState() = default;
 
+	virtual void PushGroup(const FString &name) = 0;
+	virtual void PopGroup() = 0;
+
 	virtual void Draw() = 0;
 
 	void Clear()
diff --git a/src/rendering/vulkan/renderer/vk_postprocess.cpp b/src/rendering/vulkan/renderer/vk_postprocess.cpp
index 8716b8d38..3bb441f19 100644
--- a/src/rendering/vulkan/renderer/vk_postprocess.cpp
+++ b/src/rendering/vulkan/renderer/vk_postprocess.cpp
@@ -419,6 +419,16 @@ FString VkPPShader::LoadShaderCode(const FString &lumpName, const FString &defin
 
 /////////////////////////////////////////////////////////////////////////////
 
+void VkPPRenderState::PushGroup(const FString &name)
+{
+	GetVulkanFrameBuffer()->PushGroup(name); // Vulkan override: the frame buffer owns the timing groups, so delegate to it
+}
+
+void VkPPRenderState::PopGroup()
+{
+	GetVulkanFrameBuffer()->PopGroup(); // Vulkan override: delegates to VulkanFrameBuffer::PopGroup
+}
+
 void VkPPRenderState::Draw()
 {
 	auto fb = GetVulkanFrameBuffer();
diff --git a/src/rendering/vulkan/renderer/vk_postprocess.h b/src/rendering/vulkan/renderer/vk_postprocess.h
index a1b6b9e87..8a4489dde 100644
--- a/src/rendering/vulkan/renderer/vk_postprocess.h
+++ b/src/rendering/vulkan/renderer/vk_postprocess.h
@@ -128,6 +128,9 @@ private:
 class VkPPRenderState : public PPRenderState
 {
 public:
+	void PushGroup(const FString &name) override;
+	void PopGroup() override;
+
 	void Draw() override;
 
 private:
diff --git a/src/rendering/vulkan/system/vk_framebuffer.cpp b/src/rendering/vulkan/system/vk_framebuffer.cpp
index dbb6da9c3..1b7528747 100644
--- a/src/rendering/vulkan/system/vk_framebuffer.cpp
+++ b/src/rendering/vulkan/system/vk_framebuffer.cpp
@@ -72,6 +72,10 @@ extern bool NoInterpolateView;
 extern int rendered_commandbuffers;
 int current_rendered_commandbuffers;
 
+extern bool gpuStatActive;
+extern bool keepGpuStatActive;
+extern FString gpuStatOutput;
+
 VulkanFrameBuffer::VulkanFrameBuffer(void *hMonitor, bool fullscreen, VulkanDevice *dev) : 
 	Super(hMonitor, fullscreen) 
 {
@@ -160,6 +164,12 @@ void VulkanFrameBuffer::InitializeState()
 #else
 	mRenderState.reset(new VkRenderState());
 #endif
+
+	QueryPoolBuilder querybuilder;
+	querybuilder.setQueryType(VK_QUERY_TYPE_TIMESTAMP, MaxTimestampQueries);
+	mTimestampQueryPool = querybuilder.create(device);
+
+	GetDrawCommands()->resetQueryPool(mTimestampQueryPool.get(), 0, MaxTimestampQueries);
 }
 
 void VulkanFrameBuffer::Update()
@@ -180,6 +190,7 @@ void VulkanFrameBuffer::Update()
 	Flush3D.Unclock();
 
 	WaitForCommands(true);
+	UpdateGpuStats();
 
 	Super::Update();
 }
@@ -779,6 +790,12 @@ TArray<uint8_t> VulkanFrameBuffer::GetScreenshotBuffer(int &pitch, ESSType &colo
 
 void VulkanFrameBuffer::BeginFrame()
 {
+	if (mNextTimestampQuery > 0)
+	{
+		GetDrawCommands()->resetQueryPool(mTimestampQueryPool.get(), 0, mNextTimestampQuery);
+		mNextTimestampQuery = 0;
+	}
+
 	SetViewportRects(nullptr);
 	mScreenBuffers->BeginFrame(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height);
 	mSaveBuffers->BeginFrame(SAVEPICWIDTH, SAVEPICHEIGHT, SAVEPICWIDTH, SAVEPICHEIGHT);
@@ -787,6 +804,65 @@ void VulkanFrameBuffer::BeginFrame()
 	mRenderPassManager->UpdateDynamicSet();
 }
 
+void VulkanFrameBuffer::PushGroup(const FString &name)
+{
+	// Opens a named timing group: writes a start timestamp and remembers the group until PopGroup.
+	// Does nothing while gpu stats are inactive or once all MaxTimestampQueries slots are in use.
+	if (!gpuStatActive || mNextTimestampQuery >= VulkanFrameBuffer::MaxTimestampQueries)
+		return;
+
+	TimestampQuery group;
+	group.name = name;
+	group.startIndex = mNextTimestampQuery++;
+	group.endIndex = 0; // stays 0 until PopGroup assigns the closing slot
+	GetDrawCommands()->writeTimestamp(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, mTimestampQueryPool.get(), group.startIndex);
+	const size_t slot = timeElapsedQueries.size();
+	mGroupStack.push_back(slot);
+	timeElapsedQueries.push_back(group);
+}
+
+void VulkanFrameBuffer::PopGroup()
+{
+	// Closes the most recently pushed group by writing its end timestamp.
+	if (!gpuStatActive)
+		return;
+	if (mGroupStack.empty())
+		return;
+	TimestampQuery &group = timeElapsedQueries[mGroupStack.back()];
+	mGroupStack.pop_back();
+	if (mNextTimestampQuery >= VulkanFrameBuffer::MaxTimestampQueries)
+		return; // no free slot left: the group is dropped from the stack without an end timestamp
+	group.endIndex = mNextTimestampQuery++;
+	GetDrawCommands()->writeTimestamp(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, mTimestampQueryPool.get(), group.endIndex);
+}
+
+void VulkanFrameBuffer::UpdateGpuStats()
+{
+	// Turns the timestamps recorded by PushGroup/PopGroup this frame into the gpuStatOutput text.
+	uint64_t timestamps[MaxTimestampQueries];
+	// With no timestamps recorded the read is skipped: it would request zero queries into a zero-byte buffer.
+	if (mNextTimestampQuery > 0)
+		mTimestampQueryPool->getResults(0, mNextTimestampQuery, sizeof(uint64_t) * mNextTimestampQuery, timestamps, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+	double timestampPeriod = device->PhysicalDevice.Properties.limits.timestampPeriod;
+
+	gpuStatOutput = "";
+	for (auto &q : timeElapsedQueries)
+	{
+		if (q.endIndex <= q.startIndex) // no end timestamp was written for this group
+			continue;
+		int64_t timeElapsed = std::max(static_cast<int64_t>(timestamps[q.endIndex] - timestamps[q.startIndex]), (int64_t)0);
+		double timeNS = timeElapsed * timestampPeriod;
+		FString out;
+		out.Format("%s=%04.2f ms\n", q.name.GetChars(), timeNS / 1000000.0f);
+		gpuStatOutput += out;
+	}
+	timeElapsedQueries.clear();
+	mGroupStack.clear();
+	// Timing stays on only if keepGpuStatActive was set (the gpu stat does this) since the previous update.
+	gpuStatActive = keepGpuStatActive;
+	keepGpuStatActive = false;
+}
+
 void VulkanFrameBuffer::Draw2D()
 {
 	::Draw2D(&m2DDrawer, *mRenderState);
diff --git a/src/rendering/vulkan/system/vk_framebuffer.h b/src/rendering/vulkan/system/vk_framebuffer.h
index 2a239c3d3..af3c7047a 100644
--- a/src/rendering/vulkan/system/vk_framebuffer.h
+++ b/src/rendering/vulkan/system/vk_framebuffer.h
@@ -103,6 +103,10 @@ public:
 
 	void WaitForCommands(bool finish);
 
+	void PushGroup(const FString &name);
+	void PopGroup();
+	void UpdateGpuStats();
+
 private:
 	sector_t *RenderViewpoint(FRenderViewpoint &mainvp, AActor * camera, IntRect * bounds, float fov, float ratio, float fovratio, bool mainview, bool toscreen);
 	void RenderTextureView(FCanvasTexture *tex, AActor *Viewpoint, double FOV);
@@ -136,6 +140,19 @@ private:
 	std::unique_ptr<VulkanSemaphore> mRenderFinishedSemaphore;
 
 	VkRenderBuffers *mActiveRenderBuffers = nullptr;
+
+	struct TimestampQuery // one named timing group recorded by PushGroup/PopGroup
+	{
+		FString name; // label printed in front of the time in the stat output
+		uint32_t startIndex; // query pool slot of the timestamp written by PushGroup
+		uint32_t endIndex; // query pool slot written by PopGroup; stays 0 if no end timestamp was written
+	};
+
+	enum { MaxTimestampQueries = 100 }; // number of queries the timestamp pool is created with in InitializeState
+	std::unique_ptr<VulkanQueryPool> mTimestampQueryPool;
+	int mNextTimestampQuery = 0; // next unused pool slot; BeginFrame resets it to 0
+	std::vector<size_t> mGroupStack; // indices into timeElapsedQueries of groups pushed but not yet popped
+	std::vector<TimestampQuery> timeElapsedQueries; // groups recorded since the last UpdateGpuStats
 };
 
 inline VulkanFrameBuffer *GetVulkanFrameBuffer() { return static_cast<VulkanFrameBuffer*>(screen); }
diff --git a/src/rendering/vulkan/system/vk_objects.h b/src/rendering/vulkan/system/vk_objects.h
index 419ea3295..799f64560 100644
--- a/src/rendering/vulkan/system/vk_objects.h
+++ b/src/rendering/vulkan/system/vk_objects.h
@@ -830,7 +830,7 @@ inline void VulkanCommandBuffer::resetQueryPool(VkQueryPool queryPool, uint32_t
 
 inline void VulkanCommandBuffer::writeTimestamp(VkPipelineStageFlagBits pipelineStage, VulkanQueryPool *queryPool, uint32_t query)
 {
-	writeTimestamp(pipelineStage, queryPool, query);
+	writeTimestamp(pipelineStage, queryPool->pool, query); // pass the wrapped pool so the VkQueryPool overload is chosen instead of this function calling itself
 }
 
 inline void VulkanCommandBuffer::writeTimestamp(VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t query)