Merge branch 'optick-profiler'

This commit is contained in:
Robert Beckebans 2023-03-03 19:53:24 +01:00
commit f4f416bc75
37 changed files with 13798 additions and 12 deletions

View file

@ -48,28 +48,31 @@ option(WINDOWS10
"Build for Windows 10+" ON)
option(USE_SYSTEM_ZLIB
"Use the system zlib instead of the bundled one" OFF)
"Use the system zlib instead of the bundled one" OFF)
option(USE_SYSTEM_LIBPNG
"Use the system libpng instead of the bundled one" OFF)
"Use the system libpng instead of the bundled one" OFF)
option(USE_SYSTEM_LIBJPEG
"Use the system libjpeg instead of the bundled one" OFF)
"Use the system libjpeg instead of the bundled one" OFF)
option(USE_SYSTEM_LIBGLEW
"Use the system libglew instead of the bundled one" OFF)
"Use the system libglew instead of the bundled one" OFF)
option(USE_SYSTEM_RAPIDJSON
"Use the system rapidjson instead of the bundled one" OFF)
"Use the system rapidjson instead of the bundled one" OFF)
option(USE_DX12
"Use DirectX 12" ON)
"Use DirectX 12" ON)
option(USE_VULKAN
"Use Vulkan" ON)
"Use Vulkan" ON)
option(USE_VMA
"Use VMA allocator instead of the NVRHI builtin one" ON)
"Use VMA allocator instead of the NVRHI builtin one" ON)
# Build switch for the bundled Optick profiler (libs/optick). When ON, the Optick
# sources are globbed into the build and the targets are compiled with USE_OPTICK=1;
# when OFF they are compiled with USE_OPTICK=0.
option(OPTICK
"Enable profiling with Optick" OFF)
set(NVRHI_INSTALL OFF)
@ -466,7 +469,7 @@ include_directories(${NVRHI_DIR}/include)
if(USE_SYSTEM_RAPIDJSON)
find_package(rapidjson REQUIRED)
find_package(rapidjson REQUIRED)
endif(USE_SYSTEM_RAPIDJSON)
if (RAPIDJSON_FOUND)
@ -476,6 +479,17 @@ else (RAPIDJSON_FOUND)
include_directories("libs/rapidjson/include")
endif (RAPIDJSON_FOUND)
# The Optick headers are put on the include path regardless of the OPTICK option.
include_directories("libs/optick")

# Only collect the profiler's own files when profiling was requested; otherwise
# OPTICK_INCLUDES / OPTICK_SOURCES stay undefined and expand to nothing later on.
if(OPTICK)
	file(GLOB OPTICK_SOURCES libs/optick/*.cpp)
	file(GLOB OPTICK_INCLUDES libs/optick/*.h)

	# headers and sources share one IDE folder
	source_group("libs\\optick" FILES ${OPTICK_INCLUDES} ${OPTICK_SOURCES})
endif()
add_subdirectory(idlib)
file(GLOB NATVIS_SOURCES .natvis)
@ -1348,6 +1362,7 @@ set(RBDOOM3_INCLUDES
#${FREETYPE_SOURCES}
${SOUND_INCLUDES}
${OGGVORBIS_INCLUDES}
${OPTICK_INCLUDES}
${UI_INCLUDES}
${SWF_INCLUDES}
${COMMON_INCLUDES}
@ -1397,6 +1412,7 @@ set(RBDOOM3_SOURCES
#${FREETYPE_SOURCES}
${SOUND_SOURCES}
${OGGVORBIS_SOURCES}
${OPTICK_SOURCES}
${UI_SOURCES}
${SWF_SOURCES}
${COMMON_SOURCES}
@ -1556,7 +1572,7 @@ if(MSVC)
if(USE_PRECOMPILED_HEADERS)
set(RBDOOM3_PRECOMPILED_SOURCES ${RBDOOM3_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES} ${OPTICK_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/libs/zlib/minizip/ioapi.c)
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTDecoder.cpp)
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTEncoder.cpp)
@ -1698,7 +1714,7 @@ else()
if(USE_PRECOMPILED_HEADERS)
set(RBDOOM3_PRECOMPILED_SOURCES ${RBDOOM3_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES} ${OPTICK_SOURCES})
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/libs/zlib/minizip/ioapi.c)
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTDecoder.cpp)
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTEncoder.cpp)
@ -1838,6 +1854,11 @@ if(USE_VULKAN)
target_link_libraries(RBDoom3BFG nvrhi_vk)
endif()
# USE_OPTICK is always defined: 1 when the OPTICK option is enabled, 0 otherwise.
# $<BOOL:...> collapses the option value to exactly "1" or "0".
target_compile_definitions(RBDoom3BFG PUBLIC USE_OPTICK=$<BOOL:${OPTICK}>)
# needs to come after nvrhi_d3d11 etc. for link order
target_link_libraries(RBDoom3BFG nvrhi)

View file

@ -0,0 +1,6 @@
rem Recreates ..\build from scratch and generates a 64-bit Visual Studio solution
rem with FFmpeg disabled, the bundled Bink decoder enabled and Optick profiling on.
cd ..
rem Remove the whole tree. "del /s /q build" only deleted regular files: it left the
rem directory structure (and hidden/read-only files) behind, so the mkdir below
rem failed with "already exists" and stale artifacts could survive.
if exist build rmdir /s /q build
mkdir build
cd build
cmake -G "Visual Studio 16" -A x64 -DFFMPEG=OFF -DBINKDEC=ON -DOPTICK=ON ../neo
pause

View file

@ -2616,6 +2616,8 @@ idGameLocal::RunFrame
*/
void idGameLocal::RunFrame( idUserCmdMgr& cmdMgr, gameReturn_t& ret )
{
SCOPED_PROFILE_EVENT( "RunFrame" );
idEntity* ent;
int num;
float ms;

View file

@ -103,7 +103,11 @@ public:
}
};
#define SCOPED_PROFILE_EVENT( x ) idScopedProfileEvent scopedProfileEvent_##__LINE__( x )
// SCOPED_PROFILE_EVENT( name ) profiles the enclosing scope: through Optick when
// USE_OPTICK is non-zero, otherwise through an idScopedProfileEvent local.
#if USE_OPTICK
#define SCOPED_PROFILE_EVENT( x ) OPTICK_EVENT( x )
#else
// __LINE__ has to pass through one extra macro level before it is pasted:
// operands of ## are not macro-expanded, so "scopedProfileEvent_##__LINE__"
// always produced the literal identifier scopedProfileEvent___LINE__ and two
// events in the same scope collided.
#define SCOPED_PROFILE_EVENT_PASTE_( a, b ) a##b
#define SCOPED_PROFILE_EVENT_PASTE( a, b ) SCOPED_PROFILE_EVENT_PASTE_( a, b )
#define SCOPED_PROFILE_EVENT( x ) idScopedProfileEvent SCOPED_PROFILE_EVENT_PASTE( scopedProfileEvent_, __LINE__ )( x )
#endif
ID_INLINE bool BeginTraceRecording( const char* szName )
{

View file

@ -71,6 +71,8 @@ class idSGFcompressThread : public idSysThread
public:
virtual int Run()
{
OPTICK_THREAD( "idSGFcompressThread" );
sgf->CompressBlock();
return 0;
}
@ -81,6 +83,8 @@ class idSGFdecompressThread : public idSysThread
public:
virtual int Run()
{
OPTICK_THREAD( "idSGFdecompressThread" );
sgf->DecompressBlock();
return 0;
}
@ -91,6 +95,8 @@ class idSGFwriteThread : public idSysThread
public:
virtual int Run()
{
OPTICK_THREAD( "idSGFwriteThread" );
sgf->WriteBlock();
return 0;
}
@ -101,6 +107,8 @@ class idSGFreadThread : public idSysThread
public:
virtual int Run()
{
OPTICK_THREAD( "idSGFreadThread" );
sgf->ReadBlock();
return 0;
}

View file

@ -90,6 +90,8 @@ be called directly in the foreground thread for comparison.
*/
int idGameThread::Run()
{
OPTICK_THREAD( "idGameThread" );
commonLocal.frameTiming.startGameTime = Sys_Microseconds();
// debugging tool to test frame dropping behavior

View file

@ -144,6 +144,11 @@ else()
endif()
# USE_OPTICK is always defined for idlib and, being PUBLIC, for everything linking it:
# 1 when the OPTICK option is enabled, 0 otherwise.
target_compile_definitions(idlib PUBLIC USE_OPTICK=$<BOOL:${OPTICK}>)
# if(MSVC)
# # set_source_files_properties(precompiled.cpp

View file

@ -1145,6 +1145,8 @@ idJobThread::Run
*/
int idJobThread::Run()
{
OPTICK_THREAD( GetName() );
threadJobListState_t threadJobListState[MAX_JOBLISTS];
int numJobLists = 0;
int lastStalledJobList = -1;

View file

@ -105,6 +105,11 @@ const int MAX_EXPRESSION_REGISTERS = 4096;
#include <GL/glew.h>
#endif
// RB: make Optick profiling available everywhere
// NOTE(review): the CMake targets in this change define USE_OPTICK as either 0 or 1, so
// defined( USE_OPTICK ) holds in both configurations and optick.h is pulled in even when
// profiling is off -- presumably so that the OPTICK_* macros used unconditionally in the
// engine still expand (to nothing). Do not turn this into "#if USE_OPTICK" without
// confirming that.
#if defined( USE_OPTICK )
#include "../libs/optick/optick.h"
#endif
#include "../renderer/Cinematic.h"
#include "../renderer/Material.h"
#include "../renderer/BufferObject.h"

22
neo/libs/optick/LICENSE Normal file
View file

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2019 Vadim Slyusarev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,73 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// GLOBAL SETTINGS
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// [x] USE_OPTICK - (Master Switch)
// [x] OPTICK_ENABLE_TRACING - (Enable Kernel-level tracing)
// [x] OPTICK_ENABLE_GPU_D3D12 - (GPU D3D12)
// [x] OPTICK_ENABLE_GPU_VULKAN - (GPU VULKAN)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MASTER SWITCH - use it for disabling profiler in final builds //
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if !defined(USE_OPTICK)
#define USE_OPTICK (1)
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Enable Low-level platform-specific tracing (Switch Contexts, Autosampling, etc.)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if !defined(OPTICK_ENABLE_TRACING)
#define OPTICK_ENABLE_TRACING (USE_OPTICK /*&& 0*/)
#endif //OPTICK_ENABLE_TRACING
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// GPU Counters
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if !defined(OPTICK_ENABLE_GPU)
#define OPTICK_ENABLE_GPU (USE_OPTICK /*&& 0*/)
#endif //OPTICK_ENABLE_GPU
// D3D12 GPU counters: follow OPTICK_ENABLE_GPU when compiling with MSVC (_MSC_VER defined),
// forced to 0 for every other compiler. Predefine OPTICK_ENABLE_GPU_D3D12 to override.
#if !defined(OPTICK_ENABLE_GPU_D3D12)
#if defined(_MSC_VER)
#define OPTICK_ENABLE_GPU_D3D12 (OPTICK_ENABLE_GPU /*&& 0*/)
#else
#define OPTICK_ENABLE_GPU_D3D12 (0)
#endif
#endif
// Vulkan GPU counters: same rule as D3D12 above -- enabled only for MSVC builds,
// 0 elsewhere. Predefine OPTICK_ENABLE_GPU_VULKAN to override.
#if !defined(OPTICK_ENABLE_GPU_VULKAN)
#if defined(_MSC_VER)
#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/)
#else
#define OPTICK_ENABLE_GPU_VULKAN (0)
#endif
#endif

1113
neo/libs/optick/optick.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,110 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick_capi.h"
#if USE_OPTICK
#include "optick_core.h"
#if defined(__MACH__)
#include <stdlib.h>
#else
#include <malloc.h>
#endif
#include <string.h>
// C entry point: forwards a thread name to Optick::RegisterThread.
// inThreadName does not have to be null-terminated; it is copied into a local
// 256-byte buffer (so at most 255 characters are kept) and terminated there.
OPTICK_API void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength)
{
	Optick::OptickString<256> nameBuffer(inThreadName, inThreadNameLength);
	Optick::RegisterThread(nameBuffer.data);
}
// C entry point: creates an event description and returns it as an opaque 64-bit handle
// (the description pointer cast to an integer).
// Both strings are length-delimited; they are copied into bounded local buffers
// (127 / 255 characters) and Optick is asked to copy them again via the COPY_* flags.
OPTICK_API uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine)
{
	Optick::OptickString<128> functionName(inFunctionName, inFunctionLength);
	Optick::OptickString<256> fileName(inFileName, inFileNameLenght);

	const uint8_t descriptionFlags =
		Optick::EventDescription::COPY_NAME_STRING |
		Optick::EventDescription::COPY_FILENAME_STRING |
		Optick::EventDescription::IS_CUSTOM_NAME;

	return (uint64_t)::Optick::CreateDescription(functionName.data, fileName.data, inFileLine, nullptr, Optick::Category::None, descriptionFlags);
}
// C entry point: starts an event for a description handle obtained from
// OptickAPI_CreateEventDescription and returns the event data as an opaque handle
// to be passed to OptickAPI_PopEvent.
OPTICK_API uint64_t OptickAPI_PushEvent(uint64_t inEventDescription)
{
	Optick::EventDescription* description = (Optick::EventDescription*)inEventDescription;
	return (uint64_t)Optick::Event::Start(*description);
}
// C entry point: stops the event identified by a handle returned from OptickAPI_PushEvent.
// A zero handle is ignored instead of being dereferenced.
// NOTE(review): the handle is the Event::Start result cast to an integer; Start presumably
// returns null when nothing is being captured -- confirm against optick_core.
OPTICK_API void OptickAPI_PopEvent(uint64_t inEventData)
{
	Optick::EventData* eventData = (Optick::EventData*)inEventData;
	if (eventData == nullptr)
	{
		// nothing was started for this handle, so there is nothing to stop
		return;
	}
	Optick::Event::Stop(*eventData);
}
// C entry point: closes the current frame and opens the next one.
// NOTE(review): the call order looks deliberate -- the event popped first is presumably
// the frame event pushed by the last statement of the previous call; confirm before
// reordering anything here.
OPTICK_API void OptickAPI_NextFrame()
{
// close the frame-level event that is still open
Optick::Event::Pop();
Optick::EndFrame();
Optick::Update();
Optick::BeginFrame();
// open the frame-level event for the new frame
Optick::Event::Push(*Optick::GetFrameDescription());
}
// C entry point: forwards to Optick::StartCapture.
OPTICK_API void OptickAPI_StartCapture()
{
	Optick::StartCapture();
}
// C entry point: stops the running capture and saves it.
// inFileName is length-delimited; it is copied into a local 256-byte buffer
// (at most 255 characters) before being handed to Optick::SaveCapture.
OPTICK_API void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength)
{
	Optick::OptickString<256> capturePath(inFileName, inFileNameLength);
	Optick::StopCapture();
	Optick::SaveCapture(capturePath.data);
}
// C entry points for attaching a tag value to the current scope. Each one turns the
// opaque description handle back into an Optick::EventDescription and forwards the value
// to the matching Optick::Tag::Attach overload. The handle is not validated.

// String value; inValue is length-delimited.
OPTICK_API void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t inValueLength)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, inValue, inValueLength);
}
// Signed 32-bit value.
OPTICK_API void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int32_t inValue)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, inValue);
}
// Single-precision float value.
OPTICK_API void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, inValue);
}
// Unsigned 32-bit value.
OPTICK_API void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, inValue);
}
// Unsigned 64-bit value.
OPTICK_API void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, inValue);
}
// Three-component point.
OPTICK_API void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z)
{
	Optick::EventDescription& description = *(Optick::EventDescription*)inEventDescription;
	Optick::Tag::Attach(description, x, y, z);
}
#endif //USE_OPTICK

View file

@ -0,0 +1,80 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Config
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#include "optick.config.h"
#include <stdint.h>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// EXPORTS
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_EXPORTS) && defined(_MSC_VER)
#define OPTICK_API __declspec(dllexport)
#else
#define OPTICK_API
#endif
#ifdef __cplusplus
extern "C" {
#endif
// C API surface. With USE_OPTICK enabled these are real functions implemented in
// optick_capi.cpp; with it disabled every entry point is a header-only no-op so that
// instrumented code compiles unchanged.
#if USE_OPTICK
OPTICK_API void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength);
OPTICK_API uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine);
OPTICK_API uint64_t OptickAPI_PushEvent(uint64_t inEventDescription);
OPTICK_API void OptickAPI_PopEvent(uint64_t inEventData);
OPTICK_API void OptickAPI_NextFrame();
OPTICK_API void OptickAPI_StartCapture();
OPTICK_API void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength);
OPTICK_API void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t intValueLength);
OPTICK_API void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int inValue);
OPTICK_API void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue);
OPTICK_API void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue);
OPTICK_API void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue);
OPTICK_API void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z);
#else
// The stubs mirror the declarations above name-for-name and type-for-type. Previously the
// Int32 stub was spelled OptickAPI_AttachTag_Int and the UInt32 stub was spelled
// OptickAPI_AttachTag_Int32, so callers of the real API failed to compile when profiling
// was switched off.
// "static inline" (not plain "inline") keeps the header usable from C, where a plain
// inline definition does not provide an external definition to link against.
static inline void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength) {}
static inline uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine) { return 0; }
static inline uint64_t OptickAPI_PushEvent(uint64_t inEventDescription) { return 0; }
static inline void OptickAPI_PopEvent(uint64_t inEventData) {}
static inline void OptickAPI_NextFrame() {}
static inline void OptickAPI_StartCapture() {}
static inline void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength) {}
static inline void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t intValueLength) {}
static inline void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int inValue) {}
static inline void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue) {}
static inline void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue) {}
static inline void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue) {}
static inline void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z) {}
// Legacy spelling that only ever existed in this disabled branch; kept so that code
// written against it keeps compiling.
static inline void OptickAPI_AttachTag_Int(uint64_t inEventDescription, int inValue) {}
#endif
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -0,0 +1,187 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include "optick.h"
#include <cstdio>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#if defined(OPTICK_MSVC)
#ifdef OPTICK_UE4
#include "Core/Public/Windows/AllowWindowsPlatformTypes.h"
#endif
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#ifdef OPTICK_UE4
#include "Core/Public/Windows/HideWindowsPlatformTypes.h"
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#endif
namespace Optick
{
	// Fixed-width integer aliases used throughout Optick. The 64-bit pair comes from the
	// compiler-specific type on MSVC and from <stdint.h> on GCC-compatible compilers.
	using int8 = signed char;
	using uint8 = unsigned char;
	using byte = unsigned char;
	using int16 = short;
	using uint16 = unsigned short;
	using int32 = int;
	using uint32 = unsigned int;
#if defined(OPTICK_MSVC)
	using int64 = __int64;
	using uint64 = unsigned __int64;
#elif defined(OPTICK_GCC)
	using int64 = int64_t;
	using uint64 = uint64_t;
#else
#error Compiler is not supported
#endif

	// Fail the build early if an alias does not have the width its name promises.
	static_assert(sizeof(int8) == 1, "Invalid type size, int8");
	static_assert(sizeof(uint8) == 1, "Invalid type size, uint8");
	static_assert(sizeof(byte) == 1, "Invalid type size, byte");
	static_assert(sizeof(int16) == 2, "Invalid type size, int16");
	static_assert(sizeof(uint16) == 2, "Invalid type size, uint16");
	static_assert(sizeof(int32) == 4, "Invalid type size, int32");
	static_assert(sizeof(uint32) == 4, "Invalid type size, uint32");
	static_assert(sizeof(int64) == 8, "Invalid type size, int64");
	static_assert(sizeof(uint64) == 8, "Invalid type size, uint64");

	// Thread and process identifiers; the all-ones value marks "no thread" / "no process".
	using ThreadID = uint64;
	static const ThreadID INVALID_THREAD_ID = (ThreadID)-1;
	using ProcessID = uint32;
	static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Memory
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#define OPTICK_ALIGN(N) __declspec( align( N ) )
#elif defined(OPTICK_GCC)
#define OPTICK_ALIGN(N) __attribute__((aligned(N)))
#else
#error Can not define OPTICK_ALIGN. Unknown platform.
#endif
#define OPTICK_ARRAY_SIZE(ARR) (sizeof(ARR)/sizeof((ARR)[0]))
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#define OPTICK_NOINLINE __declspec(noinline)
#elif defined(OPTICK_GCC)
#define OPTICK_NOINLINE __attribute__((__noinline__))
#else
#error Compiler is not supported
#endif
////////////////////////////////////////////////////////////////////////
// OPTICK_THREAD_LOCAL
////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#define OPTICK_THREAD_LOCAL __declspec(thread)
#elif defined(OPTICK_GCC)
#define OPTICK_THREAD_LOCAL __thread
#else
#error Can not define OPTICK_THREAD_LOCAL. Unknown platform.
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Asserts
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#define OPTICK_DEBUG_BREAK __debugbreak()
#elif defined(OPTICK_GCC)
#define OPTICK_DEBUG_BREAK __builtin_trap()
#else
#error Can not define OPTICK_DEBUG_BREAK. Unknown platform.
#endif
// Silences "unused variable/parameter" warnings for x.
#define OPTICK_UNUSED(x) (void)(x)
// OPTICK_ASSERT / OPTICK_FAILED trap into the debugger only when _DEBUG is defined and
// expand to nothing otherwise; the description argument is never evaluated or printed.
// NOTE(review): these expand to a bare "if (...) { ... }" / "{ ... }" rather than a
// do { } while (0) statement, so using them as the unbraced body of an if/else can bind
// the else unexpectedly -- check call sites before relying on that.
#ifdef _DEBUG
#define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK; }
#define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK; }
#else
#define OPTICK_ASSERT(arg, description)
#define OPTICK_FAILED(description)
#endif
// Unlike OPTICK_ASSERT this check is active in every configuration: when arg is false it
// traps and then executes the recovery statement passed as operation.
#define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK; operation; }
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Safe functions
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_LINUX) || defined(OPTICK_OSX)
// Array-deducing stand-in for the Microsoft sprintf_s on Linux/macOS builds.
// Formats into buffer, never writing more than sizeOfBuffer bytes including the
// terminator, and returns what vsnprintf returns: the length the full output
// would have had, or a negative value on an encoding error.
template<size_t sizeOfBuffer>
inline int sprintf_s(char(&buffer)[sizeOfBuffer], const char* format, ...)
{
	va_list args;
	va_start(args, format);
	const int written = vsnprintf(buffer, sizeOfBuffer, format, args);
	va_end(args);
	return written;
}
#endif
#if defined(OPTICK_GCC)
#include <string.h>
// Array-deducing stand-in for wcstombs_s on GCC-compatible compilers.
// Converts the wide string src into buffer, writing at most maxCount bytes and never
// more than the buffer can hold; the result is always null-terminated.
// Returns the number of bytes written (terminator excluded), or -1 if src contains a
// character that cannot be converted, in which case buffer is set to an empty string.
template<size_t sizeOfBuffer>
inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount)
{
	// wcstombs trusts the count it is given and only terminates when it has room left,
	// so clamp to the real capacity and keep one byte back for the terminator.
	const size_t capacity = sizeOfBuffer - 1;
	const size_t limit = maxCount < capacity ? maxCount : capacity;
	const size_t written = wcstombs(buffer, src, limit);
	if (written == (size_t)-1)
	{
		buffer[0] = 0;
		return -1;
	}
	buffer[written] = 0;
	return (int)written;
}
#endif
#if defined(OPTICK_MSVC)
// MSVC flavour: thin wrapper that lets callers use the same three-argument form as the
// GCC version; the destination size is deduced from the array and the converted-length
// out-parameter of the CRT function is discarded.
// Returns the CRT function's result unchanged.
template<size_t sizeOfBuffer>
inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount)
{
	size_t convertedCount = 0;
	return ::wcstombs_s(&convertedCount, buffer, src, maxCount);
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#endif //USE_OPTICK

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,77 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#if defined(__FreeBSD__)
#include "optick.config.h"
#if USE_OPTICK
#include "optick_core.platform.h"
#include <sys/time.h>
#include <sys/types.h>
#include <pthread.h>
#include <unistd.h>
namespace Optick
{
	// Platform identifier string for this backend.
	const char* Platform::GetName()
	{
		return "PS4";
	}

	// Identifier of the calling thread, taken from its pthread handle.
	ThreadID Platform::GetThreadID()
	{
		return (uint64_t)pthread_self();
	}

	// Identifier of the current process.
	ProcessID Platform::GetProcessID()
	{
		return (ProcessID)getpid();
	}

	// Timestamp resolution: GetTime counts nanoseconds, so there are 1e9 ticks per second.
	int64 Platform::GetFrequency()
	{
		return 1000000000;
	}

	// Current CLOCK_REALTIME reading expressed in nanoseconds.
	int64 Platform::GetTime()
	{
		struct timespec now;
		clock_gettime(CLOCK_REALTIME, &now);
		return now.tv_sec * 1000000000LL + now.tv_nsec;
	}

	// No tracing backend is provided here.
	Trace* Platform::CreateTrace()
	{
		return nullptr;
	}

	// No symbol engine is provided here.
	SymbolEngine* Platform::CreateSymbolEngine()
	{
		return nullptr;
	}
}
#endif //USE_OPTICK
#endif //__FreeBSD__

View file

@ -0,0 +1,653 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include <mutex>
#include <thread>
#include "optick_common.h"
#include "optick_memory.h"
#include "optick_message.h"
#include "optick_serialization.h"
#include "optick_gpu.h"
#include <atomic>
// We expect to have 1k unique strings going through Optick at once
// The chances to hit a collision are 1 in 10 trillion (odds of a meteor landing on your house)
// We should be quite safe here :)
// https://preshing.com/20110504/hash-collision-probabilities/
// Feel free to add a seed and wait for another strike if armageddon starts
namespace Optick
{
// A string reduced to its 64-bit hash; equality and ordering compare the hash only.
struct StringHash
{
	uint64 hash;

	// Wraps an already computed hash value.
	StringHash(size_t h)
		: hash(h)
	{
	}

	// Hashes a C string with CalcHash.
	StringHash(const char* str)
		: hash(CalcHash(str))
	{
	}

	bool operator==(const StringHash& rhs) const
	{
		return hash == rhs.hash;
	}

	bool operator<(const StringHash& rhs) const
	{
		return hash < rhs.hash;
	}

	// Defined out of line.
	static uint64 CalcHash(const char* str);
};
}
// Overriding default hash function to return hash value directly
namespace std
{
	// Optick::StringHash already stores a computed hash, so hashing one simply hands
	// that value back (narrowed to size_t).
	template<>
	struct hash<Optick::StringHash>
	{
		size_t operator()(const Optick::StringHash& key) const
		{
			return static_cast<size_t>(key.hash);
		}
	};
}
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct Trace;
struct SymbolEngine;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Header stored with every ScopeData: the time span covered by the scope, the
// board/thread/fiber indices it belongs to, and the frame type it was classified as.
// NOTE(review): the exact meaning of boardNumber is not visible here -- confirm in optick_core.cpp.
struct ScopeHeader
{
EventTime event;
uint32 boardNumber;
int32 threadNumber;
int32 fiberNumber;
FrameType::Type type;
// defined out of line
ScopeHeader();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OutputDataStream& operator << ( OutputDataStream& stream, const ScopeHeader& ob);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct ScopeData
{
ScopeHeader header;
vector<EventData> categories;
vector<EventData> events;
ScopeData()
{
ResetHeader();
}
void AddEvent(const EventData& data)
{
events.push_back(data);
if (data.description->color != Color::Null)
{
categories.push_back(data);
}
}
void InitRootEvent(const EventData& data)
{
header.event.start = std::min(data.start, header.event.start);
header.event.finish = std::max(data.finish, header.event.finish);
AddEvent(data);
header.type = FrameType::NONE;
for (int i = 0; i < FrameType::COUNT; ++i)
if (GetFrameDescription((FrameType::Type)i) == data.description)
header.type = (FrameType::Type)i;
}
void ResetHeader();
void Send();
void Clear();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(OPTICK_MSVC)
#pragma warning( push )
#pragma warning( disable : 4996 )
#endif //OPTICK_MSVC
// Fixed-capacity, always NUL-terminated string of at most N - 1 characters.
// A null source pointer is stored as the text "null". Longer input is truncated.
template<int N>
struct OptickString
{
	char data[N];

	// Leaves the buffer uninitialized.
	OptickString() {}

	// Copies up to N - 1 characters of `text`.
	OptickString(const char* text)
	{
		*this = text;
	}

	// Copies up to min(length, N - 1) characters of `text`.
	OptickString(const char* text, uint16_t length)
	{
		const char* source = text ? text : "null";
		const uint16_t capacity = (uint16_t)(N - 1);
		const uint16_t maxLength = (length < capacity) ? length : capacity;

		strncpy(data, source, maxLength);
		data[maxLength] = 0;
	}

	// Replaces the content with up to N - 1 characters of `text`.
	OptickString<N>& operator=(const char* text)
	{
		const char* source = text ? text : "null";

		strncpy(data, source, N - 1);
		data[N - 1] = 0;
		return *this;
	}
};
#if defined(OPTICK_MSVC)
#pragma warning( pop )
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Three float coordinates (x, y, z).
struct Point
{
	float x, y, z;

	// Leaves the coordinates uninitialized.
	Point() {}

	// Builds a point from separate components.
	Point(float px, float py, float pz)
		: x(px)
		, y(py)
		, z(pz)
	{}

	// Builds a point from the first three floats of `pos`.
	Point(float pos[3])
		: x(pos[0])
		, y(pos[1])
		, z(pos[2])
	{}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Serializes an OptickString as a 32-bit character count followed by the
// characters themselves (no terminating NUL). The count is capped at N.
template<int N>
OutputDataStream& operator<<(OutputDataStream &stream, const OptickString<N>& ob)
{
	const size_t charCount = strnlen(ob.data, N);
	stream << (uint32)charCount;
	return stream.Write(ob.data, charCount);
}
// Stream serializers for Point and ScopeData (defined elsewhere).
OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob);
OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pools used to store events and synchronization records.
// NOTE(review): the second MemoryPool argument looks like a chunk size in elements - confirm against MemoryPool.
typedef MemoryPool<EventData, 1024> EventBuffer;
typedef MemoryPool<const EventData*, 32> CategoryBuffer;
typedef MemoryPool<SyncData, 1024> SynchronizationBuffer;
typedef MemoryPool<FiberSyncData, 1024> FiberSyncBuffer;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// String with a 32-byte buffer, i.e. at most 31 characters plus the terminator.
typedef OptickString<32> ShortString;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tag payload types, one per supported value type.
typedef TagData<float> TagFloat;
typedef TagData<int32> TagS32;
typedef TagData<uint32> TagU32;
typedef TagData<uint64> TagU64;
typedef TagData<Point> TagPoint;
typedef TagData<ShortString> TagString;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pools holding the tags of each value type (see EventStorage).
typedef MemoryPool<TagFloat, 1024> TagFloatBuffer;
typedef MemoryPool<TagS32, 1024> TagS32Buffer;
typedef MemoryPool<TagU32, 1024> TagU32Buffer;
typedef MemoryPool<TagU64, 1024> TagU64Buffer;
typedef MemoryPool<TagPoint, 64> TagPointBuffer;
typedef MemoryPool<TagString, 1024> TagStringBuffer;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Base64
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Decodes a Base64 encoded string and returns the decoded bytes as a string (defined elsewhere).
string base64_decode(string const& encoded_string);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Board
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pool that owns the EventDescription objects of the board.
typedef MemoryPool<EventDescription, 4096> EventDescriptionList;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Registry of event descriptions, accessed through the static Get().
// Besides the plain list it keeps a map of "shared" descriptions keyed by the
// hash of their name, guarded by sharedLock.
class EventDescriptionBoard
{
// List of stored Event Descriptions
EventDescriptionList boardDescriptions;
// Shared Descriptions
typedef unordered_map<StringHash, EventDescription*> DescriptionMap;
DescriptionMap sharedDescriptions;
// NOTE(review): presumably the backing storage for strings returned by CacheString - confirm.
MemoryBuffer<64 * 1024> sharedNames;
std::mutex sharedLock;
const char* CacheString(const char* text);
public:
// Creates a description; file, line, color, filter and flags are optional.
EventDescription* CreateDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0, uint8_t flags = 0);
// NOTE(review): presumably returns the existing description when one with the same name was already shared - confirm.
EventDescription* CreateSharedDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0);
static EventDescriptionBoard& Get();
const EventDescriptionList& GetEvents() const;
void Shutdown();
friend OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob);
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Storage for events, fiber sync records, tags and GPU events.
// ThreadEntry and FiberEntry each own one instance.
struct EventStorage
{
	Mode::Type currentMode;
	EventBuffer eventBuffer;
	FiberSyncBuffer fiberSyncBuffer;

	TagFloatBuffer tagFloatBuffer;
	TagS32Buffer tagS32Buffer;
	TagU32Buffer tagU32Buffer;
	TagU64Buffer tagU64Buffer;
	TagPointBuffer tagPointBuffer;
	TagStringBuffer tagStringBuffer;

	// GPU events: one buffer per (node, queue) pair.
	struct GPUStorage
	{
		static const int MAX_GPU_NODES = 2;
		array<array<EventBuffer, GPU_QUEUE_COUNT>, MAX_GPU_NODES> gpuBuffer;
		GPUContext context;

		void Clear(bool preserveMemory);
		EventData* Start(const EventDescription& desc);
		void Stop(EventData& data);
	};
	GPUStorage gpuStorage;

	// Stack of events opened through the push/pop API; the index counts open entries.
	uint32 pushPopEventStackIndex;
	array<EventData*, 32> pushPopEventStack;

	bool isFiberStorage;

	EventStorage();

	// Reserves the next event slot in the event buffer.
	OPTICK_INLINE EventData& NextEvent()
	{
		return eventBuffer.Add();
	}

	// Switches the mode off and clears every buffer, forwarding `preserveContent`
	// to each buffer's Clear(). The push/pop stack is unwound to empty as well.
	void Clear(bool preserveContent)
	{
		currentMode = Mode::OFF;

		eventBuffer.Clear(preserveContent);
		fiberSyncBuffer.Clear(preserveContent);
		gpuStorage.Clear(preserveContent);
		ClearTags(preserveContent);

		// The index may exceed the stack capacity; only in-range slots are reset.
		for (; pushPopEventStackIndex != 0; )
		{
			--pushPopEventStackIndex;
			if (pushPopEventStackIndex < pushPopEventStack.size())
			{
				pushPopEventStack[pushPopEventStackIndex] = nullptr;
			}
		}
	}

	// Clears all tag buffers, forwarding `preserveContent` to each of them.
	void ClearTags(bool preserveContent)
	{
		tagFloatBuffer.Clear(preserveContent);
		tagS32Buffer.Clear(preserveContent);
		tagU32Buffer.Clear(preserveContent);
		tagU64Buffer.Clear(preserveContent);
		tagPointBuffer.Clear(preserveContent);
		tagStringBuffer.Clear(preserveContent);
	}

	// Same as Clear(true).
	void Reset()
	{
		Clear(true);
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Description of a process: name, process id and a unique key.
struct ProcessDescription
{
string name;
ProcessID processID;
// NOTE(review): meaning of the key is not visible here - confirm with the constructor's callers.
uint64 uniqueKey;
ProcessDescription(const char* processName, ProcessID pid, uint64 key);
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Description of a thread: name, ids and capture related settings.
struct ThreadDescription
{
	string name;
	ThreadID threadID;
	ProcessID processID;
	int32 maxDepth;
	int32 priority;
	uint32 mask;

	// Two descriptions are equal when name, thread id and process id match.
	// maxDepth, priority and mask do not take part in the comparison.
	bool operator==(const ThreadDescription& other) const
	{
		return name == other.name
			&& threadID == other.threadID
			&& processID == other.processID;
	}

	ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 maxDepth = 1, int32 priority = 0, uint32 mask = 0);
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Description of a fiber: only its 64-bit id.
struct FiberDescription
{
uint64 id;
FiberDescription(uint64 _id)
: id(_id)
{}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A registered thread: its description, its event storage and the address of
// the slot (passed in as `tls`) that is cleared when the entry is destroyed.
struct ThreadEntry
{
	ThreadDescription description;
	EventStorage storage;
	// Address of the storage slot handed to the constructor; may be null.
	EventStorage** threadTLS;
	bool isAlive;

	ThreadEntry(const ThreadDescription& desc, EventStorage** tls) : description(desc), threadTLS(tls), isAlive(true) {}

	// RB: see Fix for crash on stop capture #1
	// https://github.com/ulricheck/optick/pull/1/commits/1e5e1919816a64f235caa0f4b0bf20495225b1fa
	//
	// Clears the slot so nothing keeps pointing at storage that is about to go away.
	// The slot pointer itself is checked first: the constructor accepts any
	// EventStorage**, so an entry created without a slot must not crash here.
	~ThreadEntry()
	{
		if (threadTLS != nullptr && *threadTLS != nullptr)
		{
			*threadTLS = nullptr;
		}
	}

	void Activate(Mode::Type mode);
	void Sort();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A registered fiber: its description plus its own event storage.
struct FiberEntry
{
FiberDescription description;
EventStorage storage;
FiberEntry(const FiberDescription& desc) : description(desc) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Lists of registered threads and fibers, stored as raw pointers.
typedef vector<ThreadEntry*> ThreadList;
typedef vector<FiberEntry*> FiberList;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// An event describing a system call: EventData extended with the call id and the calling thread id.
struct SysCallData : EventData
{
uint64 id;
uint64 threadID;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Serializes a SysCallData record into the output stream (defined elsewhere).
OutputDataStream &operator << (OutputDataStream &stream, const SysCallData &ob);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Collects SysCallData records in a pool and serializes them.
class SysCallCollector
{
typedef MemoryPool<SysCallData, 1024 * 32> SysCallPool;
public:
SysCallPool syscallPool;
// Returns a new record to be filled in by the caller.
SysCallData& Add();
void Clear();
bool Serialize(OutputDataStream& stream);
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// One sampled callstack: thread, timestamp and `count` entries pointed to by `callstack`.
struct CallstackDesc
{
uint64 threadID;
uint64 timestamp;
// Not owned by this struct as far as this header shows; `count` is a uint8, so at most 255 entries.
uint64* callstack;
uint8 count;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Collects sampled callstacks into a single pool of 64-bit words and serializes
// modules, symbols and the callstacks themselves.
class CallstackCollector
{
// Packed callstack list: {ThreadID, Timestamp, Count, Callstack[Count]}
typedef MemoryPool<uint64, 1024 * 32> CallstacksPool;
CallstacksPool callstacksPool;
public:
void Add(const CallstackDesc& desc);
void Clear();
bool SerializeModules(OutputDataStream& stream);
bool SerializeSymbols(OutputDataStream& stream);
bool SerializeCallstacks(OutputDataStream& stream);
bool IsEmpty() const;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// One context switch: at `timestamp` the CPU `cpuId` switched from `oldThreadId` to `newThreadId`.
struct SwitchContextDesc
{
int64_t timestamp;
uint64 oldThreadId;
uint64 newThreadId;
uint8 cpuId;
// Platform specific reason code; the Linux tracer fills it from the previous thread's state.
uint8 reason;
};
//////////////////////////////////////////////////////////////////////////
// Serializes a SwitchContextDesc record into the output stream (defined elsewhere).
OutputDataStream &operator << (OutputDataStream &stream, const SwitchContextDesc &ob);
//////////////////////////////////////////////////////////////////////////
// Collects context switch records in a pool and serializes them.
class SwitchContextCollector
{
typedef MemoryPool<SwitchContextDesc, 1024 * 32> SwitchContextPool;
SwitchContextPool switchContextPool;
public:
void Add(const SwitchContextDesc& desc);
void Clear();
bool Serialize(OutputDataStream& stream);
};
//////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Result codes returned when a tracer is started (see Trace::Start overrides)
// and sent back through Core::SendHandshakeResponse.
struct CaptureStatus
{
enum Type
{
OK = 0,
ERR_TRACER_ALREADY_EXISTS = 1,
ERR_TRACER_ACCESS_DENIED = 2,
ERR_TRACER_FAILED = 3,
ERR_TRACER_INVALID_PASSWORD = 4,
ERR_TRACER_NOT_IMPLEMENTED = 5,
};
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A frame event: EventData extended with the id of the thread that produced it.
// The thread id starts out as INVALID_THREAD_ID.
struct FrameData : public EventData
{
uint64_t threadID;
FrameData() : threadID(INVALID_THREAD_ID) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pool holding the recorded frames of one frame type (see FrameStorage).
typedef MemoryPool<FrameData, 128> FrameBuffer;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Per frame-type storage: the frame description, the recorded frames and a
// running frame counter that Core::GetCurrentFrame reads.
struct FrameStorage
{
	const EventDescription* m_Description;
	FrameBuffer m_Frames;
	std::atomic<uint32_t> m_FrameNumber;

	// Clears the recorded frames; the frame counter is left untouched.
	void Clear(bool preserveMemory = true)
	{
		m_Frames.Clear(preserveMemory);
	}

	// The counter is initialized explicitly: before C++20 a default-constructed
	// std::atomic holds an indeterminate value, and the counter is read through
	// Core::GetCurrentFrame.
	FrameStorage() : m_Description(nullptr), m_FrameNumber(0) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Central profiler object, accessed through the static Get().
// It keeps the registered threads and fibers, per frame-type storage, the
// collectors (callstacks, context switches, syscalls), the capture state and
// attachments, and exposes the capture control and dump entry points.
class Core
{
std::recursive_mutex coreLock;
std::recursive_mutex threadsLock;
ThreadList threads;
FiberList fibers;
int64 progressReportedLastTimestampMS;
// One FrameStorage per frame type, indexed by FrameType::Type.
array<FrameStorage, FrameType::COUNT> frames;
uint32 boardNumber;
CallstackCollector callstackCollector;
SwitchContextCollector switchContextCollector;
// Key-value pairs added through AttachSummary.
vector<std::pair<string, string>> summary;
// A named binary blob of a given file type added through AttachFile.
struct Attachment
{
string name;
vector<uint8_t> data;
File::Type type;
Attachment(File::Type t, const char* n) : name(n), type(t) {}
};
list<Attachment> attachments;
StateCallback stateCallback;
vector<ProcessDescription> processDescs;
vector<ThreadDescription> threadDescs;
State::Type currentState;
State::Type pendingState;
CaptureSettings settings;
uint32 forcedMainThreadIndex;
void UpdateEvents();
bool UpdateState();
uint32_t BeginUpdateFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID);
uint32_t EndUpdateFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID);
// Construction and destruction are private; instances are obtained through Get().
Core();
~Core();
void DumpCapturingProgress();
void SendHandshakeResponse(CaptureStatus::Type status);
void DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope);
void DumpTags(EventStorage& entry, ScopeData& scope);
void DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope);
void DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope);
void CleanupThreadsAndFibers();
void DumpBoard(uint32 mode, EventTime timeSlice);
void GenerateCommonSummary();
public:
void Activate(Mode::Type mode);
volatile Mode::Type currentMode;
volatile Mode::Type previousMode;
// Active Frame (is used as buffer)
static OPTICK_THREAD_LOCAL EventStorage* storage;
// Resolves symbols
SymbolEngine* symbolEngine;
// Controls GPU activity
// Graphics graphics;
// System scheduler trace
Trace* tracer;
// SysCall Collector
SysCallCollector syscallCollector;
// GPU Profiler
GPUProfiler* gpuProfiler;
// Returns thread collection
const vector<ThreadEntry*>& GetThreads() const;
// Request to start a new capture
void StartCapture();
// Request to stop an active capture
void StopCapture();
// Request to cancel an active capture
void CancelCapture();
// Requests to dump current capture
void DumpCapture();
// Report switch context event
bool ReportSwitchContext(const SwitchContextDesc& desc);
// Report stack walk (sampled callstack) event
bool ReportStackWalk(const CallstackDesc& desc);
// Serialize and send current profiling progress
void DumpProgress(const char* message = "");
void DumpProgressFormatted(const char* format, ...);
// Too much time from last report
bool IsTimeToReportProgress() const;
// Serialize and send frames
void DumpFrames(uint32 mode = Mode::DEFAULT);
// Serialize and send summary
void DumpSummary();
// Registers thread and create EventStorage
ThreadEntry* RegisterThread(const ThreadDescription& description, EventStorage** slot);
// UnRegisters thread
bool UnRegisterThread(ThreadID threadId, bool keepAlive = false);
// Check is registered thread
bool IsRegistredThread(ThreadID id);
// Registers fiber and create EventStorage
bool RegisterFiber(const FiberDescription& description, EventStorage** slot);
// Registers ProcessDescription
bool RegisterProcessDescription(const ProcessDescription& description);
// Registers ThreadDescription (used for threads from other processes)
bool RegisterThreadDescription(const ThreadDescription& description);
// Sets state change callback
bool SetStateChangedCallback(StateCallback cb);
// Attaches a key-value pair to the next capture
bool AttachSummary(const char* key, const char* value);
// Attaches a file (e.g. a screenshot) to the current capture;
// the content comes from a memory block, a stream or a file path
bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size);
bool AttachFile(File::Type type, const char* name, std::istream& stream);
bool AttachFile(File::Type type, const char* name, const char* path);
bool AttachFile(File::Type type, const char* name, const wchar_t* path);
// Initializes GPU profiler
void InitGPUProfiler(GPUProfiler* profiler);
// Applies capture settings
// NOTE(review): the original comment mentioned the root password for the device - confirm what CaptureSettings carries
bool SetSettings(const CaptureSettings& settings);
// Current Frame Number (since the game started)
uint32_t GetCurrentFrame(FrameType::Type frameType) const { return frames[frameType].m_FrameNumber; }
// Returns Frame Description
const EventDescription* GetFrameDescription(FrameType::Type frame) const;
// Main Update Function
void Update();
// Full Destruction
void Shutdown();
// Frame Flip functions
static uint32_t BeginFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID) { return Get().BeginUpdateFrame(frame, timestamp, threadID); }
static uint32_t EndFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID) { return Get().EndUpdateFrame(frame, timestamp, threadID); }
// Initialize Main ThreadID
void SetMainThreadID(uint64_t threadID);
// NOT Thread Safe singleton (performance)
static Core& Get();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -0,0 +1,446 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#if defined(__linux__)
#include "optick.config.h"
#if USE_OPTICK
#include "optick_core.platform.h"
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <pthread.h>
#include <unistd.h>
namespace Optick
{
	// Name of the platform this build targets, chosen at compile time.
	const char* Platform::GetName()
	{
#if defined(__ANDROID__)
		const char* platformName = "Android";
#else
		const char* platformName = "Linux";
#endif
		return platformName;
	}

	// Kernel thread id of the calling thread, obtained with the gettid system call.
	ThreadID Platform::GetThreadID()
	{
		const long kernelThreadId = syscall(SYS_gettid);
		return kernelThreadId;
	}

	// Id of the current process.
	ProcessID Platform::GetProcessID()
	{
		const pid_t currentPid = getpid();
		return (ProcessID)currentPid;
	}

	// Timer resolution: GetTime() counts nanoseconds, i.e. 10^9 ticks per second.
	int64 Platform::GetFrequency()
	{
		const int64 ticksPerSecond = 1000000000;
		return ticksPerSecond;
	}

	// Current value of the monotonic clock in nanoseconds.
	int64 Platform::GetTime()
	{
		struct timespec now;
		clock_gettime(CLOCK_MONOTONIC, &now);

		const long long wholeSeconds = now.tv_sec;
		const long long nanoseconds = now.tv_nsec;
		return wholeSeconds * 1000000000LL + nanoseconds;
	}
}
#if OPTICK_ENABLE_TRACING
#include "optick_memory.h"
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Plain structures describing ftrace events after they were parsed from text.
namespace ft
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Fields common to every event. The timestamp starts at -1 and the cpu id at
// uint8_t(-1) until the parser fills them in.
struct base_event
{
int64_t timestamp;
short common_type;
uint8_t cpu_id;
base_event(short type) : timestamp(-1), common_type(type), cpu_id(uint8_t(-1)) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Event tagged with a compile-time type id, stored both in the static `type`
// and in base_event::common_type.
template<short TYPE>
struct event : public base_event
{
static const short type = TYPE;
event() : base_event(TYPE) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Process states as reported in the prev_state field of sched_switch.
struct process_state
{
enum type
{
Unknown,
//D Uninterruptible sleep(usually IO)
UninterruptibleSleep,
//R Running or runnable(on run queue)
Running,
//S Interruptible sleep(waiting for an event to complete)
InterruptibleSleep,
//T Stopped, either by a job control signal or because it is being traced.
Stopped,
//X dead(should never be seen)
Dead,
//Z Defunct(“zombie”) process, terminated but not reaped by its parent.
Zombie,
};
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Scheduler context switch event.
// NOTE(review): 305 is presumably the ftrace event id of sched_switch - confirm; here it only serves as a type tag.
struct sched_switch : public event<305>
{
char prev_comm[16];
pid_t prev_pid;
int prev_prio;
process_state::type prev_state;
char next_comm[16];
pid_t next_pid;
int next_prio;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace ft
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Root directory of the ftrace control files.
static const char* KERNEL_TRACING_PATH = "/sys/kernel/debug/tracing";
// Control files, relative to KERNEL_TRACING_PATH.
static const char* FTRACE_TRACE = "trace";
static const char* FTRACE_TRACING_ON = "tracing_on";
static const char* FTRACE_TRACE_CLOCK = "trace_clock";
static const char* FTRACE_OPTIONS_IRQ_INFO = "options/irq-info";
static const char* FTRACE_SCHED_SWITCH = "events/sched/sched_switch/enable";
// Offset added to ft::process_state values to build SwitchContextDesc::reason.
// NOTE(review): 38 presumably aligns with reason codes expected by the Optick GUI - confirm.
static const uint8_t PROCESS_STATE_REASON_START = 38;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace implementation on top of the Linux ftrace control files.
// Every command is run through sudo, using the password set with SetPassword.
class FTrace : public Trace
{
// True between a successful Start() and the next Stop().
bool isActive;
string password;
// Thread ids whose ThreadDescription was already registered with Core.
unordered_set<pid_t> pidCache;
// Parses one line of the trace output.
bool Parse(const char* line);
bool ProcessEvent(const ft::base_event& ev);
// Write a value into a control file below KERNEL_TRACING_PATH.
bool Set(const char* name, bool value);
bool Set(const char* name, const char* value);
// Runs a shell command through sudo; returns true when it exits with 0.
bool Exec(const char* cmd);
public:
FTrace();
~FTrace();
virtual void SetPassword(const char* pwd) override { password = pwd; }
virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
virtual bool Stop() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Minimal forward-only cursor over a NUL-terminated line of text.
// `cursor` points at the next unread character, `finish` at the terminating NUL.
struct Parser
{
	const char* cursor;
	const char* finish;
	// Length of the whole line in characters (excluding the terminator).
	size_t length;

	// `b` must be a valid NUL-terminated string that outlives the parser.
	// Members are initialized in declaration order, so `length` can be derived
	// from `cursor` and `finish`.
	Parser(const char* b) : cursor(b), finish(b + strlen(b)), length((size_t)(finish - cursor)) {}

	// Advances by `count` characters. Fails (without moving) unless strictly
	// more than `count` characters remain.
	bool Skip(size_t count)
	{
		if ((size_t)(finish - cursor) > count)
		{
			cursor += count;
			return true;
		}
		return false;
	}

	// Finds the next occurrence of `text` and moves the cursor just past it.
	// When `output` is given, the characters skipped before `text` are copied
	// into it (truncated to size - 1) and NUL-terminated.
	// Returns false and leaves the cursor untouched when `text` is not found.
	bool Skip(const char* text, char* output = nullptr, size_t size = 0)
	{
		if (const char* ptr = strstr(cursor, text))
		{
			// A zero-sized buffer cannot even hold the terminator, and
			// `size - 1` would wrap around - leave such a buffer alone.
			if (output != nullptr && size > 0)
			{
				size_t count = std::min(size - 1, (size_t)(ptr - cursor));
				strncpy(output, cursor, count);
				output[count] = '\0';
			}
			cursor = ptr + strlen(text);
			return true;
		}
		return false;
	}

	// Skips spaces, tabs and newlines.
	void SkipSpaces()
	{
		while (cursor != finish && (*cursor == ' ' || *cursor == '\t' || *cursor == '\n'))
			++cursor;
	}

	// True when the unread text begins with `text`.
	bool Starts(const char* text) const
	{
		return strncmp(cursor, text, strlen(text)) == 0;
	}

	// Integer at the cursor as parsed by atoi (leading whitespace allowed,
	// 0 when there is no number). The cursor does not move.
	int GetInt() const
	{
		return atoi(cursor);
	}

	// Character at the cursor. The cursor does not move.
	char GetChar() const
	{
		return *cursor;
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Configures ftrace and switches tracing on.
// Does nothing when tracing is already active. If the very first sudo command
// fails, ERR_TRACER_INVALID_PASSWORD is reported. Failures of the remaining
// commands are ignored.
CaptureStatus::Type FTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
{
	if (isActive)
		return CaptureStatus::OK;

	// Disable tracing while the configuration is changed
	if (!Set(FTRACE_TRACING_ON, false))
		return CaptureStatus::ERR_TRACER_INVALID_PASSWORD;

	// Drop data left over from a previous run
	Set(FTRACE_TRACE, "");

	// Use the monotonic clock for event timestamps
	Set(FTRACE_TRACE_CLOCK, "mono");

	// Keep the irq info columns out of the output
	Set(FTRACE_OPTIONS_IRQ_INFO, false);

	// Context switch events are recorded only when the mode asks for them
	const bool wantsSwitchContext = (mode & Mode::SWITCH_CONTEXT) != 0;
	Set(FTRACE_SCHED_SWITCH, wantsSwitchContext);

	// Everything is configured - start tracing
	Set(FTRACE_TRACING_ON, true);
	isActive = true;

	return CaptureStatus::OK;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Stops tracing, reads the collected trace through sudo and feeds it line by
// line into Parse(), then clears the kernel buffer.
// Returns false when tracing was not active.
bool FTrace::Stop()
{
	if (!isActive)
	{
		return false;
	}

	// Reset variables
	Set(FTRACE_TRACING_ON, false);
	Set(FTRACE_SCHED_SWITCH, false);

	// Parsing the output
	// NOTE(review): the password is pasted into a single-quoted shell string; a
	// password containing a quote breaks the command, and a long one is cut off
	// by the 256 byte buffer - consider escaping it.
	char buffer[256] = { 0 };
	sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'cat %s/%s\'", password.c_str(), KERNEL_TRACING_PATH, FTRACE_TRACE);
	if (FILE* pipe = popen(buffer, "r"))
	{
		char* line = NULL;
		size_t len = 0;
		while ((getline(&line, &len, pipe)) != -1)
			Parse(line);

		// getline() allocates (and grows) the line buffer itself; it has to be
		// released by the caller, even when the very first call failed.
		std::free(line);

		pclose(pipe);
	}

	// Cleanup data
	Set(FTRACE_TRACE, "");
	pidCache.clear();
	isActive = false;
	return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Parses one line of ftrace output and forwards recognized events to ProcessEvent().
//
// Returns true for comment lines (starting with '#'), for well-formed lines
// carrying an event other than sched_switch, and for sched_switch lines that
// were processed. Returns false when the line does not match the expected layout.
bool FTrace::Parse(const char * line)
{
	// sched_switch:
	// ConsoleApp-8687 [000] 181944.352057: sched_switch: prev_comm=ConsoleApp prev_pid=8687 prev_prio=120 prev_state=S ==> next_comm=ConsoleApp next_pid=8686 next_prio=120
	Parser p(line);
	if (p.Starts("#"))
		return true;

	// Step over the task name column, then read "[cpu]"
	if (!p.Skip(16))
		return false;
	if (!p.Skip("["))
		return false;
	int cpu = p.GetInt();
	if (!p.Skip("]"))
		return false;

	// Timestamp is printed as "<seconds>.<fraction>:"
	int64 timestampInt = p.GetInt();
	if (!p.Skip("."))
		return false;
	int64 timestampFraq = p.GetInt();
	if (!p.Skip(": "))
		return false;

	// The fraction is treated as microseconds; the result is in nanoseconds
	int64 timestamp = ((timestampInt * 1000000) + timestampFraq) * 1000;

	if (p.Starts("sched_switch:"))
	{
		ft::sched_switch ev;
		ev.cpu_id = (uint8_t)cpu;
		ev.timestamp = timestamp;

		// Previous task: name, pid, priority, state
		if (!p.Skip("prev_comm="))
			return false;
		if (!p.Skip(" prev_pid=", ev.prev_comm, OPTICK_ARRAY_SIZE(ev.prev_comm)))
			return false;
		ev.prev_pid = p.GetInt();
		if (!p.Skip(" prev_prio="))
			return false;
		ev.prev_prio = p.GetInt();
		if (!p.Skip(" prev_state="))
			return false;

		// Only the first state letter is looked at
		switch (p.GetChar())
		{
		case 'D':
			ev.prev_state = ft::process_state::UninterruptibleSleep;
			break;
		case 'R':
			ev.prev_state = ft::process_state::Running;
			break;
		case 'S':
			ev.prev_state = ft::process_state::InterruptibleSleep;
			break;
		case 'T':
			ev.prev_state = ft::process_state::Stopped;
			break;
		case 'X':
			ev.prev_state = ft::process_state::Dead;
			break;
		case 'Z':
			ev.prev_state = ft::process_state::Zombie;
			break;
		default:
			ev.prev_state = ft::process_state::Unknown;
			break;
		}

		// Next task: name, pid, priority
		if (!p.Skip("==> next_comm="))
			return false;
		// The copy is bounded by the destination buffer itself (next_comm),
		// not by the size of its sibling prev_comm
		if (!p.Skip(" next_pid=", ev.next_comm, OPTICK_ARRAY_SIZE(ev.next_comm)))
			return false;
		ev.next_pid = p.GetInt();
		if (!p.Skip(" next_prio="))
			return false;
		ev.next_prio = p.GetInt();

		return ProcessEvent(ev);
	}

	return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Forwards a parsed event to Core.
// Only sched_switch events are handled: the switch is reported, and the first
// time a thread id shows up as the "next" thread its description is registered
// as well. Returns false for every other event type.
bool FTrace::ProcessEvent(const ft::base_event& ev)
{
	if (ev.common_type != ft::sched_switch::type)
		return false;

	const ft::sched_switch& switchEv = (const ft::sched_switch&)ev;

	SwitchContextDesc desc;
	desc.timestamp = switchEv.timestamp;
	desc.cpuId = switchEv.cpu_id;
	desc.oldThreadId = (uint64)switchEv.prev_pid;
	desc.newThreadId = (uint64)switchEv.next_pid;
	desc.reason = switchEv.prev_state + PROCESS_STATE_REASON_START;
	Core::Get().ReportSwitchContext(desc);

	// insert() reports whether the id was new, which saves a separate lookup
	const bool firstSeen = pidCache.insert(switchEv.next_pid).second;
	if (firstSeen)
	{
		Core::Get().RegisterThreadDescription(ThreadDescription(switchEv.next_comm, (ThreadID)switchEv.next_pid, (ProcessID)switchEv.next_pid, switchEv.next_prio));
	}

	return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Writes a boolean into an ftrace control file as "1" or "0".
bool FTrace::Set(const char * name, bool value)
{
	const char* flagText = value ? "1" : "0";
	return Set(name, flagText);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Writes `value` into the control file `name` below KERNEL_TRACING_PATH by
// running "echo <value> > <path>" through Exec(). The value is not quoted.
bool FTrace::Set(const char* name, const char* value)
{
	char command[256] = { 0 };
	sprintf_s(command, "echo %s > %s/%s", value, KERNEL_TRACING_PATH, name);

	const bool succeeded = Exec(command);
	return succeeded;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Runs `cmd` in a shell started by sudo; the stored password is piped to
// sudo's standard input and error output is discarded.
// Returns true when the command line exits with status 0.
bool FTrace::Exec(const char* cmd)
{
	char shellCommand[256] = { 0 };
	sprintf_s(shellCommand, "echo \'%s\' | sudo -S sh -c \'%s\' 2> /dev/null", password.c_str(), cmd);

	const int exitStatus = std::system(shellCommand);
	return exitStatus == 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A new tracer starts out inactive; the password stays empty until SetPassword is called.
FTrace::FTrace() : isActive(false)
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Stops tracing if it is still active (Stop() is a no-op otherwise).
FTrace::~FTrace()
{
Stop();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates the Linux tracer (FTrace), allocated through Optick's Memory::New.
Trace* Platform::CreateTrace()
{
return Memory::New<FTrace>();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// No symbol engine is provided on Linux; callers receive nullptr.
SymbolEngine* Platform::CreateSymbolEngine()
{
return nullptr;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //OPTICK_ENABLE_TRACING
#endif //USE_OPTICK
#endif //__linux__

View file

@ -0,0 +1,309 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#if defined(__APPLE_CC__)
#include "optick.config.h"
#if USE_OPTICK
#include "optick_core.platform.h"
#include <mach/mach_time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <pthread.h>
#include <unistd.h>
namespace Optick
{
	// Name of the platform this build targets.
	const char* Platform::GetName()
	{
		const char* platformName = "MacOS";
		return platformName;
	}

	// System-wide id of the calling thread as reported by pthread_threadid_np.
	ThreadID Platform::GetThreadID()
	{
		pthread_t self = pthread_self();
		uint64_t threadId;
		pthread_threadid_np(self, &threadId);
		return threadId;
	}

	// Id of the current process.
	ProcessID Platform::GetProcessID()
	{
		const pid_t currentPid = getpid();
		return (ProcessID)currentPid;
	}

	// Timer resolution: GetTime() counts nanoseconds, i.e. 10^9 ticks per second.
	int64 Platform::GetFrequency()
	{
		const int64 ticksPerSecond = 1000000000;
		return ticksPerSecond;
	}

	// Current value of the realtime (wall) clock in nanoseconds.
	int64 Platform::GetTime()
	{
		struct timespec now;
		clock_gettime(CLOCK_REALTIME, &now);

		const long long wholeSeconds = now.tv_sec;
		const long long nanoseconds = now.tv_nsec;
		return wholeSeconds * 1000000000LL + nanoseconds;
	}
}
#if OPTICK_ENABLE_TRACING
#include "optick_core.h"
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace implementation for macOS. Start() launches a worker thread
// (AsyncProcess) and Stop() sets a timeout and joins that thread.
class DTrace : public Trace
{
// When true, error output of the sudo check is redirected to /dev/null.
static const bool isSilent = true;
std::thread processThread;
string password;
enum State
{
STATE_IDLE,
STATE_RUNNING,
STATE_ABORT,
};
// Written by Start()/Stop(); NOTE(review): presumably also read or written by the worker thread - confirm in Process().
volatile State state;
// Set to INT64_MAX on Start() and to the current time on Stop().
volatile int64 timeout;
// Last known process, thread and priority for one CPU core.
struct CoreState
{
ProcessID pid;
ThreadID tid;
int prio;
bool IsValid() const { return tid != INVALID_THREAD_ID; }
CoreState() : pid(INVALID_PROCESS_ID), tid(INVALID_THREAD_ID), prio(0) {}
};
static const int MAX_CPU_CORES = 256;
array<CoreState, MAX_CPU_CORES> cores;
// Entry point of the worker thread.
static void AsyncProcess(DTrace* trace);
void Process();
// Verifies that sudo accepts the stored password.
bool CheckRootAccess();
enum ParseResult
{
PARSE_OK,
PARSE_TIMEOUT,
PARSE_FAILED,
};
ParseResult Parse(const char* line);
public:
DTrace();
virtual void SetPassword(const char* pwd) override { password = pwd; }
virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
virtual bool Stop() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Starts out idle with no stop timestamp requested.
DTrace::DTrace()
	: state(STATE_IDLE)
	, timeout(0)
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pipes the stored password into `sudo -S echo` and reports whether the
// shell command exited with status 0.
// NOTE(review): the password is placed verbatim between single quotes in a
// shell command line; a password containing a quote would change the command.
// Confirm whether callers sanitize it.
bool DTrace::CheckRootAccess()
{
	const char* stderrRedirect = isSilent ? "2> /dev/null" : "";
	char shellCommand[256] = { 0 };
	sprintf_s(shellCommand, "echo \'%s\' | sudo -S echo %s", password.c_str(), stderrRedirect);
	const int exitStatus = system(shellCommand);
	return exitStatus == 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Begins a context-switch capture when the tracer is idle and `mode` contains
// Mode::SWITCH_CONTEXT; otherwise does nothing and reports OK.
// Returns ERR_TRACER_INVALID_PASSWORD when the sudo check fails.
CaptureStatus::Type DTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
{
	const bool wantsContextSwitches = (mode & Mode::SWITCH_CONTEXT) != 0;
	if (state != STATE_IDLE || !wantsContextSwitches)
		return CaptureStatus::OK;

	if (!CheckRootAccess())
		return CaptureStatus::ERR_TRACER_INVALID_PASSWORD;

	state = STATE_RUNNING;
	// No stop time yet: accept every record until Stop() lowers the limit.
	timeout = INT64_MAX;
	cores.fill(CoreState());
	processThread = std::thread(AsyncProcess, this);

	return CaptureStatus::OK;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Ends a running capture: records the current time as the stop timestamp,
// waits for the worker thread and returns to idle.
// Returns false when no capture was running.
bool DTrace::Stop()
{
	const bool wasRunning = (state == STATE_RUNNING);
	if (wasRunning)
	{
		timeout = Platform::GetTime();
		processThread.join();
		state = STATE_IDLE;
	}
	return wasRunning;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// popen() variant that also reports the pid of the spawned shell.
//
// program - command line executed through `/bin/sh -c`.
// type    - "r" to read the child's stdout, "w" to write to its stdin.
// outPid  - optional; receives the child's pid so the caller can waitpid() it.
//
// Returns a stdio stream connected to the child, or NULL on failure
// (errno is EINVAL for an invalid `type`).
FILE* popen2(const char *program, const char *type, pid_t* outPid)
{
	if ((*type != 'r' && *type != 'w') || type[1] != '\0') {
		errno = EINVAL;
		return (NULL);
	}
	const bool reading = (*type == 'r');

	int pdes[2];
	if (pipe(pdes) < 0) {
		return (NULL);
	}

	const pid_t pid = fork();
	if (pid == -1) {
		/* Error. */
		(void)close(pdes[0]);
		(void)close(pdes[1]);
		return (NULL);
	}

	if (pid == 0) {
		/* Child: wire the pipe to stdout/stdin and exec the shell. */
		if (reading) {
			(void)close(pdes[0]);
			if (pdes[1] != STDOUT_FILENO) {
				(void)dup2(pdes[1], STDOUT_FILENO);
				(void)close(pdes[1]);
			}
		}
		else {
			(void)close(pdes[1]);
			if (pdes[0] != STDIN_FILENO) {
				(void)dup2(pdes[0], STDIN_FILENO);
				(void)close(pdes[0]);
			}
		}
		// The variadic sentinel must be a pointer, not a plain integer 0.
		execl("/bin/sh", "sh", "-c", program, (char*)NULL);
		perror("execl");
		// _exit(): do not run the parent's atexit handlers or flush the
		// stdio buffers that were duplicated by fork().
		_exit(127);
		/* NOTREACHED */
	}

	/* Parent: keep our end of the pipe, drop the child's end. */
	const int keepFd = reading ? pdes[0] : pdes[1];
	const int dropFd = reading ? pdes[1] : pdes[0];
	(void)close(dropFd);

	FILE* iop = fdopen(keepFd, type);
	if (iop == NULL) {
		// Do not leak the descriptor; the child then sees EOF / a broken pipe.
		(void)close(keepFd);
	}
	if (outPid)
		*outPid = pid;
	return (iop);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Worker body: launches `dtrace` through sudo, reads its output line by line
// and hands each line to Parse() until the capture leaves STATE_RUNNING, the
// stream ends, or Parse() reports PARSE_TIMEOUT.
void DTrace::Process()
{
	const char* command = "dtrace -n fbt::thread_dispatch:return'\\''{printf(\"@%d %d %d %d\", pid, tid, curthread->sched_pri, walltimestamp)}'\\''";
	char buffer[256] = { 0 };
	sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\' %s", password.c_str(), command, isSilent ? "2> /dev/null" : "");
	pid_t pid;
	if (FILE* pipe = popen2(buffer, "r", &pid))
	{
		char* line = NULL;
		size_t len = 0;
		while (state == STATE_RUNNING && (getline(&line, &len, pipe)) != -1)
		{
			if (Parse(line) == PARSE_TIMEOUT)
				break;
		}
		// getline() allocates (and grows) the buffer; the caller owns it.
		free(line);
		fclose(pipe);
		// Reap the child so it does not linger as a zombie.
		int internal_stat;
		waitpid(pid, &internal_stat, 0);
	}
	else
	{
		OPTICK_FAILED("Failed to open communication pipe!");
	}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Interprets one line of dtrace output of the form
//   <cpu> ... @<pid> <tid> <prio> <timestamp>
// and reports a context switch for that CPU when a previous thread is known.
//
// Returns PARSE_TIMEOUT when the record's timestamp is past `timeout`,
// PARSE_FAILED when the line is not a well-formed record (or names a CPU
// outside the `cores` table), and PARSE_OK otherwise.
DTrace::ParseResult DTrace::Parse(const char* line)
{
	const char* pidField = strchr(line, '@');
	if (pidField == nullptr)
		return PARSE_FAILED;

	// Each following field starts right after the next space; bail out on a
	// truncated line instead of dereferencing a null strchr() result.
	const char* tidSeparator = strchr(pidField, ' ');
	if (tidSeparator == nullptr)
		return PARSE_FAILED;
	const char* tidField = tidSeparator + 1;

	const char* prioSeparator = strchr(tidField, ' ');
	if (prioSeparator == nullptr)
		return PARSE_FAILED;
	const char* prioField = prioSeparator + 1;

	const char* timeSeparator = strchr(prioField, ' ');
	if (timeSeparator == nullptr)
		return PARSE_FAILED;
	const char* timeField = timeSeparator + 1;

	const int cpu = atoi(line);

	CoreState currState;
	currState.pid = atoi(pidField + 1);
	// Thread ids are wider than int; parse them as 64-bit.
	currState.tid = (ThreadID)atoll(tidField);
	currState.prio = atoi(prioField);

	int64_t timestamp = (int64_t)atoll(timeField);
	if (timestamp > timeout)
		return PARSE_TIMEOUT;

	// `cores` has a fixed size; ignore records for CPUs it cannot hold.
	if (cpu < 0 || cpu >= MAX_CPU_CORES)
		return PARSE_FAILED;

	const CoreState& prevState = cores[cpu];
	if (prevState.IsValid())
	{
		SwitchContextDesc desc;
		desc.reason = 0;
		desc.cpuId = cpu;
		desc.oldThreadId = prevState.tid;
		desc.newThreadId = currState.tid;
		desc.timestamp = timestamp;
		Core::Get().ReportSwitchContext(desc);
	}
	cores[cpu] = currState;

	return PARSE_OK;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Thread entry point: runs the processing loop of the given tracer.
void DTrace::AsyncProcess(DTrace *trace)
{
	DTrace& tracer = *trace;
	tracer.Process();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates the platform tracer (a DTrace instance allocated via Memory::New).
Trace* Platform::CreateTrace()
{
	Trace* tracer = Memory::New<DTrace>();
	return tracer;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// No symbol engine is provided by this implementation; always returns nullptr.
SymbolEngine* Platform::CreateSymbolEngine()
{
	SymbolEngine* noEngine = nullptr;
	return noEngine;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //OPTICK_ENABLE_TRACING
#endif //USE_OPTICK
#endif //__APPLE_CC__

View file

@ -0,0 +1,114 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include "optick_common.h"
#include "optick_memory.h"
//////////////////////////////////////////////////////////////////////////
// Platform-specific stuff
//////////////////////////////////////////////////////////////////////////
namespace Optick
{
struct Trace;
struct Module;
struct Symbol;
struct SymbolEngine;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform API
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Static facade over the operating-system specific primitives used by the
// profiler. Only declarations live here; the definitions are provided
// elsewhere.
struct Platform
{
	// Platform name.
	static OPTICK_INLINE const char* GetName();

	// System id of the calling thread.
	static OPTICK_INLINE ThreadID GetThreadID();

	// Id of the current process.
	static OPTICK_INLINE ProcessID GetProcessID();

	// Frequency of the clock read by GetTime().
	static OPTICK_INLINE int64 GetFrequency();

	// Current time in clock ticks.
	static OPTICK_INLINE int64 GetTime();

	// Creates the system tracer.
	static OPTICK_INLINE Trace* CreateTrace();

	// Creates the symbol resolver.
	static OPTICK_INLINE SymbolEngine* CreateSymbolEngine();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tracing API
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Interface of a system tracer that can be started and stopped.
struct Trace
{
	// Optional hook for tracers that need a password; ignored by default.
	virtual void SetPassword(const char* /*pwd*/)
	{
	}

	// Starts tracing; the result is reported as a CaptureStatus value.
	virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) = 0;

	// Stops tracing.
	virtual bool Stop() = 0;

	virtual ~Trace()
	{
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Symbol API
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Description of a module: its path, address and size.
struct Module
{
	string path;
	void* address;
	size_t size;

	Module(const char* p, void* a, size_t s)
		: path(p)
		, address(a)
		, size(s)
	{
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Symbol record: an address with offset plus file, function and line fields.
struct Symbol
{
	uint64 address;
	uint64 offset;
	wstring file;
	wstring function;
	uint32 line;

	// Numeric fields start at zero; the strings start empty.
	Symbol() : address(0), offset(0), line(0)
	{
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Interface of a symbol resolver.
struct SymbolEngine
{
	// Returns the list of loaded modules.
	virtual const vector<Module>& GetModules() = 0;

	// Returns the symbol for the given address.
	virtual const Symbol* GetSymbol(uint64 dwAddress) = 0;

	virtual ~SymbolEngine()
	{
	}
};
}
//////////////////////////////////////////////////////////////////////////
#endif //USE_OPTICK

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,157 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick_gpu.h"
#if USE_OPTICK
#include "optick_core.h"
#include "optick_memory.h"
#include <thread>
namespace Optick
{
// Query indices are 32-bit counters reduced modulo MAX_QUERIES_COUNT, so the
// counter wrap-around only stays consistent when 2^32 is a multiple of it.
static_assert((1ULL << 32) % GPUProfiler::MAX_QUERIES_COUNT == 0,
	"(1 << 32) should be a multiple of MAX_QUERIES_COUNT to handle query index overflow!");

// Starts switched off, on node 0, at frame 0.
GPUProfiler::GPUProfiler()
	: currentState(STATE_OFF)
	, currentNode(0)
	, frameNumber(0)
{
}
void GPUProfiler::InitNode(const char *nodeName, uint32_t nodeIndex)
{
Node* node = Memory::New<Node>();
for (int i = 0; i < GPU_QUEUE_COUNT; ++i)
{
char name[128] = { 0 };
sprintf_s(name, "%s [%s]", nodeName, GetGPUQueueName((GPUQueueType)i));
node->gpuEventStorage[i] = RegisterStorage(name, uint64_t(-1), ThreadMask::GPU);
node->name = nodeName;
}
nodes[nodeIndex] = node;
}
// Resets all nodes and moves the profiler to STATE_STARTING, under updateLock.
void GPUProfiler::Start(uint32 /*mode*/)
{
	std::lock_guard<std::recursive_mutex> guard(updateLock);

	Reset();
	currentState = STATE_STARTING;
}
// Switches the profiler to STATE_OFF, under updateLock.
void GPUProfiler::Stop(uint32 /*mode*/)
{
	std::lock_guard<std::recursive_mutex> guard(updateLock);

	currentState = STATE_OFF;
}
// For every node and GPU queue, appends the events each thread collected for
// that node/queue to the node's event storage for the queue.
void GPUProfiler::Dump(uint32 /*mode*/)
{
	for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
	{
		Node* node = nodes[nodeIndex];

		for (int queueIndex = 0; queueIndex < GPU_QUEUE_COUNT; ++queueIndex)
		{
			EventBuffer& gpuBuffer = node->gpuEventStorage[queueIndex]->eventBuffer;
			const vector<ThreadEntry*>& threads = Core::Get().GetThreads();

			for (ThreadEntry* thread : threads)
			{
				auto appendChunk = [&gpuBuffer](const EventData* chunk, int chunkSize)
				{
					gpuBuffer.AddRange(chunk, chunkSize);
				};
				thread->storage.gpuStorage.gpuBuffer[nodeIndex][queueIndex].ForEachChunk(appendChunk);
			}
		}
	}
}
// Returns the name of the first node, or an empty string when there are none.
string GPUProfiler::GetName() const
{
	if (nodes.empty())
		return string();

	return nodes[0]->name;
}
// Deletes every node and empties the node list.
GPUProfiler::~GPUProfiler()
{
	for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
	{
		Memory::Delete(nodes[nodeIndex]);
	}
	nodes.clear();
}
// Resets the query state of every node and refreshes its clock
// synchronization data.
void GPUProfiler::Reset()
{
	for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
	{
		Node* node = nodes[nodeIndex];
		node->Reset();
		node->clock = GetClockSynchronization(nodeIndex);
	}
}
// Appends a "GPU Frame" event to the current node's graphics queue storage.
// Start and finish are left invalid for the caller to fill in.
EventData& GPUProfiler::AddFrameEvent()
{
	static const EventDescription* GPUFrameDescription = EventDescription::Create("GPU Frame", __FILE__, __LINE__);

	EventBuffer& graphicsEvents = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS]->eventBuffer;
	EventData& frameEvent = graphicsEvents.Add();
	frameEvent.description = GPUFrameDescription;
	frameEvent.start = EventTime::INVALID_TIMESTAMP;
	frameEvent.finish = EventTime::INVALID_TIMESTAMP;
	return frameEvent;
}
// Appends a "VSync" event to the current node's VSync queue storage.
// Start and finish are left invalid for the caller to fill in.
EventData& GPUProfiler::AddVSyncEvent()
{
	static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__);

	EventBuffer& vsyncEvents = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer;
	EventData& vsyncEvent = vsyncEvents.Add();
	vsyncEvent.description = VSyncDescription;
	vsyncEvent.start = EventTime::INVALID_TIMESTAMP;
	vsyncEvent.finish = EventTime::INVALID_TIMESTAMP;
	return vsyncEvent;
}
// Appends a "Frame" tag carrying the current CPU frame number to the current
// node's graphics queue storage. The timestamp is left invalid for the caller.
TagData<uint32>& GPUProfiler::AddFrameTag()
{
	static const EventDescription* FrameTagDescription = EventDescription::CreateShared("Frame");

	EventStorage* graphicsStorage = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS];
	TagData<uint32>& frameTag = graphicsStorage->tagU32Buffer.Add();
	frameTag.description = FrameTagDescription;
	frameTag.timestamp = EventTime::INVALID_TIMESTAMP;
	frameTag.data = Core::Get().GetCurrentFrame(FrameType::CPU);
	return frameTag;
}
// Returns the display name of a GPU queue type.
// Values outside [0, GPU_QUEUE_COUNT), or without a table entry, yield
// "Unknown" instead of reading past the table.
const char * GetGPUQueueName(GPUQueueType queue)
{
	// Built once instead of on every call; indexed by GPUQueueType.
	static const char* const GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" };

	const int index = (int)queue;
	if (index < 0 || index >= GPU_QUEUE_COUNT)
		return "Unknown";

	const char* name = GPUQueueToName[index];
	return name ? name : "Unknown";
}
void GPUProfiler::Node::Reset()
{
queryIndex = 0;
for (size_t frameIndex = 0; frameIndex < queryGpuframes.size(); ++frameIndex)
queryGpuframes[frameIndex].Reset();
}
}
#endif //USE_OPTICK

View file

@ -0,0 +1,402 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick.config.h"
#if USE_OPTICK
#if OPTICK_ENABLE_GPU_D3D12
#include "optick_common.h"
#include "optick_memory.h"
#include "optick_core.h"
#include "optick_gpu.h"
#include <atomic>
#include <thread>
#include <d3d12.h>
#include <dxgi.h>
#include <dxgi1_4.h>
// Evaluates a call returning HRESULT and asserts that it returned S_OK.
// The (void) cast keeps the local "used" when OPTICK_ASSERT expands to nothing.
// Deliberately NOT terminated by ';' so that `OPTICK_CHECK(x);` is exactly one
// statement and remains safe inside unbraced if/else.
#define OPTICK_CHECK(args) do { HRESULT optickCheckHr_ = (args); (void)optickCheckHr_; OPTICK_ASSERT(optickCheckHr_ == S_OK, "Failed check"); } while(false)
namespace Optick
{
// Direct3D 12 backend of GPUProfiler: timestamps are written into per-node
// query heaps and read back through a shared readback buffer.
class GPUProfilerD3D12 : public GPUProfiler
{
	// Command recording objects used by the profiler for one in-flight frame.
	struct Frame
	{
		ID3D12CommandAllocator* commandAllocator;
		ID3D12GraphicsCommandList* commandList;

		Frame()
			: commandAllocator(nullptr)
			, commandList(nullptr)
		{
			Reset();
		}

		void Reset()
		{
		}

		// Releases the allocator and the command list.
		void Shutdown();

		~Frame()
		{
			Shutdown();
		}
	};

	// D3D12 objects belonging to one profiler node.
	struct NodePayload
	{
		ID3D12CommandQueue* commandQueue;
		ID3D12QueryHeap* queryHeap;
		ID3D12Fence* syncFence;
		array<Frame, NUM_FRAMES_DELAY> frames;

		NodePayload()
			: commandQueue(nullptr)
			, queryHeap(nullptr)
			, syncFence(nullptr)
		{
		}

		~NodePayload();
	};

	vector<NodePayload*> nodePayloads;
	ID3D12Resource* queryBuffer;
	ID3D12Device* device;

	// VSync Stats
	DXGI_FRAME_STATISTICS prevFrameStatistics;

	void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
	void ResolveTimestamps(uint32_t startIndex, uint32_t count);
	void WaitForFrame(uint64_t frameNumberToWait);

public:
	GPUProfilerD3D12();
	~GPUProfilerD3D12();

	void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues);
	void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
	void Flip(IDXGISwapChain* swapChain);

	// Interface implementation
	ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;

	// Type-erased entry point: `context` is treated as an ID3D12GraphicsCommandList*.
	void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
	{
		QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp);
	}

	// Type-erased entry point: `swapChain` is treated as an IDXGISwapChain*.
	void Flip(void* swapChain) override
	{
		Flip(static_cast<IDXGISwapChain*>(swapChain));
	}
};
// Calls Release() on *ppT when it is non-null and then nulls the pointer,
// so repeated calls are harmless.
template <class T> void SafeRelease(T **ppT)
{
	T* object = *ppT;
	if (!object)
		return;

	object->Release();
	*ppT = NULL;
}
// Creates a D3D12 GPU profiler for the given device and command queues and
// registers it with the profiler core.
void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues)
{
	GPUProfilerD3D12* d3d12Profiler = Memory::New<GPUProfilerD3D12>();
	d3d12Profiler->InitDevice(device, cmdQueues, numQueues);

	Core::Get().InitGPUProfiler(d3d12Profiler);
}
// Starts without a device or readback buffer and with zeroed VSync statistics.
GPUProfilerD3D12::GPUProfilerD3D12()
	: queryBuffer(nullptr)
	, device(nullptr)
{
	prevFrameStatistics = { 0 };
}
// Destroys the per-node payloads and nodes, then releases the readback buffer.
GPUProfilerD3D12::~GPUProfilerD3D12()
{
	for (size_t payloadIndex = 0; payloadIndex < nodePayloads.size(); ++payloadIndex)
	{
		Memory::Delete(nodePayloads[payloadIndex]);
	}
	nodePayloads.clear();

	// The list is cleared here, so the base destructor finds nothing to delete.
	for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
	{
		Memory::Delete(nodes[nodeIndex]);
	}
	nodes.clear();

	SafeRelease(&queryBuffer);
}
// Binds the profiler to a device: creates the shared timestamp readback
// buffer, looks up the adapter name and initializes one node per command queue.
//
// pDevice          - device used to create all profiler resources.
// pCommandQueues   - array of `numCommandQueues` queues; queue i drives node i.
// numCommandQueues - number of nodes to create.
void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues)
{
	device = pDevice;

	uint32_t nodeCount = numCommandQueues; // device->GetNodeCount();

	nodes.resize(nodeCount);
	nodePayloads.resize(nodeCount);

	D3D12_HEAP_PROPERTIES heapDesc;
	heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
	heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
	heapDesc.CreationNodeMask = 0;
	heapDesc.VisibleNodeMask = (1u << nodeCount) - 1u;
	heapDesc.Type = D3D12_HEAP_TYPE_READBACK;

	D3D12_RESOURCE_DESC resourceDesc;
	resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
	resourceDesc.Alignment = 0;
	resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t);
	resourceDesc.Height = 1;
	resourceDesc.DepthOrArraySize = 1;
	resourceDesc.MipLevels = 1;
	resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
	resourceDesc.SampleDesc.Count = 1;
	resourceDesc.SampleDesc.Quality = 0;
	resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
	resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

	OPTICK_CHECK(device->CreateCommittedResource(
		&heapDesc,
		D3D12_HEAP_FLAG_NONE,
		&resourceDesc,
		D3D12_RESOURCE_STATE_COPY_DEST,
		nullptr,
		IID_PPV_ARGS(&queryBuffer)));

	// Get Device Name.
	// Every step can fail; on failure the name simply stays empty instead of
	// dereferencing an uninitialized adapter pointer or reading an unset desc.
	char deviceName[128] = { 0 };
	LUID adapterLUID = pDevice->GetAdapterLuid();

	IDXGIFactory4* factory = nullptr;
	OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)));
	if (factory != nullptr)
	{
		IDXGIAdapter1* adapter = nullptr;
		if (SUCCEEDED(factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter))) && adapter != nullptr)
		{
			DXGI_ADAPTER_DESC1 desc = {};
			if (SUCCEEDED(adapter->GetDesc1(&desc)))
				wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1);
			adapter->Release();
		}
		factory->Release();
	}

	for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex)
		InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]);
}
void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue)
{
GPUProfiler::InitNode(nodeName, nodeIndex);
NodePayload* node = Memory::New<NodePayload>();
nodePayloads[nodeIndex] = node;
node->commandQueue = pCmdQueue;
D3D12_QUERY_HEAP_DESC queryHeapDesc;
queryHeapDesc.Count = MAX_QUERIES_COUNT;
queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
queryHeapDesc.NodeMask = 1u << nodeIndex;
OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&node->queryHeap)));
OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&node->syncFence)));
for (Frame& frame : node->frames)
{
OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator)));
OPTICK_CHECK(device->CreateCommandList(1u << nodeIndex, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList)));
OPTICK_CHECK(frame.commandList->Close());
}
}
// While the profiler is running, reserves a query slot for `outCpuTimestamp`
// and records a timestamp query for that slot on `context`.
void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp)
{
	if (currentState != STATE_RUNNING)
		return;

	const uint32_t slot = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
	context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, slot);
}
void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count)
{
if (count)
{
Node* node = nodes[currentNode];
D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) };
void* pData = nullptr;
queryBuffer->Map(0, &range, &pData);
memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count);
queryBuffer->Unmap(0, 0);
// Convert GPU timestamps => CPU Timestamps
for (uint32_t index = startIndex; index < startIndex + count; ++index)
*node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]);
}
}
// Blocks, polling in 1 ms sleeps, until the current node's fence has reached
// `frameNumberToWait`.
void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait)
{
	OPTICK_EVENT();

	NodePayload* payload = nodePayloads[currentNode];
	const std::chrono::milliseconds pollInterval(1);
	while (payload->syncFence->GetCompletedValue() < frameNumberToWait)
	{
		std::this_thread::sleep_for(pollInterval);
	}
}
// Per-frame bookkeeping. While the profiler is running it:
//   1. closes the GPU frame event of the current frame slot and opens one for
//      the next slot,
//   2. records and submits a command list that resolves this frame's
//      timestamp queries into the readback buffer, then signals the fence
//      with the frame number,
//   3. waits for the frame that previously used the next slot and converts
//      its timestamps,
//   4. adds a VSync event when exactly one present happened since the last call.
// The frame counter advances on every call, running or not.
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
{
OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
std::lock_guard<std::recursive_mutex> lock(updateLock);
// A capture requested via Start() becomes active on the next Flip().
if (currentState == STATE_STARTING)
currentState = STATE_RUNNING;
if (currentState == STATE_RUNNING)
{
Node& node = *nodes[currentNode];
NodePayload& payload = *nodePayloads[currentNode];
// Frame slots are reused round-robin every NUM_FRAMES_DELAY frames.
uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
//Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY];
//Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY];
QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList;
ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator;
commandAllocator->Reset();
commandList->Reset(commandAllocator, nullptr);
// Close the frame event that was opened for this slot, if any.
if (EventData* frameEvent = currentFrame.frameEvent)
QueryTimestamp(commandList, &frameEvent->finish);
// Generate GPU Frame event for the next frame
EventData& event = AddFrameEvent();
QueryTimestamp(commandList, &event.start);
QueryTimestamp(commandList, &AddFrameTag().timestamp);
nextFrame.frameEvent = &event;
// Queries issued since this slot's start index belong to the current frame.
uint32_t queryBegin = currentFrame.queryIndexStart;
uint32_t queryEnd = node.queryIndex;
// (uint32_t)-1 is the "no start recorded yet" value set by QueryFrame::Reset().
if (queryBegin != (uint32_t)-1)
{
OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!");
currentFrame.queryIndexCount = queryEnd - queryBegin;
// Query slots form a ring of MAX_QUERIES_COUNT entries.
uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT;
uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT;
// NOTE(review): startIndex == finishIndex also takes the wrapped branch,
// where the second resolve may get a zero count — confirm that is intended.
if (startIndex < finishIndex)
{
commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t));
}
else
{
// The range wraps around the end of the ring: resolve it in two pieces.
commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t));
commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0);
}
}
commandList->Close();
payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
// The fence value doubles as "this frame number has finished on the GPU".
payload.commandQueue->Signal(payload.syncFence, frameNumber);
// Preparing Next Frame
// Try resolve timestamps for the current frame
if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount)
{
// The next slot still holds the frame submitted NUM_FRAMES_DELAY - 1 frames ago.
WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY);
uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
// Part of the range wrapped to the beginning of the ring.
if (resolveFinish > MAX_QUERIES_COUNT)
ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT);
}
nextFrame.queryIndexStart = queryEnd;
nextFrame.queryIndexCount = 0;
// Process VSync
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
// Only consecutive presents produce a VSync interval.
if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount))
{
EventData& data = AddVSyncEvent();
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
}
prevFrameStatistics = currentFrameStatistics;
}
// Counted even when the profiler is not running.
++frameNumber;
}
// Collects the CPU frequency plus the GPU timestamp frequency and a
// CPU/GPU clock calibration pair from the node's command queue.
GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex)
{
	ClockSynchronization sync;
	sync.frequencyCPU = GetHighPrecisionFrequency();

	ID3D12CommandQueue* queue = nodePayloads[nodeIndex]->commandQueue;
	queue->GetTimestampFrequency((uint64_t*)&sync.frequencyGPU);
	queue->GetClockCalibration((uint64_t*)&sync.timestampGPU, (uint64_t*)&sync.timestampCPU);

	return sync;
}
// Releases the query heap and the fence. The command queue pointer is not
// released here.
GPUProfilerD3D12::NodePayload::~NodePayload()
{
	SafeRelease(&queryHeap);
	SafeRelease(&syncFence);
}
// Releases the frame's command allocator and command list (if present).
void GPUProfilerD3D12::Frame::Shutdown()
{
	SafeRelease(&commandAllocator);
	SafeRelease(&commandList);
}
}
#else
#include "optick_common.h"
namespace Optick
{
// Stub used when OPTICK_ENABLE_GPU_D3D12 is off: only reports the failure.
void InitGpuD3D12(
	ID3D12Device* /*device*/,
	ID3D12CommandQueue** /*cmdQueues*/,
	uint32_t /*numQueues*/)
{
	OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! Can't initialize GPU Profiler!");
}
}
#endif //OPTICK_ENABLE_GPU_D3D12
#endif //USE_OPTICK

View file

@ -0,0 +1,151 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include <atomic>
#include <mutex>
#include "optick_common.h"
#include "optick_memory.h"
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace Optick
{
const char* GetGPUQueueName(GPUQueueType queue);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Base class of the GPU profilers. It owns the per-node query bookkeeping and
// event storages; graphics-API specific backends implement the three pure
// virtual functions at the bottom.
class GPUProfiler
{
public:
	static const int MAX_FRAME_EVENTS = 1024;
	static const int NUM_FRAMES_DELAY = 4;
	// Size of the per-node ring of timestamp query slots.
	static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY;

protected:
	enum State
	{
		STATE_OFF,
		STATE_STARTING,
		STATE_RUNNING,
		STATE_FINISHING,
	};

	// A matching pair of CPU/GPU timestamps plus both clock frequencies,
	// used to translate GPU timestamps into the CPU clock.
	struct ClockSynchronization
	{
		int64_t frequencyCPU;
		int64_t frequencyGPU;
		int64_t timestampCPU;
		int64_t timestampGPU;

		// Converts a GPU timestamp to the CPU clock.
		// Returns the calibration CPU timestamp unchanged when the GPU
		// frequency is unknown (0) instead of dividing by zero.
		int64_t GetCPUTimestamp(int64_t gpuTimestamp) const
		{
			if (frequencyGPU == 0)
				return timestampCPU;

			// Same value as delta * frequencyCPU / frequencyGPU, but computed
			// as quotient + remainder so the intermediate product cannot
			// overflow on long captures.
			const int64_t gpuDelta = gpuTimestamp - timestampGPU;
			const int64_t wholePart = gpuDelta / frequencyGPU;
			const int64_t remainder = gpuDelta % frequencyGPU;
			return timestampCPU + wholePart * frequencyCPU + remainder * frequencyCPU / frequencyGPU;
		}

		ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
	};

	// Queries issued during one in-flight frame.
	struct QueryFrame
	{
		EventData* frameEvent;
		uint32_t queryIndexStart;
		uint32_t queryIndexCount;

		QueryFrame()
		{
			Reset();
		}

		void Reset()
		{
			frameEvent = nullptr;
			// (uint32_t)-1 marks "no start index recorded".
			queryIndexStart = (uint32_t)-1;
			queryIndexCount = 0;
		}
	};

	// Per-node profiling state.
	struct Node
	{
		array<QueryFrame, NUM_FRAMES_DELAY> queryGpuframes;
		array<int64_t, MAX_QUERIES_COUNT> queryGpuTimestamps;
		// Where the converted CPU timestamp of each query slot is written.
		array<int64_t*, MAX_QUERIES_COUNT> queryCpuTimestamps;
		std::atomic<uint32_t> queryIndex;

		ClockSynchronization clock;

		array<EventStorage*, GPU_QUEUE_COUNT> gpuEventStorage;

		// Reserves the next query slot and remembers where its resolved
		// timestamp must be stored. Returns the slot index.
		uint32_t QueryTimestamp(int64_t* outCpuTimestamp)
		{
			uint32_t index = queryIndex.fetch_add(1) % MAX_QUERIES_COUNT;
			queryCpuTimestamps[index] = outCpuTimestamp;
			return index;
		}

		string name;

		void Reset();

		Node() : queryIndex(0) { gpuEventStorage.fill(nullptr); }
	};

	std::recursive_mutex updateLock;
	volatile State currentState;

	vector<Node*> nodes;
	uint32_t currentNode;

	uint32_t frameNumber;

	void Reset();

	EventData& AddFrameEvent();
	EventData& AddVSyncEvent();
	TagData<uint32>& AddFrameTag();

public:
	GPUProfiler();

	// Init
	virtual void InitNode(const char* nodeName, uint32_t nodeIndex);

	// Capture Controls
	virtual void Start(uint32 mode);
	virtual void Stop(uint32 mode);
	virtual void Dump(uint32 mode);

	virtual string GetName() const;

	// Interface to implement
	virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
	virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
	virtual void Flip(void* swapChain) = 0;

	virtual ~GPUProfiler();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -0,0 +1,422 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick.config.h"
#if USE_OPTICK
#if OPTICK_ENABLE_GPU_VULKAN
#include <vulkan/vulkan.h>
#include "optick_core.h"
#include "optick_gpu.h"
// Evaluates a call returning VkResult and asserts that it returned VK_SUCCESS.
// The (void) cast keeps the local "used" when OPTICK_ASSERT expands to nothing.
// NOTE(review): the definition itself ends with ';', so `OPTICK_VK_CHECK(x);`
// expands to two statements; avoid using it as the body of an unbraced if/else.
#define OPTICK_VK_CHECK(args) do { VkResult __hr = args; OPTICK_ASSERT(__hr == VK_SUCCESS, "Failed check"); (void)__hr; } while(false);
namespace Optick
{
// Vulkan backend of GPUProfiler.
class GPUProfilerVulkan : public GPUProfiler
{
private:
	// Table of Vulkan entry points used by the profiler.
	VulkanFunctions vulkanFunctions = {};

protected:
	// Vulkan objects used for one in-flight frame.
	struct Frame
	{
		VkCommandBuffer commandBuffer;
		VkFence fence;

		Frame()
			: commandBuffer(VK_NULL_HANDLE)
			, fence(VK_NULL_HANDLE)
		{
		}
	};

	// Vulkan objects belonging to one profiler node.
	struct NodePayload
	{
		VulkanFunctions* vulkanFunctions;
		VkDevice device;
		VkPhysicalDevice physicalDevice;
		VkQueue queue;
		VkQueryPool queryPool;
		VkCommandPool commandPool;

		array<Frame, NUM_FRAMES_DELAY> frames;

		NodePayload()
			: vulkanFunctions()
			, device(VK_NULL_HANDLE)
			, physicalDevice(VK_NULL_HANDLE)
			, queue(VK_NULL_HANDLE)
			, queryPool(VK_NULL_HANDLE)
			, commandPool(VK_NULL_HANDLE)
		{
		}

		~NodePayload();
	};

	vector<NodePayload*> nodePayloads;

	void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count);
	void WaitForFrame(uint64_t frameNumber);

public:
	GPUProfilerVulkan();
	~GPUProfilerVulkan();

	void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions);
	void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);

	// Interface implementation
	ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override;

	// Type-erased entry point: `context` is treated as a VkCommandBuffer.
	void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override
	{
		QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp);
	}

	void Flip(void* swapChain) override;
};
// Allocates a GPUProfilerVulkan through Optick's Memory allocator, initializes it with the
// supplied devices/queues and hands it to Core as the active GPU profiler.
void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions)
{
    GPUProfilerVulkan* const profiler = Memory::New<GPUProfilerVulkan>();
    profiler->InitDevice(vkDevices, vkPhysicalDevices, vkQueues, cmdQueuesFamily, numQueues, functions);

    Core& core = Core::Get();
    core.InitGPUProfiler(profiler);
}
// Nothing to set up here: all Vulkan resources are created later by InitDevice().
GPUProfilerVulkan::GPUProfilerVulkan() = default;
// Creates the per-node Vulkan resources used for GPU timestamp queries.
//   devices / physicalDevices / cmdQueues / cmdQueuesFamily - parallel arrays, each indexed 0..nodeCount-1
//   nodeCount - number of GPU nodes to initialize
//   functions - optional table of Vulkan entry points; when null the statically linked
//               vk* functions are used instead
// For every node this creates a timestamp query pool, a command pool, and one command
// buffer + fence per frame slot. Slot 0 is additionally used to submit an initial
// query-pool reset and to wait for it to complete.
void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
{
// Use the caller-provided function table if there is one, otherwise fall back to the
// directly linked Vulkan entry points.
if (functions != nullptr)
{
vulkanFunctions = *functions;
}
else
{
vulkanFunctions = {
vkGetPhysicalDeviceProperties,
(PFN_vkCreateQueryPool_)vkCreateQueryPool,
(PFN_vkCreateCommandPool_)vkCreateCommandPool,
(PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers,
(PFN_vkCreateFence_)vkCreateFence,
vkCmdResetQueryPool,
(PFN_vkQueueSubmit_)vkQueueSubmit,
(PFN_vkWaitForFences_)vkWaitForFences,
(PFN_vkResetCommandBuffer_)vkResetCommandBuffer,
(PFN_vkCmdWriteTimestamp_)vkCmdWriteTimestamp,
(PFN_vkGetQueryPoolResults_)vkGetQueryPoolResults,
(PFN_vkBeginCommandBuffer_)vkBeginCommandBuffer,
(PFN_vkEndCommandBuffer_)vkEndCommandBuffer,
(PFN_vkResetFences_)vkResetFences,
vkDestroyCommandPool,
vkDestroyQueryPool,
vkDestroyFence,
vkFreeCommandBuffers,
};
}
// Creation parameters shared by every node: a timestamp query pool with
// MAX_QUERIES_COUNT + 1 entries.
VkQueryPoolCreateInfo queryPoolCreateInfo;
queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
queryPoolCreateInfo.pNext = 0;
queryPoolCreateInfo.flags = 0;
queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1;
// Command pool parameters; queueFamilyIndex is filled in per node inside the loop.
VkCommandPoolCreateInfo commandPoolCreateInfo;
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolCreateInfo.pNext = 0;
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
nodes.resize(nodeCount);
nodePayloads.resize(nodeCount);
VkResult r;
for (uint32_t i = 0; i < nodeCount; ++i)
{
// Register the node under the physical device's name.
VkPhysicalDeviceProperties properties = { 0 };
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(physicalDevices[i], &properties);
GPUProfiler::InitNode(properties.deviceName, i);
NodePayload* nodePayload = Memory::New<NodePayload>();
nodePayloads[i] = nodePayload;
// The payload keeps a pointer to this profiler's function table (used by ~NodePayload).
nodePayload->vulkanFunctions = &vulkanFunctions;
nodePayload->device = devices[i];
nodePayload->physicalDevice = physicalDevices[i];
nodePayload->queue = cmdQueues[i];
r = (VkResult)(*vulkanFunctions.vkCreateQueryPool)(devices[i], &queryPoolCreateInfo, 0, &nodePayload->queryPool);
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i];
r = (VkResult)(*vulkanFunctions.vkCreateCommandPool)(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool);
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
// One primary command buffer and one fence per frame slot.
for (uint32_t j = 0; j < nodePayload->frames.size(); ++j)
{
Frame& frame = nodePayload->frames[j];
VkCommandBufferAllocateInfo allocInfo;
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.pNext = 0;
allocInfo.commandBufferCount = 1;
allocInfo.commandPool = nodePayload->commandPool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
r = (VkResult)(*vulkanFunctions.vkAllocateCommandBuffers)(nodePayload->device, &allocInfo, &frame.commandBuffer);
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
// Slot 0's fence starts unsignaled because it is submitted and waited on just below;
// every other slot's fence is created already signaled.
VkFenceCreateInfo fenceCreateInfo;
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.pNext = 0;
fenceCreateInfo.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT;
r = (VkResult)(*vulkanFunctions.vkCreateFence)(nodePayload->device, &fenceCreateInfo, 0, &frame.fence);
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
if (j == 0)
{
// Record and submit a one-time command buffer that resets queries
// [0, MAX_QUERIES_COUNT), then block until the GPU has executed it.
// NOTE(review): the return values of the calls in this branch are not checked.
VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
(*vulkanFunctions.vkBeginCommandBuffer)(frame.commandBuffer, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT);
(*vulkanFunctions.vkEndCommandBuffer)(frame.commandBuffer);
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = nullptr;
submitInfo.waitSemaphoreCount = 0;
submitInfo.pWaitSemaphores = nullptr;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &frame.commandBuffer;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
(*vulkanFunctions.vkQueueSubmit)(nodePayload->queue, 1, &submitInfo, frame.fence);
// (uint64_t)-1 is the maximum timeout value, i.e. wait without a practical limit.
(*vulkanFunctions.vkWaitForFences)(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetCommandBuffer)(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
}
}
}
}
// Reserves a query slot on the current node (remembering where the converted CPU timestamp
// must be written) and records a GPU timestamp write into `commandBuffer`.
// Does nothing unless the profiler is in STATE_RUNNING.
void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp)
{
    if (currentState != STATE_RUNNING)
        return;

    const uint32_t queryIndex = nodes[currentNode]->QueryTimestamp(outCpuTimestamp);
    VkQueryPool pool = nodePayloads[currentNode]->queryPool;
    (*vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pool, queryIndex);
}
// Reads back `count` 64-bit timestamp queries starting at `startIndex` for the current node,
// records a reset of that query range into `commandBuffer`, and converts every GPU
// timestamp into a CPU timestamp through the node's clock.
void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count)
{
    if (count == 0)
        return;

    Node* profilerNode = nodes[currentNode];
    NodePayload* vkPayload = nodePayloads[currentNode];

    // Results are tightly packed: 8 bytes per query, 8 byte stride.
    OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(vkPayload->device, vkPayload->queryPool, startIndex, count, 8 * count, &profilerNode->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
    (*vulkanFunctions.vkCmdResetQueryPool)(commandBuffer, vkPayload->queryPool, startIndex, count);

    // Convert GPU timestamps => CPU Timestamps
    const uint32_t endIndex = startIndex + count;
    for (uint32_t i = startIndex; i < endIndex; ++i)
    {
        *profilerNode->queryCpuTimestamps[i] = profilerNode->clock.GetCPUTimestamp(profilerNode->queryGpuTimestamps[i]);
    }
}
void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait)
{
OPTICK_EVENT();
int r = VK_SUCCESS;
do
{
NodePayload& payload = *nodePayloads[currentNode];
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
} while (r != VK_SUCCESS);
}
// Called once per presented frame (the swap chain argument is unused).
// While running, it closes the GPU frame event of the current frame slot, opens the frame
// event for the next slot, submits the profiler's own command buffer for this slot, and
// then tries to resolve the timestamps recorded for the next slot.
// frameNumber is incremented on every call, regardless of state.
void GPUProfilerVulkan::Flip(void* /*swapChain*/)
{
OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
std::lock_guard<std::recursive_mutex> lock(updateLock);
// A capture that was requested becomes active on the first Flip after the request.
if (currentState == STATE_STARTING)
currentState = STATE_RUNNING;
if (currentState == STATE_RUNNING)
{
Node& node = *nodes[currentNode];
NodePayload& payload = *nodePayloads[currentNode];
// Frame slots form a ring of NUM_FRAMES_DELAY entries.
uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY;
uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY;
QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex];
QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex];
VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer;
VkFence fence = payload.frames[currentFrameIndex].fence;
VkDevice device = payload.device;
VkQueue queue = payload.queue;
// Wait until the previous submission that used this slot has finished before reusing
// its command buffer and fence.
(*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
commandBufferBeginInfo.pNext = 0;
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
// Close the frame event that was opened for this slot, if any.
if (EventData* frameEvent = currentFrame.frameEvent)
QueryTimestamp(commandBuffer, &frameEvent->finish);
// Generate GPU Frame event for the next frame
EventData& event = AddFrameEvent();
QueryTimestamp(commandBuffer, &event.start);
QueryTimestamp(commandBuffer, &AddFrameTag().timestamp);
nextFrame.frameEvent = &event;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkEndCommandBuffer)(commandBuffer));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.pNext = nullptr;
submitInfo.waitSemaphoreCount = 0;
submitInfo.pWaitSemaphores = nullptr;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffer;
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence));
// Record how many queries were issued for the current slot; (uint32_t)-1 is compared
// against as the "no start index" value.
uint32_t queryBegin = currentFrame.queryIndexStart;
uint32_t queryEnd = node.queryIndex;
if (queryBegin != (uint32_t)-1)
{
currentFrame.queryIndexCount = queryEnd - queryBegin;
}
// Preparing Next Frame
// Try resolve timestamps for the current frame
// NOTE(review): ResolveTimestamps() records vkCmdResetQueryPool into the command buffer
// it is given, but `commandBuffer` has already been ended and submitted above - confirm
// that recording into it at this point is intended/valid.
if (nextFrame.queryIndexStart != (uint32_t)-1)
{
// Query indices wrap around MAX_QUERIES_COUNT, so a range may need to be resolved in
// two pieces: [startIndex, MAX_QUERIES_COUNT) and [0, finishIndex).
uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT;
if (startIndex < finishIndex)
{
ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex);
}
else if (startIndex > finishIndex)
{
ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex);
ResolveTimestamps(commandBuffer, 0, finishIndex);
}
}
// The next slot starts collecting queries from the current end of the query stream.
nextFrame.queryIndexStart = queryEnd;
nextFrame.queryIndexCount = 0;
}
++frameNumber;
}
// Samples a matching pair of GPU and CPU timestamps for node `nodeIndex`.
// A one-off command buffer writes a timestamp into query 0, the function waits for it to
// execute, reads the GPU value back, and then captures the CPU time and both clock
// frequencies. The GPU frequency is derived from the device's timestampPeriod.
GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex)
{
    GPUProfiler::ClockSynchronization clock;

    NodePayload& payload = *nodePayloads[nodeIndex];
    Frame& frame = payload.frames[frameNumber % NUM_FRAMES_DELAY];

    VkCommandBufferBeginInfo beginInfo;
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.pNext = 0;
    beginInfo.pInheritanceInfo = 0;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

    VkCommandBuffer cmd = frame.commandBuffer;
    VkDevice device = payload.device;
    VkFence fence = frame.fence;

    // Make sure the slot is idle, then re-record its command buffer from scratch.
    (*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
    (*vulkanFunctions.vkResetFences)(device, 1, &fence);
    (*vulkanFunctions.vkResetCommandBuffer)(cmd, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
    (*vulkanFunctions.vkBeginCommandBuffer)(cmd, &beginInfo);
    (*vulkanFunctions.vkCmdResetQueryPool)(cmd, payload.queryPool, 0, 1);
    (*vulkanFunctions.vkCmdWriteTimestamp)(cmd, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, payload.queryPool, 0);
    (*vulkanFunctions.vkEndCommandBuffer)(cmd);

    VkSubmitInfo submit = {};
    submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit.pNext = nullptr;
    submit.waitSemaphoreCount = 0;
    submit.pWaitSemaphores = nullptr;
    submit.commandBufferCount = 1;
    submit.pCommandBuffers = &cmd;
    submit.signalSemaphoreCount = 0;
    submit.pSignalSemaphores = nullptr;
    (*vulkanFunctions.vkQueueSubmit)(payload.queue, 1, &submit, fence);
    (*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);

    // GPU side first, CPU side immediately afterwards.
    clock.timestampGPU = 0;
    (*vulkanFunctions.vkGetQueryPoolResults)(device, payload.queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
    clock.timestampCPU = GetHighPrecisionTime();
    clock.frequencyCPU = GetHighPrecisionFrequency();

    VkPhysicalDeviceProperties deviceProperties;
    (*vulkanFunctions.vkGetPhysicalDeviceProperties)(payload.physicalDevice, &deviceProperties);
    clock.frequencyGPU = (uint64_t)(1000000000ll / deviceProperties.limits.timestampPeriod);

    return clock;
}
// Releases the node's command pool and query pool through the function table the payload
// was given at initialization.
GPUProfilerVulkan::NodePayload::~NodePayload()
{
    const VulkanFunctions& vk = *vulkanFunctions;
    vk.vkDestroyCommandPool(device, commandPool, nullptr);
    vk.vkDestroyQueryPool(device, queryPool, nullptr);
}
// Destroys the per-frame fences and command buffers of every node, then deletes the node
// payloads themselves (their destructor releases the pools).
GPUProfilerVulkan::~GPUProfilerVulkan()
{
    for (size_t nodeIdx = 0; nodeIdx < nodePayloads.size(); ++nodeIdx)
    {
        NodePayload* nodePayload = nodePayloads[nodeIdx];
        for (size_t frameIdx = 0; frameIdx < nodePayload->frames.size(); ++frameIdx)
        {
            Frame& slot = nodePayload->frames[frameIdx];
            (*vulkanFunctions.vkDestroyFence)(nodePayload->device, slot.fence, nullptr);
            (*vulkanFunctions.vkFreeCommandBuffers)(nodePayload->device, nodePayload->commandPool, 1, &slot.commandBuffer);
        }
        Memory::Delete(nodePayload);
    }
    nodePayloads.clear();
}
}
#else
#include "optick_common.h"
namespace Optick
{
    // Stub compiled when OPTICK_ENABLE_GPU_VULKAN is off: all arguments are ignored and a
    // failure is reported instead of creating a GPU profiler.
    void InitGpuVulkan(VkDevice*, VkPhysicalDevice*, VkQueue*, uint32_t*, uint32_t, const VulkanFunctions*)
    {
        OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! Can't initialize GPU Profiler!");
    }
}
#endif //OPTICK_ENABLE_GPU_VULKAN
#endif //USE_OPTICK

View file

@ -0,0 +1,470 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick_common.h"
#if USE_OPTICK
#include <cstring>
#include <new>
#include <stdlib.h>
#include <atomic>
#include <array>
#include <list>
#include <string>
#include <sstream>
#include <unordered_set>
#include <unordered_map>
#include <vector>
namespace Optick
{
// Central allocation facade for Optick.
// Every allocation goes through user-replaceable allocate/deallocate callbacks and is
// prefixed with a small Header that stores the total allocation size, which lets
// Free() keep the global `memAllocated` counter accurate.
class Memory
{
    // Bookkeeping record placed immediately before each block returned by Alloc().
    struct Header
    {
        uint64_t size; // total size of the allocation, header included
    };

#if defined(OPTICK_32BIT)
    static std::atomic<uint32_t> memAllocated;
#else
    static std::atomic<uint64_t> memAllocated;
#endif

    static void* (*allocate)(size_t);
    static void (*deallocate)(void*);
    static void (*initThread)(void);

public:
    // Allocates `size` usable bytes. Returns nullptr if the underlying allocator fails.
    static OPTICK_INLINE void* Alloc(size_t size)
    {
        size_t totalSize = size + sizeof(Header);
        void *ptr = allocate(totalSize);
        OPTICK_VERIFY(ptr, "Can't allocate memory", return nullptr);

        Header* header = (Header*)ptr;
        header->size = totalSize;
        memAllocated += totalSize;

        return (uint8_t*)ptr + sizeof(Header);
    }

    // Releases a block obtained from Alloc(). Passing nullptr is a no-op.
    static OPTICK_INLINE void Free(void* p)
    {
        if (p != nullptr)
        {
            uint8_t* basePtr = (uint8_t*)p - sizeof(Header);
            Header* header = (Header*)basePtr;
            memAllocated -= header->size;
            deallocate(basePtr);
        }
    }

    // Number of bytes currently allocated through this class (headers included).
    static OPTICK_INLINE size_t GetAllocatedSize()
    {
        return (size_t)memAllocated;
    }

    // Allocates and default-constructs a T.
    // Returns nullptr when Alloc() fails instead of constructing an object at a null address.
    template<class T>
    static T* New()
    {
        void* storage = Memory::Alloc(sizeof(T));
        return storage != nullptr ? new (storage) T() : nullptr;
    }

    // Allocates and constructs a T from one argument; nullptr when Alloc() fails.
    template<class T, class P1>
    static T* New(P1 p1)
    {
        void* storage = Memory::Alloc(sizeof(T));
        return storage != nullptr ? new (storage) T(p1) : nullptr;
    }

    // Allocates and constructs a T from two arguments; nullptr when Alloc() fails.
    template<class T, class P1, class P2>
    static T* New(P1 p1, P2 p2)
    {
        void* storage = Memory::Alloc(sizeof(T));
        return storage != nullptr ? new (storage) T(p1, p2) : nullptr;
    }

    // Destroys an object created with New() and frees its storage. nullptr is ignored.
    template<class T>
    static void Delete(T* p)
    {
        if (p)
        {
            p->~T();
            Free(p);
        }
    }

    // Installs the allocation callbacks and the optional per-thread init callback.
    static void SetAllocator(AllocateFn allocateFn, DeallocateFn deallocateFn, InitThreadCb initThreadCb)
    {
        allocate = allocateFn;
        deallocate = deallocateFn;
        initThread = initThreadCb;
    }

    // Invokes the per-thread init callback, if one was installed.
    static void InitThread()
    {
        if (initThread != nullptr)
            initThread();
    }

    // Standard-library compatible allocator that routes container memory through
    // Memory::Alloc / Memory::Free.
    template<typename T>
    struct Allocator : public std::allocator<T>
    {
        Allocator() {}

        template<class U>
        Allocator(const Allocator<U>&) {}

        template<typename U> struct rebind { typedef Allocator<U> other; };

        typename std::allocator<T>::value_type* allocate(typename std::allocator<T>::size_type n)
        {
            return reinterpret_cast<typename std::allocator<T>::value_type*>(Memory::Alloc(n * sizeof(T)));
        }

        // Overload taking an allocation hint; the hint is ignored.
        typename std::allocator<T>::value_type* allocate(typename std::allocator<T>::size_type n, const typename std::allocator<void>::value_type*)
        {
            return reinterpret_cast<typename std::allocator<T>::value_type*>(Memory::Alloc(n * sizeof(T)));
        }

        void deallocate(typename std::allocator<T>::value_type* p, typename std::allocator<T>::size_type)
        {
            Memory::Free(p);
        }
    };
};
// std::* section
// Optick-local counterparts of standard containers and strings. Everything that takes an
// allocator parameter is bound to Memory::Allocator, so its storage is obtained through
// Memory::Alloc / Memory::Free.
// The container wrappers are empty derived classes that declare no constructors of their own.
// `array` has no allocator and is a plain std::array wrapper.
template <typename T, size_t _Size> class array : public std::array<T, _Size>{};
template <typename T> class vector : public std::vector<T, Memory::Allocator<T>>{};
template <typename T> class list : public std::list<T, Memory::Allocator<T>>{};
template <typename T> class unordered_set : public std::unordered_set<T, std::hash<T>, std::equal_to<T>, Memory::Allocator<T>>{};
template <typename T, typename V> class unordered_map : public std::unordered_map<T, V, std::hash<T>, std::equal_to<T>, Memory::Allocator<std::pair<const T, V>>>{};
// String and string-stream types are aliases (not derived classes) using Memory::Allocator.
using string = std::basic_string<char, std::char_traits<char>, Memory::Allocator<char>>;
using wstring = std::basic_string<wchar_t, std::char_traits<wchar_t>, Memory::Allocator<wchar_t>>;
using istringstream = std::basic_istringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
using ostringstream = std::basic_ostringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
using stringstream = std::basic_stringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
// basic_fstream has no allocator parameter, so this alias is the standard char file stream.
using fstream = std::basic_fstream<char, std::char_traits<char>>;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Fixed-capacity node of a doubly linked list of storage blocks used by MemoryPool.
// Destroying a chunk also deletes every chunk that follows it in the list and unlinks
// the chunk from its predecessor.
template<class T, uint32 SIZE>
struct MemoryChunk
{
    T data[SIZE];
    MemoryChunk* next;
    MemoryChunk* prev;

    MemoryChunk() : next(nullptr), prev(nullptr) {}

    ~MemoryChunk()
    {
        // Find the tail of the list that starts at this chunk.
        MemoryChunk* tail = this;
        while (tail->next != nullptr)
            tail = tail->next;

        // Delete from the tail backwards until we are back at this chunk. Each deleted
        // chunk is the last one at that moment, so its own destructor has nothing to walk.
        for (MemoryChunk* current = tail; current != this; )
        {
            MemoryChunk* before = current->prev;
            Memory::Delete(current);
            current = before;
        }

        // Detach from the predecessor so it no longer points at a destroyed chunk.
        if (prev)
        {
            prev->next = nullptr;
            prev = nullptr;
        }
    }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Append-only pool of T stored in a linked list of fixed-size chunks (SIZE items each).
// Items are never moved once added. Clear(true) rewinds the pool but keeps the chunks
// for reuse; Clear(false) releases them.
//
// Fixes relative to the previous revision:
//  * const_iterator: prefix/postfix operator++ had swapped semantics;
//  * end(): was unreachable by iteration when the last chunk was exactly full;
//  * AddRange(): returned a pointer outside the written data when the current chunk was
//    full (or no chunk existed yet) and overlapping was allowed;
//  * const_iterator::operator->: relied on an implicit const T* -> T* conversion.
template<class T, uint32 SIZE = 16>
class MemoryPool
{
    typedef MemoryChunk<T, SIZE> Chunk;
    Chunk* root;   // first chunk of the list (nullptr while nothing is allocated)
    Chunk* chunk;  // chunk currently being filled
    uint32 index;  // next free slot inside `chunk`; SIZE means "no room left"

    // Moves to the next chunk (reusing a preserved one if present, otherwise allocating)
    // and resets the write position to its start.
    OPTICK_INLINE void AddChunk()
    {
        index = 0;
        if (!chunk || !chunk->next)
        {
            Chunk* newChunk = Memory::New<Chunk>();
            if (chunk)
            {
                chunk->next = newChunk;
                newChunk->prev = chunk;
                chunk = newChunk;
            }
            else
            {
                root = chunk = newChunk;
            }
        }
        else
        {
            chunk = chunk->next;
        }
    }

public:
    MemoryPool() : root(nullptr), chunk(nullptr), index(SIZE) {}

    // Appends one slot and returns a reference to it (contents are whatever the slot held).
    OPTICK_INLINE T& Add()
    {
        if (index >= SIZE)
            AddChunk();
        return chunk->data[index++];
    }

    // Appends a copy of `item`.
    OPTICK_INLINE T& Add(const T& item)
    {
        return Add() = item;
    }

    // Appends `count` items copied with memcpy and returns a pointer to the first one.
    // With allowOverlap == false the range is kept inside a single chunk (nullptr is
    // returned if count > SIZE); with allowOverlap == true it may continue in following
    // chunks, so only the first part is contiguous behind the returned pointer.
    // Returns nullptr when count == 0.
    OPTICK_INLINE T* AddRange(const T* items, size_t count, bool allowOverlap = true)
    {
        if (count == 0 || (count > SIZE && !allowOverlap))
            return nullptr;

        // Start a new chunk when the current one is full/absent (so the returned pointer
        // really addresses the first copied item), or when a contiguous range would not fit.
        if (index >= SIZE || (!allowOverlap && count >= (SIZE - index)))
        {
            AddChunk();
        }

        T* result = &chunk->data[index];

        while (count)
        {
            size_t numLeft = SIZE - index;
            size_t numCopy = numLeft < count ? numLeft : count;
            std::memcpy(&chunk->data[index], items, sizeof(T) * numCopy);

            count -= numCopy;
            items += numCopy;
            index += (uint32_t)numCopy;

            if (count)
                AddChunk();
        }

        return result;
    }

    // Reserves `count` slots in the current chunk if they fit; otherwise returns nullptr
    // without allocating.
    OPTICK_INLINE T* TryAdd(int count)
    {
        if (index + count <= SIZE)
        {
            T* res = &chunk->data[index];
            index += count;
            return res;
        }

        return nullptr;
    }

    // Last added item, or nullptr when there is none.
    OPTICK_INLINE T* Back()
    {
        if (chunk && index > 0)
            return &chunk->data[index - 1];

        if (chunk && chunk->prev != nullptr)
            return &chunk->prev->data[SIZE - 1];

        return nullptr;
    }

    // First item, or nullptr when the pool is empty.
    OPTICK_INLINE T* Front()
    {
        return !IsEmpty() ? &root->data[0] : nullptr;
    }

    // Number of items currently stored.
    OPTICK_INLINE size_t Size() const
    {
        if (root == nullptr)
            return 0;

        size_t count = 0;

        for (const Chunk* it = root; it != chunk; it = it->next)
            count += SIZE;

        return count + index;
    }

    OPTICK_INLINE bool IsEmpty() const
    {
        return (chunk == nullptr) || (chunk == root && index == 0);
    }

    // Empties the pool. preserveMemory == true keeps the chunks for reuse;
    // false deletes the whole chunk list.
    OPTICK_INLINE void Clear(bool preserveMemory = true)
    {
        if (!preserveMemory)
        {
            if (root)
            {
                Memory::Delete(root);
                root = nullptr;
                chunk = nullptr;
                index = SIZE;
            }
        }
        else if (root)
        {
            index = 0;
            chunk = root;
        }
    }

    // Forward iterator over the stored items. Positions are normalized so that the slot
    // index is always in [0, SIZE): stepping past the last slot of a chunk moves to slot 0
    // of the following chunk (which may be nullptr).
    class const_iterator
    {
        void advance()
        {
            if (chunkIndex < SIZE - 1)
            {
                ++chunkIndex;
            }
            else
            {
                chunkPtr = chunkPtr->next;
                chunkIndex = 0;
            }
        }
    public:
        typedef const_iterator self_type;
        typedef T value_type;
        typedef T& reference;
        typedef T* pointer;
        typedef int difference_type;

        const_iterator(const Chunk* ptr, size_t index) : chunkPtr(ptr), chunkIndex(index) { }

        // Pre-increment: advance, then yield the advanced iterator.
        self_type& operator++()
        {
            advance();
            return *this;
        }

        // Post-increment: advance, but yield the position held before advancing.
        self_type operator++(int /*junk*/)
        {
            self_type previous = *this;
            advance();
            return previous;
        }

        // Both accessors deliberately drop constness, matching the declared
        // `reference` / `pointer` typedefs.
        reference operator*() { return const_cast<reference>(chunkPtr->data[chunkIndex]); }
        pointer operator->() { return const_cast<pointer>(&chunkPtr->data[chunkIndex]); }

        bool operator==(const self_type& rhs) const { return (chunkPtr == rhs.chunkPtr) && (chunkIndex == rhs.chunkIndex); }
        bool operator!=(const self_type& rhs) const { return (chunkPtr != rhs.chunkPtr) || (chunkIndex != rhs.chunkIndex); }
    private:
        const Chunk* chunkPtr;
        size_t chunkIndex;
    };

    const_iterator begin() const
    {
        return const_iterator(root, root ? 0 : SIZE);
    }

    const_iterator end() const
    {
        // When the current chunk is exactly full, the position reached by iteration is
        // slot 0 of the following chunk (possibly nullptr), never (chunk, SIZE).
        if (chunk != nullptr && index >= SIZE)
            return const_iterator(chunk->next, 0);

        return const_iterator(chunk, index);
    }

    // Calls func(item) for every stored item, in insertion order.
    template<class Func>
    void ForEach(Func func) const
    {
        for (const Chunk* it = root; it != chunk; it = it->next)
            for (uint32 i = 0; i < SIZE; ++i)
                func(it->data[i]);

        if (chunk)
            for (uint32 i = 0; i < index; ++i)
                func(chunk->data[i]);
    }

    // Non-const variant: func receives mutable items.
    template<class Func>
    void ForEach(Func func)
    {
        for (Chunk* it = root; it != chunk; it = it->next)
            for (uint32 i = 0; i < SIZE; ++i)
                func(it->data[i]);

        if (chunk)
            for (uint32 i = 0; i < index; ++i)
                func(chunk->data[i]);
    }

    // Calls func(dataPointer, itemCount) once per chunk that holds items.
    template<class Func>
    void ForEachChunk(Func func) const
    {
        for (const Chunk* it = root; it != chunk; it = it->next)
            func(it->data, SIZE);

        if (chunk)
            func(chunk->data, index);
    }

    // Copies all items into `destination`, which must have room for Size() items.
    void ToArray(T* destination) const
    {
        uint32 curIndex = 0;

        for (const Chunk* it = root; it != chunk; it = it->next)
        {
            std::memcpy(&destination[curIndex], it->data, sizeof(T) * SIZE);
            curIndex += SIZE;
        }

        if (chunk && index > 0)
        {
            std::memcpy(&destination[curIndex], chunk->data, sizeof(T) * index);
        }
    }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Byte-oriented pool: stores raw copies of arbitrary objects in chunks of CHUNK_SIZE bytes.
template<uint32 CHUNK_SIZE>
class MemoryBuffer : private MemoryPool<uint8, CHUNK_SIZE>
{
public:
    // Copies `size` bytes starting at `data` into the buffer and returns a pointer to the
    // copy (nullptr if the underlying AddRange refuses the request).
    // The source is only read, so it is passed on as a pointer to const bytes.
    template<class U>
    U* Add(U* data, size_t size, bool allowOverlap = true)
    {
        return (U*)(MemoryPool<uint8, CHUNK_SIZE>::AddRange((const uint8*)data, size, allowOverlap));
    }

    // Copies `val` into the buffer and returns a pointer to the stored copy.
    // The pointer overload is called with `const T*`, so it returns `const T*`;
    // const_cast is required here because static_cast cannot remove constness
    // (the previous static_cast made this overload fail to compile once instantiated).
    template<class T>
    T* Add(const T& val, bool allowOverlap = true)
    {
        return const_cast<T*>(Add(&val, sizeof(T), allowOverlap));
    }

    void Clear(bool preserveMemory)
    {
        MemoryPool<uint8, CHUNK_SIZE>::Clear(preserveMemory);
    }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -0,0 +1,195 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick_message.h"
#if USE_OPTICK
#include "optick_common.h"
#include "optick_core.h"
#include "optick_server.h"
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Fixed-size prefix of every incoming message: a magic mark followed by the payload length.
struct MessageHeader
{
    uint32 mark;
    uint32 length;

    // Magic value that identifies the start of a message.
    static const uint32 MESSAGE_MARK = 0xB50FB50F;

    MessageHeader() : mark(0), length(0) {}

    // True when the header starts with the expected magic mark.
    bool IsValid() const
    {
        return MESSAGE_MARK == mark;
    }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Singleton that maps IMessage::Type values to the static Create() function of the
// corresponding message class and decodes messages from an input stream.
class MessageFactory
{
    typedef IMessage* (*MessageCreateFunction)(InputDataStream& str);
    MessageCreateFunction factory[IMessage::COUNT];

    template<class T>
    void RegisterMessage()
    {
        factory[T::GetMessageType()] = T::Create;
    }

    MessageFactory()
    {
        // Clear EVERY slot (the previous memset only cleared the first one), so the
        // registration check below can actually detect a missing message type.
        for (uint32 msg = 0; msg < IMessage::COUNT; ++msg)
        {
            factory[msg] = nullptr;
        }

        RegisterMessage<StartMessage>();
        RegisterMessage<StopMessage>();
        RegisterMessage<CancelMessage>();
        RegisterMessage<TurnSamplingMessage>();

        for (uint32 msg = 0; msg < IMessage::COUNT; ++msg)
        {
            OPTICK_ASSERT(factory[msg] != nullptr, "Message is not registered to factory");
        }
    }

public:
    static MessageFactory& Get()
    {
        static MessageFactory instance;
        return instance;
    }

    // Reads one message (header, application id, type, payload) from `str`.
    // Returns nullptr for an unknown message type or when the number of bytes consumed
    // does not match the length announced in the header.
    IMessage* Create(InputDataStream& str)
    {
        MessageHeader header;
        str.Read(header);

        // Bytes remaining right after the header; used to verify the consumed length.
        size_t length = str.Length();

        uint16 applicationID = 0;
        uint16 messageType = IMessage::COUNT;

        str >> applicationID;
        str >> messageType;

        OPTICK_VERIFY( messageType < IMessage::COUNT && factory[messageType] != nullptr, "Unknown message type!", return nullptr )

        IMessage* result = factory[messageType](str);

        if (header.length + str.Length() != length)
        {
            // The message object was already allocated; release it so a corrupted
            // stream does not leak memory.
            Memory::Delete(result);
            OPTICK_FAILED("Message Stream is corrupted! Invalid Protocol?")
            return nullptr;
        }

        return result;
    }
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Serializes a DataResponse as its protocol version followed by its type widened to 32 bits.
OutputDataStream& operator<<(OutputDataStream& os, const DataResponse& val)
{
    const uint32 typeCode = (uint32)val.type;
    return os << val.version << typeCode;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Scans `str` for the next valid message header and decodes the message behind it.
// Bytes that do not form a valid header are skipped one at a time. Returns nullptr when
// no header can be peeked or when the announced payload has not fully arrived yet.
IMessage* IMessage::Create(InputDataStream& str)
{
    for (MessageHeader header; str.Peek(header); )
    {
        if (!header.IsValid())
        {
            // Some garbage in the stream?
            str.Skip(1);
            continue;
        }

        // Not enough data yet
        if (str.Length() < header.length + sizeof(MessageHeader))
            return nullptr;

        return MessageFactory::Get().Create(str);
    }

    return nullptr;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void StartMessage::Apply()
{
Core& core = Core::Get();
core.SetSettings(settings);
core.StartCapture();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Builds a StartMessage by reading the capture settings from `stream` in protocol order.
// A non-empty password arrives base64-encoded and is decoded in place.
IMessage* StartMessage::Create(InputDataStream& stream)
{
    StartMessage* message = Memory::New<StartMessage>();
    CaptureSettings& cfg = message->settings;

    stream >> cfg.mode;
    stream >> cfg.categoryMask;
    stream >> cfg.samplingFrequency;
    stream >> cfg.frameLimit;
    stream >> cfg.timeLimitUs;
    stream >> cfg.spikeLimitUs;
    stream >> cfg.memoryLimitMb;
    stream >> cfg.password;

    if (cfg.password.empty())
        return message;

    cfg.password = base64_decode(cfg.password);
    return message;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void StopMessage::Apply()
{
Core::Get().DumpCapture();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A stop message carries no payload, so nothing is read from the stream.
IMessage* StopMessage::Create(InputDataStream&)
{
    StopMessage* message = Memory::New<StopMessage>();
    return message;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CancelMessage::Apply()
{
Core::Get().CancelCapture();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// A cancel message carries no payload, so nothing is read from the stream.
IMessage* CancelMessage::Create(InputDataStream&)
{
    CancelMessage* message = Memory::New<CancelMessage>();
    return message;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Builds a TurnSamplingMessage by reading its `index` and `isSampling` fields, in that order.
IMessage* TurnSamplingMessage::Create( InputDataStream& stream )
{
    TurnSamplingMessage* message = Memory::New<TurnSamplingMessage>();
    stream >> message->index >> message->isSampling;
    return message;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Intentionally does nothing: the message is still decoded (see its Create function)
// but applying it has no effect.
void TurnSamplingMessage::Apply()
{
// Backward compatibility
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -0,0 +1,153 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include "optick_common.h"
#include "optick_serialization.h"
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Protocol version number; stored in DataResponse::version by the DataResponse constructor.
static const uint32 NETWORK_PROTOCOL_VERSION = 26;
// Application identifier; stored in DataResponse::application by the DataResponse constructor.
static const uint16 NETWORK_APPLICATION_ID = 0xB50F;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Header describing one block of data sent by the profiler: protocol version, payload
// size, payload kind and application id.
struct DataResponse
{
// Kind of payload that follows the header. The numeric values are explicit for most
// entries; the last three continue counting from FiberSynchronizationData (256).
enum Type : uint16
{
FrameDescriptionBoard = 0, // DescriptionBoard for Instrumental Frames
EventFrame = 1, // Instrumental Data
SamplingFrame = 2, // Sampling Data
NullFrame = 3, // Last Frame Mark
ReportProgress = 4, // Report Current Progress
Handshake = 5, // Handshake Response
Reserved_0 = 6,
SynchronizationData = 7, // Synchronization Data for the thread
TagsPack = 8, // Pack of tags
CallstackDescriptionBoard = 9, // DescriptionBoard with resolved function addresses
CallstackPack = 10, // Pack of CallStacks
Reserved_1 = 11,
Reserved_2 = 12,
Reserved_3 = 13,
Reserved_4 = 14,
//...
Reserved_255 = 255,
FiberSynchronizationData = 1 << 8, // Synchronization Data for the Fibers
SyscallPack,
SummaryPack,
FramesPack,
};
uint32 version;
uint32 size;
Type type;
uint16 application;
// t - payload kind, s - payload size; version and application are filled in from
// NETWORK_PROTOCOL_VERSION and NETWORK_APPLICATION_ID.
DataResponse(Type t, uint32 s) : version(NETWORK_PROTOCOL_VERSION), size(s), type(t), application(NETWORK_APPLICATION_ID){}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Stream-insertion overload for a DataResponse header. Only declared here; the definition is not part of the visible header.
OutputDataStream& operator << (OutputDataStream& os, const DataResponse& val);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Abstract interface for a message that is built from an InputDataStream (see Create) and then executed via Apply().
class IMessage
{
public:
// Message identifiers. Values are implicit (Start = 0 ... TurnSampling = 3); COUNT equals the number of identifiers listed before it.
enum Type : uint16
{
Start,
Stop,
Cancel,
TurnSampling,
COUNT,
};
// Executes the message; every concrete message type has to implement this.
virtual void Apply() = 0;
// Virtual so that a derived message can be destroyed through an IMessage pointer.
virtual ~IMessage() {}
// Factory taking the input stream and returning a message.
// NOTE(review): how the concrete type is chosen and who owns the returned pointer are not visible in this header - confirm in the implementation.
static IMessage* Create( InputDataStream& str );
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Ties a compile-time IMessage::Type to a message class and exposes it through GetMessageType().
template<IMessage::Type MESSAGE_TYPE>
class Message : public IMessage
{
	// Private compile-time copy of the template argument.
	enum { id = MESSAGE_TYPE };

public:
	// Returns the IMessage::Type this class was instantiated with, converted to uint32.
	static uint32 GetMessageType()
	{
		const uint32 messageType = id;
		return messageType;
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Options describing a capture; all numeric fields start at zero.
struct CaptureSettings
{
	uint32 mode;				// Capture mode
	uint32 categoryMask;		// Category filter
	uint32 samplingFrequency;	// Tracer: sampling frequency
	uint32 frameLimit;			// Max duration of a capture, in frames
	uint32 timeLimitUs;			// Max duration of a capture, in microseconds
	uint32 spikeLimitUs;		// Spike limit, in microseconds (NOTE(review): exact meaning is not visible in this header - confirm)
	uint64 memoryLimitMb;		// Max memory for a capture, in MB
	string password;			// Tracer: root password for the device

	// Zero-initializes every numeric setting; password is default-constructed.
	CaptureSettings()
		: mode(0)
		, categoryMask(0)
		, samplingFrequency(0)
		, frameLimit(0)
		, timeLimitUs(0)
		, spikeLimitUs(0)
		, memoryLimitMb(0)
	{
	}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Message registered under IMessage::Start; carries a CaptureSettings instance.
struct StartMessage : public Message<IMessage::Start>
{
	CaptureSettings settings;

	// Factory taking the input stream; the result is returned as an IMessage pointer.
	static IMessage* Create(InputDataStream& stream);

	// Overrides IMessage::Apply().
	void Apply() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Message registered under IMessage::Stop; it has no data members of its own.
struct StopMessage : public Message<IMessage::Stop>
{
	// Factory taking the input stream; the result is returned as an IMessage pointer.
	static IMessage* Create(InputDataStream& stream);

	// Overrides IMessage::Apply().
	void Apply() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Message registered under IMessage::Cancel; it has no data members of its own.
struct CancelMessage : public Message<IMessage::Cancel>
{
	// Factory taking the input stream; the result is returned as an IMessage pointer.
	static IMessage* Create(InputDataStream& stream);

	// Overrides IMessage::Apply().
	void Apply() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Message registered under IMessage::TurnSampling.
// NOTE(review): neither member is initialized by this struct itself; presumably Create() fills them from the stream - confirm.
struct TurnSamplingMessage : public Message<IMessage::TurnSampling>
{
	int32 index;
	byte isSampling;

	// Factory taking the input stream; the result is returned as an IMessage pointer.
	static IMessage* Create(InputDataStream& stream);

	// Overrides IMessage::Apply().
	void Apply() override;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,940 @@
/**************************************************************************
*
* Copyright 2013-2014 RAD Game Tools and Valve Software
* Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
**************************************************************************/
/* miniz.c 2.1.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing
See "unlicense" statement at the end of this file.
Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt
Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define
MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros).
* Low-level Deflate/Inflate implementation notes:
Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or
greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses
approximately as well as zlib.
Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function
coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory
block large enough to hold the entire file.
The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation.
* zlib-style API notes:
miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in
zlib replacement in many apps:
The z_stream struct, optional memory allocation callbacks
deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
inflateInit/inflateInit2/inflate/inflateReset/inflateEnd
compress, compress2, compressBound, uncompress
CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines.
Supports raw deflate streams or standard zlib streams with adler-32 checking.
Limitations:
The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries.
I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but
there are no guarantees that miniz.c pulls this off perfectly.
* PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by
Alex Evans. Supports 1-4 bytes/pixel images.
* ZIP archive API notes:
The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to
get the job done with minimal fuss. There are simple API's to retrieve file information, read files from
existing archives, create new archives, append new files to existing archives, or clone archive data from
one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h),
or you can specify custom file read/write callbacks.
- Archive reading: Just call this function to read a single file from a disk archive:
void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name,
size_t *pSize, mz_uint zip_flags);
For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central
directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files.
- Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file:
int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
The locate operation can optionally check file comments too, which (as one example) can be used to identify
multiple versions of the same file in an archive. This function uses a simple linear search through the central
directory, so it's not very fast.
Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and
retrieve detailed info on each file by calling mz_zip_reader_file_stat().
- Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data
to disk and builds an exact image of the central directory in memory. The central directory image is written
all at once at the end of the archive file when the archive is finalized.
The archive writer can optionally align each file's local header and file data to any power of 2 alignment,
which can be useful when the archive will be read from optical media. Also, the writer supports placing
arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still
readable by any ZIP tool.
- Archive appending: The simple way to add a single file to an archive is to call this function:
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name,
const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
The archive will be created if it doesn't already exist, otherwise it'll be appended to.
Note the appending is done in-place and is not an atomic operation, so if something goes wrong
during the operation it's possible the archive could be left without a central directory (although the local
file headers and file data will be fine, so the archive will be recoverable).
For more complex archive modification scenarios:
1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to
preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which compiles the
compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and
you're done. This is safe but requires a bunch of temporary disk space or heap memory.
2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(),
append new files as needed, then finalize the archive which will write an updated central directory to the
original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a
possibility that the archive's central directory could be lost with this method if anything goes wrong, though.
- ZIP archive support limitations:
No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files.
Requires streams capable of seeking.
* This is a header file library, like stb_image.c. To get only a header file, either cut and paste the
below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it.
* Important: For best perf. be sure to customize the below macros for your target platform:
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
#define MINIZ_LITTLE_ENDIAN 1
#define MINIZ_HAS_64BIT_REGISTERS 1
* On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz
uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files
(i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
*/
#pragma once
/////////////////////////// OPTICK MODIFICATIONS ///////////////////////////////
// Optick-specific configuration, applied before any miniz declaration below. //
#include "optick.config.h" //
//
// Gates the remainder of this header through "#if OPTICK_ENABLE_COMPRESSION". //
#define OPTICK_ENABLE_COMPRESSION (1) //
//
// No malloc/free/realloc: MZ_MALLOC and MZ_REALLOC expand to NULL and MZ_FREE //
// to a no-op, so custom alloc/free callbacks have to be supplied by the user. //
#define MINIZ_NO_MALLOC (1) //
// No stdio: MZ_FILE becomes "void *" and <stdio.h> is not included. //
#define MINIZ_NO_STDIO (1) //
// Disables all ZIP archive API's. //
#define MINIZ_NO_ARCHIVE_APIS (1) //
// No time functions: MZ_TIME_T becomes a dummy struct, <time.h> is not included. //
#define MINIZ_NO_TIME (1) //
////////////////////////////////////////////////////////////////////////////////
#if OPTICK_ENABLE_COMPRESSION
/* Defines to completely disable specific portions of miniz.c:
If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */
/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */
/*#define MINIZ_NO_STDIO */
/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */
/* get/set file times, and the C run-time funcs that get/set times won't be called. */
/* The current downside is the times written to your archives will be from 1979. */
/*#define MINIZ_NO_TIME */
/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */
/*#define MINIZ_NO_ARCHIVE_APIS */
/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */
/*#define MINIZ_NO_ARCHIVE_WRITING_APIS */
/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */
/*#define MINIZ_NO_ZLIB_APIS */
/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */
/*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */
/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc.
Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc
callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user
functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */
/*#define MINIZ_NO_MALLOC */
#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */
/* MINIZ_NO_TIME may already be defined (this header defines it as (1) in its configuration section). */
/* Redefining a macro with a different replacement list is an incompatible redefinition, so only */
/* define it here when nothing has defined it yet. Every consumer tests it with #ifdef / defined(). */
#ifndef MINIZ_NO_TIME
#define MINIZ_NO_TIME
#endif
#endif
#include <stddef.h>
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
#include <time.h>
#endif
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */
#define MINIZ_X86_OR_X64_CPU 1
#else
#define MINIZ_X86_OR_X64_CPU 0
#endif
/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian, 0 otherwise. */
/* A value predefined by the build is respected, as the notes at the top of this file invite. */
/* Detection order: */
/*   1. x86/x64 targets and MSVC ARM/ARM64 targets are little endian. */
/*   2. Otherwise trust __BYTE_ORDER__, but only when the compiler really defines it together with */
/*      __ORDER_LITTLE_ENDIAN__. Inside #if an undefined identifier evaluates to 0, so an unguarded */
/*      "__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__" is "0 == 0" and would wrongly report little endian. */
/*   3. Unknown byte order: fall back to 0, which selects the byte-by-byte code paths. */
#if !defined(MINIZ_LITTLE_ENDIAN)
#if MINIZ_X86_OR_X64_CPU || defined(_M_ARM) || defined(_M_ARM64)
#define MINIZ_LITTLE_ENDIAN 1
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define MINIZ_LITTLE_ENDIAN 1
#else
#define MINIZ_LITTLE_ENDIAN 0
#endif
#else
#define MINIZ_LITTLE_ENDIAN 0
#endif
#endif
/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */
/* A value supplied by the build wins; otherwise it defaults to 1 on x86/x64 and 0 everywhere else. */
#if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES)
#if MINIZ_X86_OR_X64_CPU
/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
/* NOTE(review): MINIZ_UNALIGNED_USE_MEMCPY is not consumed in the visible part of this header; presumably the implementation file uses it to route unaligned accesses through memcpy - confirm. */
#define MINIZ_UNALIGNED_USE_MEMCPY
#else
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0
#endif
#endif
/* Detect 64-bit targets from compiler-provided macros. */
/* Unlike MINIZ_USE_UNALIGNED_LOADS_AND_STORES this macro is defined unconditionally, so a value predefined by the build is redefined here. */
#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */
#define MINIZ_HAS_64BIT_REGISTERS 1
#else
#define MINIZ_HAS_64BIT_REGISTERS 0
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* ------------------- zlib-style API Definitions. */
/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */
typedef unsigned long mz_ulong;
/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */
/* NOTE(review): this header defines MINIZ_NO_MALLOC, which turns MZ_FREE() into a no-op; if mz_free() is built on MZ_FREE() as described, it presumably releases nothing in this configuration - confirm. */
void mz_free(void *p);
/* Initial adler-32 value (NOTE(review): presumably the value mz_adler32() returns for ptr==NULL - confirm). */
#define MZ_ADLER32_INIT (1)
/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
/* Initial CRC-32 value (NOTE(review): presumably the value mz_crc32() returns for ptr==NULL - confirm). */
#define MZ_CRC32_INIT (0)
/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */
mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
/* Compression strategies. */
/* Accepted as the strategy argument of mz_deflateInit2(). The zlib-style aliases Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE and Z_FIXED map onto these values. */
enum
{
MZ_DEFAULT_STRATEGY = 0,
MZ_FILTERED = 1,
MZ_HUFFMAN_ONLY = 2,
MZ_RLE = 3,
MZ_FIXED = 4
};
/* Method */
/* The only method value accepted by mz_deflateInit2(). */
#define MZ_DEFLATED 8
/* Heap allocation callbacks.
Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long. */
/* Each callback receives the user pointer stored in mz_stream::opaque as its first argument. */
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
typedef void (*mz_free_func)(void *opaque, void *address);
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */
/* MZ_DEFAULT_COMPRESSION (-1) is not a level of its own; per the note above it stands for MZ_DEFAULT_LEVEL (6). */
enum
{
MZ_NO_COMPRESSION = 0,
MZ_BEST_SPEED = 1,
MZ_BEST_COMPRESSION = 9,
MZ_UBER_COMPRESSION = 10,
MZ_DEFAULT_LEVEL = 6,
MZ_DEFAULT_COMPRESSION = -1
};
#define MZ_VERSION "10.1.0"
#define MZ_VERNUM 0xA100
#define MZ_VER_MAJOR 10
#define MZ_VER_MINOR 1
#define MZ_VER_REVISION 0
#define MZ_VER_SUBREVISION 0
#ifndef MINIZ_NO_ZLIB_APIS
/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */
/* Passed as the flush argument of mz_deflate() and mz_inflate(); the Z_* flush aliases map onto these values. */
enum
{
MZ_NO_FLUSH = 0,
MZ_PARTIAL_FLUSH = 1,
MZ_SYNC_FLUSH = 2,
MZ_FULL_FLUSH = 3,
MZ_FINISH = 4,
MZ_BLOCK = 5
};
/* Return status codes. MZ_PARAM_ERROR is non-standard. */
/* Non-negative values (MZ_OK, MZ_STREAM_END, MZ_NEED_DICT) are not errors; every error code is negative. */
enum
{
MZ_OK = 0,
MZ_STREAM_END = 1,
MZ_NEED_DICT = 2,
MZ_ERRNO = -1,
MZ_STREAM_ERROR = -2,
MZ_DATA_ERROR = -3,
MZ_MEM_ERROR = -4,
MZ_BUF_ERROR = -5,
MZ_VERSION_ERROR = -6,
MZ_PARAM_ERROR = -10000
};
/* Window bits */
#define MZ_DEFAULT_WINDOW_BITS 15
/* Opaque to users of this header: only a forward declaration is provided. */
struct mz_internal_state;
/* Compression/decompression stream struct. */
/* The caller maintains next_in/avail_in/next_out/avail_out between calls (see mz_deflate() and mz_inflate()). */
/* NOTE(review): the "defaults to malloc/free" remarks below describe stock miniz; this header defines MINIZ_NO_MALLOC, so zalloc/zfree presumably have to be set explicitly - confirm. */
typedef struct mz_stream_s
{
const unsigned char *next_in; /* pointer to next byte to read */
unsigned int avail_in; /* number of bytes available at next_in */
mz_ulong total_in; /* total number of bytes consumed so far */
unsigned char *next_out; /* pointer to next byte to write */
unsigned int avail_out; /* number of bytes that can be written to next_out */
mz_ulong total_out; /* total number of bytes produced so far */
char *msg; /* error msg (unused) */
struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */
mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */
mz_free_func zfree; /* optional heap free function (defaults to free) */
void *opaque; /* heap alloc function user pointer */
int data_type; /* data_type (unused) */
mz_ulong adler; /* adler32 of the source or uncompressed data */
mz_ulong reserved; /* not used */
} mz_stream;
/* Pointer alias used by every stream function declared below. */
typedef mz_stream *mz_streamp;
/* Returns the version string of miniz.c. */
const char *mz_version(void);
/* mz_deflateInit() initializes a compressor with default options: */
/* Parameters: */
/* pStream must point to an initialized mz_stream struct. */
/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */
/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */
/* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */
/* Return values: */
/* MZ_OK on success. */
/* MZ_STREAM_ERROR if the stream is bogus. */
/* MZ_PARAM_ERROR if the input parameters are bogus. */
/* MZ_MEM_ERROR on out of memory. */
int mz_deflateInit(mz_streamp pStream, int level);
/* mz_deflateInit2() is like mz_deflateInit(), except with more control: */
/* Additional parameters: */
/* method must be MZ_DEFLATED */
/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */
/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */
/* strategy is one of the MZ_*_STRATEGY / MZ_FILTERED / MZ_HUFFMAN_ONLY / MZ_RLE / MZ_FIXED values declared above. */
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */
int mz_deflateReset(mz_streamp pStream);
/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */
/* Parameters: */
/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */
/* Return values: */
/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */
/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */
/* MZ_STREAM_ERROR if the stream is bogus. */
/* MZ_PARAM_ERROR if one of the parameters is invalid. */
/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */
int mz_deflate(mz_streamp pStream, int flush);
/* mz_deflateEnd() deinitializes a compressor: */
/* Return values: */
/* MZ_OK on success. */
/* MZ_STREAM_ERROR if the stream is bogus. */
int mz_deflateEnd(mz_streamp pStream);
/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
/* Single-call compression functions mz_compress() and mz_compress2(): */
/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */
mz_ulong mz_compressBound(mz_ulong source_len);
/* Initializes a decompressor. */
int mz_inflateInit(mz_streamp pStream);
/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */
/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */
int mz_inflateInit2(mz_streamp pStream, int window_bits);
/* Quickly resets a decompressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */
int mz_inflateReset(mz_streamp pStream);
/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */
/* Parameters: */
/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */
/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */
/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */
/* Return values: */
/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */
/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */
/* MZ_STREAM_ERROR if the stream is bogus. */
/* MZ_DATA_ERROR if the deflate stream is invalid. */
/* MZ_PARAM_ERROR if one of the parameters is invalid. */
/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */
/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */
int mz_inflate(mz_streamp pStream, int flush);
/* Deinitializes a decompressor. */
int mz_inflateEnd(mz_streamp pStream);
/* Single-call decompression. */
/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
/* Returns a string description of the specified error code, or NULL if the error code is invalid. */
const char *mz_error(int err);
/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */
/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */
/* Caution: the names below are plain object-like macros. After this header is included, every later */
/* token spelled e.g. "compress", "deflate", "inflate" or "crc32" in the same translation unit is */
/* rewritten to its mz_ counterpart, including identifiers declared by a real zlib.h included afterwards. */
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
/* zlib basic types. */
typedef unsigned char Byte;
typedef unsigned int uInt;
typedef mz_ulong uLong;
typedef Byte Bytef;
typedef uInt uIntf;
typedef char charf;
typedef int intf;
typedef void *voidpf;
typedef uLong uLongf;
typedef void *voidp;
typedef void *const voidpc;
#define Z_NULL 0
/* Flush values. */
#define Z_NO_FLUSH MZ_NO_FLUSH
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
#define Z_FULL_FLUSH MZ_FULL_FLUSH
#define Z_FINISH MZ_FINISH
#define Z_BLOCK MZ_BLOCK
/* Return status codes. */
#define Z_OK MZ_OK
#define Z_STREAM_END MZ_STREAM_END
#define Z_NEED_DICT MZ_NEED_DICT
#define Z_ERRNO MZ_ERRNO
#define Z_STREAM_ERROR MZ_STREAM_ERROR
#define Z_DATA_ERROR MZ_DATA_ERROR
#define Z_MEM_ERROR MZ_MEM_ERROR
#define Z_BUF_ERROR MZ_BUF_ERROR
#define Z_VERSION_ERROR MZ_VERSION_ERROR
#define Z_PARAM_ERROR MZ_PARAM_ERROR
/* Compression levels and strategies. */
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
#define Z_BEST_SPEED MZ_BEST_SPEED
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
#define Z_FILTERED MZ_FILTERED
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
#define Z_RLE MZ_RLE
#define Z_FIXED MZ_FIXED
#define Z_DEFLATED MZ_DEFLATED
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
/* Types and functions. */
#define alloc_func mz_alloc_func
#define free_func mz_free_func
#define internal_state mz_internal_state
#define z_stream mz_stream
#define deflateInit mz_deflateInit
#define deflateInit2 mz_deflateInit2
#define deflateReset mz_deflateReset
#define deflate mz_deflate
#define deflateEnd mz_deflateEnd
#define deflateBound mz_deflateBound
#define compress mz_compress
#define compress2 mz_compress2
#define compressBound mz_compressBound
#define inflateInit mz_inflateInit
#define inflateInit2 mz_inflateInit2
#define inflateReset mz_inflateReset
#define inflate mz_inflate
#define inflateEnd mz_inflateEnd
#define uncompress mz_uncompress
#define crc32 mz_crc32
#define adler32 mz_adler32
#define MAX_WBITS 15
#define MAX_MEM_LEVEL 9
#define zError mz_error
/* Version identification. */
#define ZLIB_VERSION MZ_VERSION
#define ZLIB_VERNUM MZ_VERNUM
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
#define ZLIB_VER_MINOR MZ_VER_MINOR
#define ZLIB_VER_REVISION MZ_VER_REVISION
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
#define zlibVersion mz_version
#define zlib_version mz_version()
#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */
#endif /* MINIZ_NO_ZLIB_APIS */
#ifdef __cplusplus
}
#endif
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* ------------------- Types and macros */
typedef unsigned char mz_uint8;
typedef signed short mz_int16;
typedef unsigned short mz_uint16;
typedef unsigned int mz_uint32;
typedef unsigned int mz_uint;
typedef int64_t mz_int64;
typedef uint64_t mz_uint64;
typedef int mz_bool;
#define MZ_FALSE (0)
#define MZ_TRUE (1)
/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */
#ifdef _MSC_VER
#define MZ_MACRO_END while (0, 0)
#else
#define MZ_MACRO_END while (0)
#endif
#ifdef MINIZ_NO_STDIO
#define MZ_FILE void *
#else
#include <stdio.h>
#define MZ_FILE FILE
#endif /* #ifdef MINIZ_NO_STDIO */
#ifdef MINIZ_NO_TIME
typedef struct mz_dummy_time_t_tag
{
int m_dummy;
} mz_dummy_time_t;
#define MZ_TIME_T mz_dummy_time_t
#else
#define MZ_TIME_T time_t
#endif
/* Assertion hook; forwards to the C assert() macro. */
#define MZ_ASSERT(x) assert(x)
/* Heap hooks. With MINIZ_NO_MALLOC (which this header defines) allocation always yields NULL and */
/* freeing does nothing; the arguments of MZ_MALLOC/MZ_REALLOC are not evaluated in that case. */
/* Note that MZ_FREE does not parenthesize x in the MINIZ_NO_MALLOC variant, so pass a simple expression. */
#ifdef MINIZ_NO_MALLOC
#define MZ_MALLOC(x) NULL
#define MZ_FREE(x) (void)x, ((void)0)
#define MZ_REALLOC(p, x) NULL
#else
#define MZ_MALLOC(x) malloc(x)
#define MZ_FREE(x) free(x)
#define MZ_REALLOC(p, x) realloc(p, x)
#endif
/* Classic min/max macros: the selected argument is evaluated twice, so avoid arguments with side effects. */
#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
/* Zero-fills obj in place; obj must be an lvalue whose sizeof is the full object size (not a pointer to it). */
#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
/* Little-endian loads from the address p. */
/* Fast path: a direct 16/32-bit load through a cast pointer. It is selected only when unaligned access is enabled and the target is little endian. */
/* The cast dereferences p as mz_uint16/mz_uint32 whatever its alignment. */
#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
#define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
#define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
#else
/* Portable path: assemble the value byte by byte, lowest byte first. p is evaluated once per byte, and MZ_READ_LE16 yields an mz_uint32-typed expression here. */
#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
#endif
/* 64-bit little-endian load built from two MZ_READ_LE32 reads: the low half at p, the high half sizeof(mz_uint32) bytes further on. */
#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U))
#ifdef _MSC_VER
#define MZ_FORCEINLINE __forceinline
#elif defined(__GNUC__)
#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__))
#else
#define MZ_FORCEINLINE inline
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size);
extern void miniz_def_free_func(void *opaque, void *address);
extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size);
#define MZ_UINT16_MAX (0xFFFFU)
#define MZ_UINT32_MAX (0xFFFFFFFFU)
#ifdef __cplusplus
}
#endif
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/* ------------------- Low-level Compression API Definitions */
/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */
#define TDEFL_LESS_MEMORY 0
/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */
/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */
enum
{
TDEFL_HUFFMAN_ONLY = 0,
TDEFL_DEFAULT_MAX_PROBES = 128,
TDEFL_MAX_PROBES_MASK = 0xFFF
};
/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */
/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */
/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */
/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */
/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */
/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */
/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */
/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */
/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). */
enum
{
TDEFL_WRITE_ZLIB_HEADER = 0x01000,
TDEFL_COMPUTE_ADLER32 = 0x02000,
TDEFL_GREEDY_PARSING_FLAG = 0x04000,
TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
TDEFL_RLE_MATCHES = 0x10000,
TDEFL_FILTER_MATCHES = 0x20000,
TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
};
/* High level compression functions: */
/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */
/* Note: this header defines MINIZ_NO_MALLOC, and the configuration notes near the top of the file state that this helper won't work in that mode. */
/* On entry: */
/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */
/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */
/* On return: */
/* Function returns a pointer to the compressed data, or NULL on failure. */
/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */
/* The caller must free() the returned block when it's no longer needed. */
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */
/* Returns 0 on failure. */
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
/* Compresses an image to a compressed PNG file in memory. */
/* On entry: */
/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */
/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */
/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */
/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */
/* On return: */
/* Function returns a pointer to the compressed data, or NULL on failure. */
/* *pLen_out will be set to the size of the PNG image file. */
/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. */
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
/* Compressor sizing constants: number of Huffman tables and the symbol count of each, the LZ dictionary size, and the match length bounds. */
/* TDEFL_LZ_DICT_SIZE is a power of two, so TDEFL_LZ_DICT_SIZE_MASK (size - 1) can be used to wrap dictionary positions. */
enum
{
TDEFL_MAX_HUFF_TABLES = 3,
TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
TDEFL_LZ_DICT_SIZE = 32768,
TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1,
TDEFL_MIN_MATCH_LEN = 3,
TDEFL_MAX_MATCH_LEN = 258
};
/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */
/* Two alternative sets of buffer/hash sizes follow. They differ only in TDEFL_LZ_CODE_BUF_SIZE (24 KB vs 64 KB) and */
/* TDEFL_LZ_HASH_BITS (12 vs 15); the remaining values are derived from those or identical in both branches. */
#if TDEFL_LESS_MEMORY
enum
{
TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
TDEFL_MAX_HUFF_SYMBOLS = 288,
TDEFL_LZ_HASH_BITS = 12,
TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#else
enum
{
TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
TDEFL_MAX_HUFF_SYMBOLS = 288,
TDEFL_LZ_HASH_BITS = 15,
TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#endif
/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */
/* Status codes returned by the low-level compressor functions. The two failure codes are negative; OKAY and DONE are non-negative. */
typedef enum {
TDEFL_STATUS_BAD_PARAM = -2,
TDEFL_STATUS_PUT_BUF_FAILED = -1,
TDEFL_STATUS_OKAY = 0,
TDEFL_STATUS_DONE = 1
} tdefl_status;
/* Flush modes accepted by tdefl_compress() and tdefl_compress_buffer(). */
/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */
/* (which is why the value 1 is not used here). */
typedef enum {
TDEFL_NO_FLUSH = 0,
TDEFL_SYNC_FLUSH = 2,
TDEFL_FULL_FLUSH = 3,
TDEFL_FINISH = 4
} tdefl_flush;
/* tdefl's compression state structure. */
/* All working buffers (dictionary, Huffman tables, LZ code buffer, hash chains, output buffer) are embedded as fixed-size arrays, */
/* so the structure is large: on the order of 300 KB with the default (non-TDEFL_LESS_MEMORY) constants, assuming mz_uint16 is 2 bytes. */
typedef struct
{
/* Output callback and its user pointer, as passed to tdefl_init(). */
tdefl_put_buf_func_ptr m_pPut_buf_func;
void *m_pPut_buf_user;
mz_uint m_flags, m_max_probes[2];
int m_greedy_parsing;
mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
tdefl_status m_prev_return_status;
/* Caller-supplied buffers and flush mode (same names and types as the tdefl_compress() parameters). */
const void *m_pIn_buf;
void *m_pOut_buf;
size_t *m_pIn_buf_size, *m_pOut_buf_size;
tdefl_flush m_flush;
const mz_uint8 *m_pSrc;
size_t m_src_buf_left, m_out_buf_ofs;
/* Dictionary storage, with TDEFL_MAX_MATCH_LEN - 1 extra bytes past TDEFL_LZ_DICT_SIZE. */
mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
/* Per-table Huffman symbol counts, codes and code sizes. */
mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
} tdefl_compressor;
/* Initializes the compressor. */
/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */
/* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */
/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */
/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */
/* pIn_buf_size and pOut_buf_size are pointers - presumably in/out byte counts (available on entry, consumed/written on return); confirm against the implementation. */
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */
/* tdefl_compress_buffer() always consumes the entire input buffer. */
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
/* Accessors on the compressor state; judging by their names they return the m_prev_return_status and m_adler32 fields - confirm against the implementation. */
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
/* Create tdefl_compress() flags given zlib-style compression parameters. */
/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */
/* window_bits may be -15 (raw deflate) or 15 (zlib) */
/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
#ifndef MINIZ_NO_MALLOC
/* Allocate the tdefl_compressor structure in C so that */
/* non-C language bindings to tdefl_ API don't need to worry about */
/* structure size and allocation mechanism. */
/* Only declared when MINIZ_NO_MALLOC is not defined. */
tdefl_compressor *tdefl_compressor_alloc(void);
void tdefl_compressor_free(tdefl_compressor *pComp);
#endif
#ifdef __cplusplus
}
#endif
#pragma once
/* ------------------- Low-level Decompression API Definitions */
#ifdef __cplusplus
extern "C" {
#endif
/* Decompression flags used by tinfl_decompress(). */
/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */
/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */
/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */
/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */
/* Each flag is a distinct single bit, so several may be combined with bitwise OR. */
enum
{
TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
TINFL_FLAG_HAS_MORE_INPUT = 2,
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
TINFL_FLAG_COMPUTE_ADLER32 = 8
};
/* High level decompression functions: */
/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */
/* On entry: */
/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */
/* flags: presumably a combination of the TINFL_FLAG_* values above - confirm against the implementation. */
/* On return: */
/* Function returns a pointer to the decompressed data, or NULL on failure. */
/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */
/* The caller must call mz_free() on the returned block when it's no longer needed. */
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */
/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */
/* The failure value is (size_t)(-1), i.e. the largest size_t, so it cannot be confused with a byte count of 0. */
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */
/* Returns 1 on success or 0 on failure. */
typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
/* Forward declaration: the full structure is defined further down in this header. */
struct tinfl_decompressor_tag;
typedef struct tinfl_decompressor_tag tinfl_decompressor;
#ifndef MINIZ_NO_MALLOC
/* Allocate the tinfl_decompressor structure in C so that */
/* non-C language bindings to tinfl_ API don't need to worry about */
/* structure size and allocation mechanism. */
/* Only declared when MINIZ_NO_MALLOC is not defined. */
tinfl_decompressor *tinfl_decompressor_alloc(void);
void tinfl_decompressor_free(tinfl_decompressor *pDecomp);
#endif
/* Max size of LZ dictionary. */
#define TINFL_LZ_DICT_SIZE 32768
/* Return status. */
/* Negative values are failures, TINFL_STATUS_DONE (0) is successful completion, positive values mean the caller must supply more input or drain output. */
typedef enum {
/* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */
/* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */
/* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */
TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4,
/* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */
TINFL_STATUS_BAD_PARAM = -3,
/* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */
TINFL_STATUS_ADLER32_MISMATCH = -2,
/* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it'll just keep on returning the same status failure code. */
TINFL_STATUS_FAILED = -1,
/* Any status code less than TINFL_STATUS_DONE must indicate a failure. */
/* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */
/* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */
TINFL_STATUS_DONE = 0,
/* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */
/* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */
/* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */
TINFL_STATUS_NEEDS_MORE_INPUT = 1,
/* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */
/* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */
/* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */
/* so I may need to add some code to address this. */
TINFL_STATUS_HAS_MORE_OUTPUT = 2
} tinfl_status;
/* Initializes the decompressor to its initial state. */
/* The macro only sets m_state to 0; no other field of the structure is touched. r must be a pointer to a tinfl_decompressor. */
#define tinfl_init(r) \
do \
{ \
(r)->m_state = 0; \
} \
MZ_MACRO_END
/* Expands to the m_check_adler32 field of the decompressor pointed to by r. */
#define tinfl_get_adler32(r) (r)->m_check_adler32
/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */
/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */
/* decomp_flags takes the TINFL_FLAG_* values; the return value is one of the tinfl_status codes. */
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
/* Internal/private bits follow. */
/* Decompressor sizing constants: Huffman table count, per-table symbol counts, and the width/size of the fast lookup table */
/* (TINFL_FAST_LOOKUP_SIZE = 2^TINFL_FAST_LOOKUP_BITS = 1024 entries). */
enum
{
TINFL_MAX_HUFF_TABLES = 3,
TINFL_MAX_HUFF_SYMBOLS_0 = 288,
TINFL_MAX_HUFF_SYMBOLS_1 = 32,
TINFL_MAX_HUFF_SYMBOLS_2 = 19,
TINFL_FAST_LOOKUP_BITS = 10,
TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
};
/* One Huffman decoding table: per-symbol code sizes, a fast lookup table of TINFL_FAST_LOOKUP_SIZE entries, and a tree array. */
/* All arrays are dimensioned from TINFL_MAX_HUFF_SYMBOLS_0, the largest of the three symbol counts, so one type serves every table. */
typedef struct
{
mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
} tinfl_huff_table;
/* Selects the bit-buffer width: a 64-bit buffer when MINIZ_HAS_64BIT_REGISTERS is non-zero, otherwise a 32-bit one. */
/* TINFL_BITBUF_SIZE is the width of tinfl_bit_buf_t in bits. */
#if MINIZ_HAS_64BIT_REGISTERS
#define TINFL_USE_64BIT_BITBUF 1
#else
#define TINFL_USE_64BIT_BITBUF 0
#endif
#if TINFL_USE_64BIT_BITBUF
typedef mz_uint64 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (64)
#else
typedef mz_uint32 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (32)
#endif
/* Complete decompressor state (typedef'd as tinfl_decompressor earlier in this header). */
/* m_state is the field reset by tinfl_init(); m_check_adler32 is the field read by tinfl_get_adler32(). */
struct tinfl_decompressor_tag
{
mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
tinfl_bit_buf_t m_bit_buf;
size_t m_dist_from_out_buf_start;
tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
};
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,197 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick_serialization.h"
#if USE_OPTICK
#include "optick_common.h"
namespace Optick
{
// Flushes the underlying stream and returns a copy of everything written so far.
string OutputDataStream::GetData()
{
	this->flush();
	string snapshot = this->str();
	return snapshot;
}
// Appends `size` raw bytes from `buffer` to the stream; returns *this so calls can be chained.
OutputDataStream & OutputDataStream::Write(const char * buffer, size_t size)
{
	OutputDataStream& self = *this;
	self.write(buffer, size);
	return self;
}
// Writes a C string as a uint32 byte count followed by the characters (no terminator).
// A null pointer is written the same way as an empty string: a zero length and nothing else.
OutputDataStream &operator << ( OutputDataStream &stream, const char* val )
{
	const uint32 byteCount = (val != nullptr) ? (uint32)strlen(val) : 0;
	stream << byteCount;

	if (byteCount == 0)
		return stream;

	stream.write(val, byteCount);
	return stream;
}
// Writes the raw in-memory bytes of an int (host byte order, no conversion).
OutputDataStream &operator << ( OutputDataStream &stream, int val )
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes the raw in-memory bytes of an int64 (host byte order, no conversion).
OutputDataStream &operator << ( OutputDataStream &stream, int64 val )
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes a single char as one raw byte.
OutputDataStream &operator << ( OutputDataStream &stream, char val )
{
	stream.write(&val, sizeof(val));
	return stream;
}
// Writes the raw in-memory bytes of an int8.
OutputDataStream &operator << (OutputDataStream &stream, int8 val)
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(int8));
	return stream;
}
// Writes the raw in-memory bytes of a byte value.
OutputDataStream &operator << ( OutputDataStream &stream, byte val )
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes the raw in-memory bytes of a uint64 (host byte order, no conversion).
OutputDataStream & operator<<(OutputDataStream &stream, uint64 val)
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes the raw in-memory bytes of a uint32 (host byte order, no conversion).
// The string and container overloads use this one to emit their length prefix.
OutputDataStream & operator<<(OutputDataStream &stream, uint32 val)
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes the raw in-memory bytes of a float (no text formatting).
OutputDataStream & operator<<(OutputDataStream &stream, float val)
{
	const char* rawBytes = reinterpret_cast<const char*>(&val);
	stream.write(rawBytes, sizeof(val));
	return stream;
}
// Writes a string as a uint32 character count followed by the characters (no terminator).
OutputDataStream & operator<<(OutputDataStream &stream, const string& val)
{
	const size_t charCount = val.length();
	stream << (uint32)charCount;

	if (charCount != 0)
	{
		stream.write(val.data(), sizeof(val[0]) * charCount);
	}

	return stream;
}
// Writes a wide string as a uint32 BYTE count (length * sizeof(wchar_t)) followed by the raw wchar_t data.
OutputDataStream & operator<<(OutputDataStream &stream, const wstring& val)
{
	const size_t byteCount = sizeof(wchar_t) * val.length();
	stream << (uint32)byteCount;

	if (val.empty())
		return stream;

	stream.write(reinterpret_cast<const char*>(val.data()), byteCount);
	return stream;
}
// Reads sizeof(int16) raw bytes from the stream straight into val.
InputDataStream &operator >> (InputDataStream &stream, int16 &val)
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads sizeof(int32) raw bytes from the stream straight into val.
// The size is taken from int32 (the type actually being filled) rather than from `int`,
// matching the sibling overloads and staying correct on platforms where the two differ.
InputDataStream &operator >> ( InputDataStream &stream, int32 &val )
{
	stream.read( (char*)&val, sizeof(int32) );
	return stream;
}
// Reads sizeof(int64) raw bytes from the stream straight into val.
InputDataStream &operator >> ( InputDataStream &stream, int64 &val )
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads sizeof(byte) raw bytes from the stream straight into val.
InputDataStream & operator>>( InputDataStream &stream, byte &val )
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads sizeof(uint16) raw bytes from the stream straight into val.
InputDataStream & operator >> (InputDataStream &stream, uint16 &val)
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads sizeof(uint32) raw bytes from the stream straight into val.
InputDataStream & operator>>( InputDataStream &stream, uint32 &val )
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads sizeof(uint64) raw bytes from the stream straight into val.
InputDataStream & operator>>( InputDataStream &stream, uint64 &val )
{
	char* destination = reinterpret_cast<char*>(&val);
	stream.read(destination, sizeof(val));
	return stream;
}
// Reads a length-prefixed string: an int32 character count followed by that many raw characters.
// This mirrors the layout produced by operator<<(OutputDataStream&, const string&), which writes no terminator,
// so the result is sized to exactly `length` characters. (Sizing it to length + 1 would leave an embedded '\0'
// at the end, making the value compare unequal to the string that was originally written.)
// A zero or negative count yields an empty string instead of attempting a huge or invalid resize.
InputDataStream & operator >> ( InputDataStream &stream, string &val)
{
	int32 length = 0;
	stream >> length;

	if (length <= 0)
	{
		val.clear();
		return stream;
	}

	val.resize((size_t)length);
	stream.read(&val[0], length);
	return stream;
}
// Opens the underlying stringstream for both reading and writing:
// Append() writes incoming bytes at the put position while the operator>> overloads consume them from the get position.
InputDataStream::InputDataStream() :
stringstream( ios_base::in | ios_base::out )
{
}
void InputDataStream::Append(const char *buffer, size_t length)
{
write( buffer, length );
}
// Returns the distance between the write position and the read position,
// i.e. how much appended data has not been consumed yet.
size_t InputDataStream::Length()
{
	const auto unreadSpan = tellp() - tellg();
	return (size_t)unreadSpan;
}
// Advances the read position by `length` bytes.
// Returns true when at least `length` unread bytes were available before the seek, false otherwise.
// (The comparison was previously inverted, reporting success exactly when there was NOT enough data.)
// The seek is performed in both cases, as before.
bool InputDataStream::Skip(size_t length)
{
	const bool hasEnoughData = Length() >= length;
	seekg(length, ios_base::cur);
	return hasEnoughData;
}
}
#endif //USE_OPTICK

View file

@ -0,0 +1,141 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick_common.h"
#if USE_OPTICK
#include "optick_memory.h"
#if defined(OPTICK_MSVC)
#pragma warning( push )
//C4250. inherits 'std::basic_ostream'
#pragma warning( disable : 4250 )
//C4127. Conditional expression is constant
#pragma warning( disable : 4127 )
#endif
namespace Optick
{
// Binary serialization sink built on ostringstream.
// Values are appended through the operator<< overloads declared below (and the template overloads that follow the class);
// the accumulated bytes are retrieved with GetData().
class OutputDataStream : private ostringstream
{
public:
// Move constructor rocks!
// Beware of one copy here(do not use it in performance critical parts)
// Returns the bytes written so far as a string.
string GetData();
// It is important to make private inheritance in order to avoid collision with default operator implementation
// (the friends below need access to the privately inherited write()).
friend OutputDataStream &operator << ( OutputDataStream &stream, const char* val );
friend OutputDataStream &operator << ( OutputDataStream &stream, int val );
friend OutputDataStream &operator << ( OutputDataStream &stream, uint64 val );
friend OutputDataStream &operator << ( OutputDataStream &stream, uint32 val );
friend OutputDataStream &operator << ( OutputDataStream &stream, int64 val );
friend OutputDataStream &operator << ( OutputDataStream &stream, char val );
friend OutputDataStream &operator << ( OutputDataStream &stream, byte val );
friend OutputDataStream &operator << ( OutputDataStream &stream, int8 val);
friend OutputDataStream &operator << ( OutputDataStream &stream, float val);
friend OutputDataStream &operator << ( OutputDataStream &stream, const string& val );
friend OutputDataStream &operator << ( OutputDataStream &stream, const wstring& val );
// Appends `size` raw bytes from `buffer`; returns *this for chaining.
OutputDataStream& Write(const char* buffer, size_t size);
};
// Serializes a vector as a uint32 element count followed by each element in order,
// using whichever operator<< overload exists for T.
template<class T>
OutputDataStream& operator<<(OutputDataStream &stream, const vector<T>& val)
{
	const uint32 elementCount = (uint32)val.size();
	stream << elementCount;

	for (const T& element : val)
		stream << element;

	return stream;
}
// Serializes a MemoryPool as a uint32 element count (val.Size()) followed by every element
// in the order val.ForEach() visits them.
template<class T, uint32 N>
OutputDataStream& operator<<(OutputDataStream &stream, const MemoryPool<T, N>& val)
{
	const uint32 elementCount = (uint32)val.Size();
	stream << elementCount;

	val.ForEach([&stream](const T& item)
	{
		stream << item;
	});

	return stream;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Binary deserialization source built on stringstream.
// Raw bytes are fed in with Append() and consumed with the operator>> overloads or Peek()/Read().
class InputDataStream : private stringstream {
public:
// True until the underlying stream has its eof flag set.
bool CanRead() { return !eof(); }
InputDataStream();
// Appends `length` raw bytes from `buffer` to the stream.
void Append(const char *buffer, size_t length);
// Advances the read position by `length` bytes; see the definition for the meaning of the return value.
bool Skip(size_t length);
// Difference between the write position and the read position (tellp() - tellg()).
size_t Length();
// Copies sizeof(T) raw bytes into `data` WITHOUT consuming them: the read position is saved and restored.
// Returns false, leaving `data` untouched, when fewer than sizeof(T) bytes are available.
template<class T>
bool Peek(T& data)
{
if (Length() < sizeof(T))
return false;
pos_type currentPos = tellg();
read((char*)&data, sizeof(T));
seekg(currentPos);
return true;
}
// Copies sizeof(T) raw bytes into `data` and consumes them.
// Returns false, leaving `data` untouched, when fewer than sizeof(T) bytes are available.
template<class T>
bool Read(T& data)
{
if (Length() < sizeof(T))
return false;
read((char*)&data, sizeof(T));
return true;
}
// Raw readers; declared as friends because they need the privately inherited read().
friend InputDataStream &operator >> (InputDataStream &stream, byte &val );
friend InputDataStream &operator >> (InputDataStream &stream, int16 &val);
friend InputDataStream &operator >> (InputDataStream &stream, uint16 &val);
friend InputDataStream &operator >> (InputDataStream &stream, int32 &val );
friend InputDataStream &operator >> (InputDataStream &stream, uint32 &val );
friend InputDataStream &operator >> (InputDataStream &stream, int64 &val );
friend InputDataStream &operator >> (InputDataStream &stream, uint64 &val );
friend InputDataStream &operator >> (InputDataStream &stream, string &val);
};
}
#if defined(OPTICK_MSVC)
#pragma warning( pop )
#endif
#endif //USE_OPTICK

View file

@ -0,0 +1,502 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "optick_server.h"
#if USE_OPTICK
#include "optick_common.h"
#include "optick_miniz.h"
#if defined(OPTICK_MSVC)
#define USE_WINDOWS_SOCKETS (1)
#else
#define USE_BERKELEY_SOCKETS (1)
#endif
#define SOCKET_PROTOCOL_TCP (6)
#if defined(USE_BERKELEY_SOCKETS)
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
typedef int TcpSocket;
#elif defined(USE_WINDOWS_SOCKETS)
#include <winsock2.h>
#include <basetsd.h>
typedef UINT_PTR TcpSocket;
#else
#error Platform not supported
#endif
#if defined(OPTICK_MSVC)
#pragma comment( lib, "ws2_32.lib" )
#endif
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Port number constant for the profiler server.
// NOTE(review): no use of it is visible in this part of the file; presumably it is the default for Server's port argument - confirm.
static const short DEFAULT_PORT = 31318;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(USE_WINDOWS_SOCKETS)
// Winsock startup/cleanup guard.
// Init() creates a single function-local static instance: its constructor calls WSAStartup (requesting version 0x0202)
// and its destructor calls WSACleanup, but only if startup succeeded.
class Wsa
{
	bool isInitialized;
	WSADATA data;

	Wsa()
	{
		const int startupResult = WSAStartup(0x0202, &data);
		isInitialized = (startupResult == ERROR_SUCCESS);
		OPTICK_ASSERT(isInitialized, "Can't initialize WSA");
	}

	~Wsa()
	{
		if (!isInitialized)
			return;

		WSACleanup();
	}

public:
	// Ensures Winsock has been started; returns whether startup succeeded.
	static bool Init()
	{
		static Wsa instance;
		return instance.isInitialized;
	}
};
#endif
// Returns true when `socket` holds a usable handle:
// anything other than INVALID_SOCKET on Windows, any non-negative descriptor elsewhere.
inline bool IsValidSocket(TcpSocket socket)
{
#if defined(USE_WINDOWS_SOCKETS)
	return socket != INVALID_SOCKET;
#else
	return !(socket < 0);
#endif
}
// Closes the socket with the platform's close call and overwrites the caller's handle
// with the platform's invalid value, so IsValidSocket() returns false for it afterwards.
// The handle is taken by reference for that reason. No validity check is made here.
inline void CloseSocket(TcpSocket& socket)
{
#if defined(USE_WINDOWS_SOCKETS)
closesocket(socket);
socket = INVALID_SOCKET;
#else
close(socket);
socket = -1;
#endif
}
// Switches `socket` between blocking (isBlocking == true) and non-blocking mode.
// Returns true when the underlying platform call reports success.
//  - Windows: ioctlsocket(FIONBIO)
//  - OSX / Linux: fcntl(F_GETFL / F_SETFL) toggling O_NONBLOCK
//  - other platforms: setsockopt with option 0x1200
inline bool SetSocketBlockingMode(TcpSocket socket, bool isBlocking)
{
#if defined(USE_WINDOWS_SOCKETS)
	unsigned long nonBlockingFlag = isBlocking ? 0 : 1;
	return ioctlsocket(socket, FIONBIO, &nonBlockingFlag) == 0;
#else
#if defined(OPTICK_OSX) || defined(OPTICK_LINUX)
	const int currentFlags = fcntl(socket, F_GETFL, 0);
	if (currentFlags < 0)
		return false;

	const int updatedFlags = isBlocking ? (currentFlags & ~O_NONBLOCK) : (currentFlags | O_NONBLOCK);
	return fcntl(socket, F_SETFL, updatedFlags) == 0;
#else
	int nonBlockingFlag = isBlocking ? 0 : 1;
	return setsockopt((int)socket, SOL_SOCKET, 0x1200, (char*)&nonBlockingFlag, sizeof(nonBlockingFlag)) == 0;
#endif
#endif
}
class Socket
{
TcpSocket acceptSocket;
TcpSocket listenSocket;
sockaddr_in address;
fd_set recieveSet;
std::recursive_mutex socketLock;
wstring errorMessage;
void Close()
{
if (!IsValidSocket(listenSocket))
{
CloseSocket(listenSocket);
}
}
bool Bind(short port)
{
address.sin_family = AF_INET;
address.sin_addr.s_addr = INADDR_ANY;
address.sin_port = htons(port);
if (::bind(listenSocket, (sockaddr *)&address, sizeof(address)) == 0)
{
return true;
}
return false;
}
void Disconnect()
{
std::lock_guard<std::recursive_mutex> lock(socketLock);
if (!IsValidSocket(acceptSocket))
{
CloseSocket(acceptSocket);
}
}
public:
Socket() : acceptSocket((TcpSocket)-1), listenSocket((TcpSocket)-1)
{
#if defined(USE_WINDOWS_SOCKETS)
Wsa::Init();
#endif
listenSocket = ::socket(AF_INET, SOCK_STREAM, SOCKET_PROTOCOL_TCP);
OPTICK_ASSERT(IsValidSocket(listenSocket), "Can't create socket");
SetSocketBlockingMode(listenSocket, false);
}
~Socket()
{
Disconnect();
Close();
}
bool Bind(short startPort, short portRange)
{
for (short port = startPort; port < startPort + portRange; ++port)
if (Bind(port))
return true;
return false;
}
void Listen()
{
int result = ::listen(listenSocket, 8);
if (result != 0)
{
OPTICK_FAILED("Can't start listening");
}
}
bool Accept()
{
TcpSocket incomingSocket = ::accept(listenSocket, nullptr, nullptr);
if (IsValidSocket(incomingSocket))
{
std::lock_guard<std::recursive_mutex> lock(socketLock);
acceptSocket = incomingSocket;
SetSocketBlockingMode(acceptSocket, true);
}
return IsValidSocket(acceptSocket);
}
bool Send(const char *buf, size_t len)
{
std::lock_guard<std::recursive_mutex> lock(socketLock);
if (!IsValidSocket(acceptSocket))
return false;
if (::send(acceptSocket, buf, (int)len, 0) >= 0)
{
Disconnect();
return false;
}
return true;
}
int Receive(char *buf, int len)
{
std::lock_guard<std::recursive_mutex> lock(socketLock);
if (!IsValidSocket(acceptSocket))
return 0;
FD_ZERO(&recieveSet);
FD_SET(acceptSocket, &recieveSet);
static timeval lim = { 0, 0 };
#if defined(USE_BERKELEY_SOCKETS)
if (::select(acceptSocket + 1, &recieveSet, nullptr, nullptr, &lim) == 1)
#elif defined(USE_WINDOWS_SOCKETS)
if (::select(0, &recieveSet, nullptr, nullptr, &lim) == 1)
#else
#error Platform not supported
#endif
{
return ::recv(acceptSocket, buf, len, 0);
}
return 0;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Fixed-size header emitted at the start of a saved capture (see Server::SendStart, which passes it to the save callback).
// Layout: 32-bit magic, 16-bit version, 16-bit flags.
struct OptickHeader
{
uint32_t magic;
uint16_t version;
uint16_t flags;
static const uint32_t OPTICK_MAGIC = 0xB50FB50Fu;
static const uint16_t OPTICK_VERSION = 0;
// Bit flags stored in `flags`. Only IsMiniz is set in the visible code (when OPTICK_ENABLE_COMPRESSION is on).
enum Flags : uint16_t
{
IsZip = 1 << 0,
IsMiniz = 1 << 1,
};
// Defaults: current magic and version, no flags.
OptickHeader() : magic(OPTICK_MAGIC), version(OPTICK_VERSION), flags(0) {}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates the server socket and tries to bind it to `port` or one of the next three ports (a range of 4).
// Listening starts only when a bind succeeded; otherwise a failure is reported through OPTICK_FAILED.
// The message previously contained "TCP\IP": "\I" is not a valid escape sequence, so compilers warn about it
// and typically drop the backslash. It now reads "TCP/IP".
Server::Server(short port) : socket(Memory::New<Socket>()), saveCb(nullptr)
{
	if (!socket->Bind(port, 4))
	{
		OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP/IP.");
	}
	else
	{
		socket->Listen();
	}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pumps the network connection once:
//  1. returns immediately if InitConnection() reports no connection,
//  2. moves everything currently readable from the socket into networkStream,
//  3. decodes, applies and frees every complete message IMessage::Create() can produce from the stream.
void Server::Update()
{
	std::lock_guard<std::recursive_mutex> lock(socketLock);

	if (!InitConnection())
		return;

	for (int received = socket->Receive(buffer, BIFFER_SIZE); received > 0; received = socket->Receive(buffer, BIFFER_SIZE))
	{
		networkStream.Append(buffer, received);
	}

	for (IMessage* message = IMessage::Create(networkStream); message != nullptr; message = IMessage::Create(networkStream))
	{
		message->Apply();
		Memory::Delete(message);
	}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Installs the save callback. While a callback is set, Server::Send() hands outgoing data to it
// instead of writing to the socket; passing nullptr restores socket output.
void Server::SetSaveCallback(CaptureSaveChunkCb cb)
{
saveCb = cb;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if OPTICK_ENABLE_COMPRESSION
// Streaming deflate compressor that emits its output in chunks through a callback.
// Usage order as seen in this file: Init() once, Compress() for each piece of data, Finish() at the end.
// A single shared instance is obtained through Get().
// NOTE(review): the code mixes Z_* and MZ_* constants (Z_BEST_SPEED, Z_OK, MZ_FINISH, MZ_NO_FLUSH); presumably optick_miniz.h
// defines both spellings with equal values - confirm.
struct ZLibCompressor
{
static const int BUFFER_SIZE = 1024 << 10; // 1Mb
static const int COMPRESSION_LEVEL = Z_BEST_SPEED;
z_stream stream;
// Scratch buffer that receives compressed output before it is handed to the callback.
vector<uint8> buffer;
// Allocates the output buffer, resets the stream state, points the stream's output window at the buffer,
// routes the stream's allocations through Memory::Alloc / Memory::Free and starts a deflate session.
void Init()
{
buffer.resize(BUFFER_SIZE);
memset(&stream, 0, sizeof(stream));
stream.next_in = nullptr;
stream.avail_in = 0;
stream.next_out = &buffer[0];
stream.avail_out = (uint32)buffer.size();
stream.zalloc = [](void* /*opaque*/, size_t items, size_t size) -> void* { return Memory::Alloc(items * size); };
stream.zfree = [](void* /*opaque*/, void *address) { Memory::Free(address); };
if (deflateInit(&stream, COMPRESSION_LEVEL) != Z_OK)
{
OPTICK_FAILED("deflateInit failed!");
}
}
typedef void(*CompressCb)(const char* data, size_t size);
// Feeds `size` bytes from `data` into the deflate stream and passes any produced output to `cb`.
// With finish == true the stream is flushed to completion (the loop keeps running until Z_STREAM_END).
void Compress(const char* data, size_t size, CompressCb cb, bool finish = false)
{
stream.next_in = (const unsigned char*)data;
stream.avail_in = (uint32)size;
// Keep going while input remains; when finishing, keep going until the stream reports its end.
while (stream.avail_in || finish)
{
int status = deflate(&stream, finish ? MZ_FINISH : MZ_NO_FLUSH);
// Hand over whatever was produced (avail_out shrank) or the final state, then rewind the output window.
if ((status == Z_STREAM_END) || (stream.avail_out != buffer.size()))
{
uint32 copmressedSize = (uint32)(buffer.size() - stream.avail_out);
cb((const char*)&buffer[0], copmressedSize);
stream.next_out = &buffer[0];
stream.avail_out = (uint32)buffer.size();
}
if (status == Z_STREAM_END)
break;
// Any status other than Z_OK / Z_STREAM_END is reported as a failure and aborts the loop.
if (status != Z_OK)
{
OPTICK_FAILED("Copmression failed!");
break;
}
}
}
// Flushes the remaining compressed data to `cb`, ends the deflate session and releases the output buffer.
void Finish(CompressCb cb)
{
Compress(nullptr, 0, cb, true);
int status = deflateEnd(&stream);
if (status != Z_OK)
{
OPTICK_FAILED("deflateEnd failed!");
}
buffer.clear();
buffer.shrink_to_fit();
}
// Returns the shared instance (a function-local static).
static ZLibCompressor& Get()
{
static ZLibCompressor compressor;
return compressor;
}
};
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void Server::SendStart()
{
if (saveCb != nullptr)
{
OptickHeader header;
#if OPTICK_ENABLE_COMPRESSION
ZLibCompressor::Get().Init();
header.flags |= OptickHeader::IsMiniz;
#endif
saveCb((const char*)&header, sizeof(header));
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Low-level send of a raw byte range. Without a save callback the bytes go
// to the socket; with one they go to the callback, via the compressor when
// compression is compiled in.
void Server::Send(const char* data, size_t size)
{
	if (!saveCb)
	{
		socket->Send(data, size);
		return;
	}

#if OPTICK_ENABLE_COMPRESSION
	ZLibCompressor::Get().Compress(data, size, saveCb);
#else
	saveCb(data, size);
#endif
}
// Sends one typed response: a DataResponse header carrying the type and the
// payload size, followed by the payload taken from `stream`. Both parts are
// written while holding socketLock.
void Server::Send(DataResponse::Type type, OutputDataStream& stream)
{
	std::lock_guard<std::recursive_mutex> guard(socketLock);

	const string payload = stream.GetData();
	DataResponse packetHeader(type, (uint32)payload.size());

	Send(reinterpret_cast<const char*>(&packetHeader), sizeof(packetHeader));
	Send(payload.c_str(), payload.size());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void Server::SendFinish()
{
OutputDataStream empty;
Send(DataResponse::NullFrame, empty);
if (saveCb != nullptr)
{
#if OPTICK_ENABLE_COMPRESSION
ZLibCompressor::Get().Finish(saveCb);
#endif
saveCb(nullptr, 0);
saveCb = nullptr;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reports the result of the socket's Accept() call.
bool Server::InitConnection()
{
	const bool accepted = socket->Accept();
	return accepted;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns this machine's host name, or an empty string when it cannot be
// queried or no supported platform branch is compiled in.
string Server::GetHostName() const
{
	const uint32 HOST_NAME_LENGTH = 256;
	char hostname[HOST_NAME_LENGTH] = { 0 };

#if defined(USE_BERKELEY_SOCKETS)
#if defined(OPTICK_LINUX) || defined(OPTICK_OSX)
	// gethostname() may leave a truncated name without a terminating null, so
	// hold back the last byte; on failure fall back to an empty name.
	if (gethostname(hostname, HOST_NAME_LENGTH - 1) != 0)
	{
		hostname[0] = '\0';
	}
#endif
#elif OPTICK_PC
	DWORD length = HOST_NAME_LENGTH;
	// On failure the buffer contents are not guaranteed; report an empty name.
	if (!GetComputerNameA(hostname, &length))
	{
		hostname[0] = '\0';
	}
#endif

	// Guarantee termination regardless of what the platform call wrote.
	hostname[HOST_NAME_LENGTH - 1] = '\0';
	return hostname;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Releases the socket allocated by the constructor, if there is one.
Server::~Server()
{
	if (socket == nullptr)
	{
		return;
	}

	Memory::Delete(socket);
	socket = nullptr;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Singleton accessor: the server is a function-local static constructed on
// DEFAULT_PORT the first time this is called.
Server & Server::Get()
{
	static Server singleton(DEFAULT_PORT);
	return singleton;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -0,0 +1,73 @@
// The MIT License(MIT)
//
// Copyright(c) 2019 Vadim Slyusarev
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include "optick_message.h"
#include <mutex>
#include <thread>
namespace Optick
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class Socket;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Singleton endpoint of the profiler: receives messages over a socket and
// sends capture data either to that socket or to a save callback.
// Construction and destruction are private; use Server::Get().
class Server
{
// Bytes received from the socket are appended here; messages are decoded from it.
InputDataStream networkStream;
// Size in bytes of the receive scratch buffer. The spelling ("BIFFER") is
// kept because the implementation file refers to this name.
static const int BIFFER_SIZE = 1024;
// Scratch buffer passed to Socket::Receive() in Update().
char buffer[BIFFER_SIZE];
// Allocated in the constructor, deleted in the destructor.
Socket* socket;
// Held by Update() and by the typed Send() overload.
std::recursive_mutex socketLock;
// When non-null, outgoing data is passed to this callback instead of the socket.
CaptureSaveChunkCb saveCb;
// Creates the socket, binds it starting at `port`, and listens on success.
Server( short port );
~Server();
// Returns the result of Socket::Accept().
bool InitConnection();
// Raw send: routes bytes to saveCb (when set) or to the socket.
void Send(const char* data, size_t size);
public:
// Installs the save callback used by SendStart/Send/SendFinish.
void SetSaveCallback(CaptureSaveChunkCb cb);
// Emits the capture header through the save callback, if one is set.
void SendStart();
// Sends a DataResponse header followed by the contents of `stream`.
void Send(DataResponse::Type type, OutputDataStream& stream);
// Sends a NullFrame response, then finalizes and clears the save callback.
void SendFinish();
// Reads pending socket data and applies every decoded message.
void Update();
// Returns the local machine's host name.
string GetHostName() const;
// Returns the process-wide Server instance.
static Server &Get();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK

View file

@ -746,6 +746,8 @@ idSaveGameThread::Run
*/
int idSaveGameThread::Run()
{
OPTICK_THREAD( "idSaveGameThread" );
int ret = ERROR_SUCCESS;
try

View file

@ -1948,6 +1948,19 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin
// DG: tell Windows 8+ we're high dpi aware, otherwise display scaling screws up the game
Sys_SetDPIAwareness();
// Setting memory allocators
OPTICK_SET_MEMORY_ALLOCATOR(
[]( size_t size ) -> void* { return operator new( size ); },
[]( void* p )
{
operator delete( p );
},
[]()
{
/* Do some TLS initialization here if needed */
}
);
#if 0
DWORD handler = ( DWORD )_except_handler;
__asm
@ -2021,6 +2034,7 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin
// main game loop
while( 1 )
{
OPTICK_FRAME( "MainThread" );
Win_Frame();
@ -2035,6 +2049,8 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin
common->Frame();
}
OPTICK_SHUTDOWN();
// never gets here
return 0;
}