diff --git a/neo/CMakeLists.txt b/neo/CMakeLists.txt index 2a620caa..f1c72b92 100644 --- a/neo/CMakeLists.txt +++ b/neo/CMakeLists.txt @@ -48,28 +48,31 @@ option(WINDOWS10 "Build for Windows 10+" ON) option(USE_SYSTEM_ZLIB - "Use the system zlib instead of the bundled one" OFF) + "Use the system zlib instead of the bundled one" OFF) option(USE_SYSTEM_LIBPNG - "Use the system libpng instead of the bundled one" OFF) + "Use the system libpng instead of the bundled one" OFF) option(USE_SYSTEM_LIBJPEG - "Use the system libjpeg instead of the bundled one" OFF) + "Use the system libjpeg instead of the bundled one" OFF) option(USE_SYSTEM_LIBGLEW - "Use the system libglew instead of the bundled one" OFF) + "Use the system libglew instead of the bundled one" OFF) option(USE_SYSTEM_RAPIDJSON - "Use the system rapidjson instead of the bundled one" OFF) + "Use the system rapidjson instead of the bundled one" OFF) option(USE_DX12 - "Use DirectX 12" ON) + "Use DirectX 12" ON) option(USE_VULKAN - "Use Vulkan" ON) + "Use Vulkan" ON) option(USE_VMA - "Use VMA allocator instead of the NVRHI builtin one" ON) + "Use VMA allocator instead of the NVRHI builtin one" ON) + +option(OPTICK + "Enable profiling with Optick" OFF) set(NVRHI_INSTALL OFF) @@ -466,7 +469,7 @@ include_directories(${NVRHI_DIR}/include) if(USE_SYSTEM_RAPIDJSON) -find_package(rapidjson REQUIRED) + find_package(rapidjson REQUIRED) endif(USE_SYSTEM_RAPIDJSON) if (RAPIDJSON_FOUND) @@ -476,6 +479,17 @@ else (RAPIDJSON_FOUND) include_directories("libs/rapidjson/include") endif (RAPIDJSON_FOUND) + +include_directories("libs/optick") + +if(OPTICK) + file(GLOB OPTICK_INCLUDES libs/optick/*.h) + file(GLOB OPTICK_SOURCES libs/optick/*.cpp) + + source_group("libs\\optick" FILES ${OPTICK_INCLUDES}) + source_group("libs\\optick" FILES ${OPTICK_SOURCES}) +endif() + add_subdirectory(idlib) file(GLOB NATVIS_SOURCES .natvis) @@ -1348,6 +1362,7 @@ set(RBDOOM3_INCLUDES #${FREETYPE_SOURCES} ${SOUND_INCLUDES} ${OGGVORBIS_INCLUDES} + 
${OPTICK_INCLUDES} ${UI_INCLUDES} ${SWF_INCLUDES} ${COMMON_INCLUDES} @@ -1397,6 +1412,7 @@ set(RBDOOM3_SOURCES #${FREETYPE_SOURCES} ${SOUND_SOURCES} ${OGGVORBIS_SOURCES} + ${OPTICK_SOURCES} ${UI_SOURCES} ${SWF_SOURCES} ${COMMON_SOURCES} @@ -1556,7 +1572,7 @@ if(MSVC) if(USE_PRECOMPILED_HEADERS) set(RBDOOM3_PRECOMPILED_SOURCES ${RBDOOM3_SOURCES}) - list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES}) + list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES} ${OPTICK_SOURCES}) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/libs/zlib/minizip/ioapi.c) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTDecoder.cpp) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTEncoder.cpp) @@ -1698,7 +1714,7 @@ else() if(USE_PRECOMPILED_HEADERS) set(RBDOOM3_PRECOMPILED_SOURCES ${RBDOOM3_SOURCES}) - list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES}) + list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${JPEG_SOURCES} ${PNG_SOURCES} ${ZLIB_SOURCES} ${GLEW_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES} ${OPTICK_SOURCES}) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/libs/zlib/minizip/ioapi.c) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTDecoder.cpp) list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/renderer/DXT/DXTEncoder.cpp) @@ -1838,6 +1854,11 @@ if(USE_VULKAN) 
target_link_libraries(RBDoom3BFG nvrhi_vk) endif() +if(OPTICK) + target_compile_definitions(RBDoom3BFG PUBLIC USE_OPTICK=1) +else() + target_compile_definitions(RBDoom3BFG PUBLIC USE_OPTICK=0) +endif() # needs to come after nvrhi_d3d11 etc. for link order target_link_libraries(RBDoom3BFG nvrhi) diff --git a/neo/cmake-vs2019-win64-optick-profiling.bat b/neo/cmake-vs2019-win64-optick-profiling.bat new file mode 100644 index 00000000..4a5ef1de --- /dev/null +++ b/neo/cmake-vs2019-win64-optick-profiling.bat @@ -0,0 +1,6 @@ +cd .. +del /s /q build +mkdir build +cd build +cmake -G "Visual Studio 16" -A x64 -DFFMPEG=OFF -DBINKDEC=ON -DOPTICK=ON ../neo +pause \ No newline at end of file diff --git a/neo/d3xp/Game_local.cpp b/neo/d3xp/Game_local.cpp index 4a7790f8..284adf40 100644 --- a/neo/d3xp/Game_local.cpp +++ b/neo/d3xp/Game_local.cpp @@ -2616,6 +2616,8 @@ idGameLocal::RunFrame */ void idGameLocal::RunFrame( idUserCmdMgr& cmdMgr, gameReturn_t& ret ) { + SCOPED_PROFILE_EVENT( "RunFrame" ); + idEntity* ent; int num; float ms; diff --git a/neo/framework/Common.h b/neo/framework/Common.h index a4a9b369..dc142758 100644 --- a/neo/framework/Common.h +++ b/neo/framework/Common.h @@ -103,7 +103,11 @@ public: } }; -#define SCOPED_PROFILE_EVENT( x ) idScopedProfileEvent scopedProfileEvent_##__LINE__( x ) +#if USE_OPTICK + #define SCOPED_PROFILE_EVENT( x ) OPTICK_EVENT( x ) +#else + #define SCOPED_PROFILE_EVENT( x ) idScopedProfileEvent scopedProfileEvent_##__LINE__( x ) +#endif ID_INLINE bool BeginTraceRecording( const char* szName ) { diff --git a/neo/framework/File_SaveGame.cpp b/neo/framework/File_SaveGame.cpp index dc54eb65..65e2e94b 100644 --- a/neo/framework/File_SaveGame.cpp +++ b/neo/framework/File_SaveGame.cpp @@ -71,6 +71,8 @@ class idSGFcompressThread : public idSysThread public: virtual int Run() { + OPTICK_THREAD( "idSGFcompressThread" ); + sgf->CompressBlock(); return 0; } @@ -81,6 +83,8 @@ class idSGFdecompressThread : public idSysThread public: virtual int Run() 
{ + OPTICK_THREAD( "idSGFdecompressThread" ); + sgf->DecompressBlock(); return 0; } @@ -91,6 +95,8 @@ class idSGFwriteThread : public idSysThread public: virtual int Run() { + OPTICK_THREAD( "idSGFwriteThread" ); + sgf->WriteBlock(); return 0; } @@ -101,6 +107,8 @@ class idSGFreadThread : public idSysThread public: virtual int Run() { + OPTICK_THREAD( "idSGFreadThread" ); + sgf->ReadBlock(); return 0; } diff --git a/neo/framework/common_frame.cpp b/neo/framework/common_frame.cpp index 905777fe..36a3a5ec 100644 --- a/neo/framework/common_frame.cpp +++ b/neo/framework/common_frame.cpp @@ -90,6 +90,8 @@ be called directly in the foreground thread for comparison. */ int idGameThread::Run() { + OPTICK_THREAD( "idGameThread" ); + commonLocal.frameTiming.startGameTime = Sys_Microseconds(); // debugging tool to test frame dropping behavior diff --git a/neo/idlib/CMakeLists.txt b/neo/idlib/CMakeLists.txt index 71c290a5..79232ffe 100644 --- a/neo/idlib/CMakeLists.txt +++ b/neo/idlib/CMakeLists.txt @@ -144,6 +144,11 @@ else() endif() +if(OPTICK) + target_compile_definitions(idlib PUBLIC USE_OPTICK=1) +else() + target_compile_definitions(idlib PUBLIC USE_OPTICK=0) +endif() # if(MSVC) # # set_source_files_properties(precompiled.cpp diff --git a/neo/idlib/ParallelJobList.cpp b/neo/idlib/ParallelJobList.cpp index c05857c9..54b14c5d 100644 --- a/neo/idlib/ParallelJobList.cpp +++ b/neo/idlib/ParallelJobList.cpp @@ -1145,6 +1145,8 @@ idJobThread::Run */ int idJobThread::Run() { + OPTICK_THREAD( GetName() ); + threadJobListState_t threadJobListState[MAX_JOBLISTS]; int numJobLists = 0; int lastStalledJobList = -1; diff --git a/neo/idlib/precompiled.h b/neo/idlib/precompiled.h index aa156c30..90dfe8d3 100644 --- a/neo/idlib/precompiled.h +++ b/neo/idlib/precompiled.h @@ -105,6 +105,11 @@ const int MAX_EXPRESSION_REGISTERS = 4096; #include #endif +// RB: make Optick profiling available everywhere +#if defined( USE_OPTICK ) + #include "../libs/optick/optick.h" +#endif + #include 
"../renderer/Cinematic.h" #include "../renderer/Material.h" #include "../renderer/BufferObject.h" diff --git a/neo/libs/optick/LICENSE b/neo/libs/optick/LICENSE new file mode 100644 index 00000000..7d2f3684 --- /dev/null +++ b/neo/libs/optick/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2019 Vadim Slyusarev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/neo/libs/optick/optick.config.h b/neo/libs/optick/optick.config.h new file mode 100644 index 00000000..0270bf94 --- /dev/null +++ b/neo/libs/optick/optick.config.h @@ -0,0 +1,73 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// GLOBAL SETTINGS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// [x] USE_OPTICK - (Master Switch) +// [x] OPTICK_ENABLE_TRACING - (Enable Kernel-level tracing) +// [x] OPTICK_ENABLE_GPU_D3D12 - (GPU D3D12) +// [x] OPTICK_ENABLE_GPU_VULKAN - (GPU VULKAN) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// MASTER SWITCH - use it for disabling profiler in final builds // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(USE_OPTICK) +#define USE_OPTICK (1) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Enable Low-level platform-specific tracing (Switch Contexts, Autosampling, etc.) 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(OPTICK_ENABLE_TRACING) +#define OPTICK_ENABLE_TRACING (USE_OPTICK /*&& 0*/) +#endif //OPTICK_ENABLE_TRACING +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// GPU Counters +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if !defined(OPTICK_ENABLE_GPU) +#define OPTICK_ENABLE_GPU (USE_OPTICK /*&& 0*/) +#endif //OPTICK_ENABLE_GPU + +// D3D12 +#if !defined(OPTICK_ENABLE_GPU_D3D12) +#if defined(_MSC_VER) +#define OPTICK_ENABLE_GPU_D3D12 (OPTICK_ENABLE_GPU /*&& 0*/) +#else +#define OPTICK_ENABLE_GPU_D3D12 (0) +#endif +#endif + +// VULKAN +#if !defined(OPTICK_ENABLE_GPU_VULKAN) +#if defined(_MSC_VER) +#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/) +#else +#define OPTICK_ENABLE_GPU_VULKAN (0) +#endif +#endif + diff --git a/neo/libs/optick/optick.h b/neo/libs/optick/optick.h new file mode 100644 index 00000000..3d57a07a --- /dev/null +++ b/neo/libs/optick/optick.h @@ -0,0 +1,1113 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of 
the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Config +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include "optick.config.h" + +#if USE_OPTICK +#include +#include + +#if defined(_MSC_VER) +# define OPTICK_MSVC (1) +# define OPTICK_64BIT (1) +# if defined(_DURANGO) +# define OPTICK_PC (0) +# else +# define OPTICK_PC (1) +# endif +#elif defined(__clang__) || defined(__GNUC__) +# define OPTICK_GCC (1) +# if defined(__APPLE_CC__) +# define OPTICK_OSX (1) +# define OPTICK_64BIT (1) +# elif defined(__linux__) +# define OPTICK_LINUX (1) +# define OPTICK_64BIT (1) +# elif defined(__FreeBSD__) +# define OPTICK_FREEBSD (1) +# define OPTICK_64BIT (1) +# endif +# if defined(__aarch64__) || defined(_M_ARM64) +# define OPTICK_ARM (1) +# define OPTICK_64BIT (1) +# elif defined(__arm__) || defined(_M_ARM) +# define OPTICK_ARM (1) +# define OPTICK_32BIT (1) +# endif +#else +#error Compiler not supported +#endif + +//////////////////////////////////////////////////////////////////////// +// Target Platform +//////////////////////////////////////////////////////////////////////// + +#if defined(OPTICK_GCC) +#define OPTICK_FUNC __PRETTY_FUNCTION__ +#elif defined(OPTICK_MSVC) +#define OPTICK_FUNC __FUNCSIG__ +#else +#error Compiler not supported +#endif + 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// EXPORTS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_EXPORTS) && defined(OPTICK_MSVC) +#define OPTICK_API __declspec(dllexport) +#else +#define OPTICK_API +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define OPTICK_CONCAT_IMPL(x, y) x##y +#define OPTICK_CONCAT(x, y) OPTICK_CONCAT_IMPL(x, y) + +#if defined(OPTICK_MSVC) +#define OPTICK_INLINE __forceinline +#elif defined(OPTICK_GCC) +#define OPTICK_INLINE __attribute__((always_inline)) inline +#else +#error Compiler is not supported +#endif + + +// Vulkan Forward Declarations +#define OPTICK_DEFINE_HANDLE(object) typedef struct object##_T *object; +OPTICK_DEFINE_HANDLE(VkDevice); +OPTICK_DEFINE_HANDLE(VkPhysicalDevice); +OPTICK_DEFINE_HANDLE(VkQueue); +OPTICK_DEFINE_HANDLE(VkCommandBuffer); +OPTICK_DEFINE_HANDLE(VkQueryPool); +OPTICK_DEFINE_HANDLE(VkCommandPool); +OPTICK_DEFINE_HANDLE(VkFence); + +struct VkPhysicalDeviceProperties; +struct VkQueryPoolCreateInfo; +struct VkAllocationCallbacks; +struct VkCommandPoolCreateInfo; +struct VkCommandBufferAllocateInfo; +struct VkFenceCreateInfo; +struct VkSubmitInfo; +struct VkCommandBufferBeginInfo; + +#ifndef VKAPI_PTR +#define OPTICK_VKAPI_PTR_DEFINED 1 +#if defined(_WIN32) + // On Windows, Vulkan commands use the stdcall convention + #define VKAPI_PTR __stdcall +#else + #define VKAPI_PTR +#endif +#endif + +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties_)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties); +typedef int32_t (VKAPI_PTR *PFN_vkCreateQueryPool_)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool); +typedef int32_t (VKAPI_PTR 
*PFN_vkCreateCommandPool_)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool); +typedef int32_t (VKAPI_PTR *PFN_vkAllocateCommandBuffers_)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers); +typedef int32_t (VKAPI_PTR *PFN_vkCreateFence_)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); +typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool_)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef int32_t (VKAPI_PTR *PFN_vkQueueSubmit_)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); +typedef int32_t (VKAPI_PTR *PFN_vkWaitForFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, uint32_t waitAll, uint64_t timeout); +typedef int32_t (VKAPI_PTR *PFN_vkResetCommandBuffer_)(VkCommandBuffer commandBuffer, uint32_t flags); +typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp_)(VkCommandBuffer commandBuffer, uint32_t pipelineStage, VkQueryPool queryPool, uint32_t query); +typedef int32_t (VKAPI_PTR *PFN_vkGetQueryPoolResults_)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void* pData, uint64_t stride, uint32_t flags); +typedef int32_t (VKAPI_PTR *PFN_vkBeginCommandBuffer_)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo* pBeginInfo); +typedef int32_t (VKAPI_PTR *PFN_vkEndCommandBuffer_)(VkCommandBuffer commandBuffer); +typedef int32_t (VKAPI_PTR *PFN_vkResetFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences); +typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool_)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool_)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR 
*PFN_vkDestroyFence_)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers_)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers); + +#if OPTICK_VKAPI_PTR_DEFINED +#undef VKAPI_PTR +#endif + +// D3D12 Forward Declarations +struct ID3D12CommandList; +struct ID3D12Device; +struct ID3D12CommandQueue; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Optick +{ + struct OPTICK_API VulkanFunctions + { + PFN_vkGetPhysicalDeviceProperties_ vkGetPhysicalDeviceProperties; + PFN_vkCreateQueryPool_ vkCreateQueryPool; + PFN_vkCreateCommandPool_ vkCreateCommandPool; + PFN_vkAllocateCommandBuffers_ vkAllocateCommandBuffers; + PFN_vkCreateFence_ vkCreateFence; + PFN_vkCmdResetQueryPool_ vkCmdResetQueryPool; + PFN_vkQueueSubmit_ vkQueueSubmit; + PFN_vkWaitForFences_ vkWaitForFences; + PFN_vkResetCommandBuffer_ vkResetCommandBuffer; + PFN_vkCmdWriteTimestamp_ vkCmdWriteTimestamp; + PFN_vkGetQueryPoolResults_ vkGetQueryPoolResults; + PFN_vkBeginCommandBuffer_ vkBeginCommandBuffer; + PFN_vkEndCommandBuffer_ vkEndCommandBuffer; + PFN_vkResetFences_ vkResetFences; + PFN_vkDestroyCommandPool_ vkDestroyCommandPool; + PFN_vkDestroyQueryPool_ vkDestroyQueryPool; + PFN_vkDestroyFence_ vkDestroyFence; + PFN_vkFreeCommandBuffers_ vkFreeCommandBuffers; + }; + + // Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx + // Image: http://i.msdn.microsoft.com/dynimg/IC24340.png + struct Color + { + enum + { + Null = 0x00000000, + AliceBlue = 0xFFF0F8FF, + AntiqueWhite = 0xFFFAEBD7, + Aqua = 0xFF00FFFF, + Aquamarine = 0xFF7FFFD4, + Azure = 0xFFF0FFFF, + Beige = 0xFFF5F5DC, + Bisque = 0xFFFFE4C4, + Black = 0xFF000000, + BlanchedAlmond = 0xFFFFEBCD, + Blue = 0xFF0000FF, + BlueViolet = 0xFF8A2BE2, + Brown = 0xFFA52A2A, + BurlyWood = 0xFFDEB887, 
+ CadetBlue = 0xFF5F9EA0, + Chartreuse = 0xFF7FFF00, + Chocolate = 0xFFD2691E, + Coral = 0xFFFF7F50, + CornflowerBlue = 0xFF6495ED, + Cornsilk = 0xFFFFF8DC, + Crimson = 0xFFDC143C, + Cyan = 0xFF00FFFF, + DarkBlue = 0xFF00008B, + DarkCyan = 0xFF008B8B, + DarkGoldenRod = 0xFFB8860B, + DarkGray = 0xFFA9A9A9, + DarkGreen = 0xFF006400, + DarkKhaki = 0xFFBDB76B, + DarkMagenta = 0xFF8B008B, + DarkOliveGreen = 0xFF556B2F, + DarkOrange = 0xFFFF8C00, + DarkOrchid = 0xFF9932CC, + DarkRed = 0xFF8B0000, + DarkSalmon = 0xFFE9967A, + DarkSeaGreen = 0xFF8FBC8F, + DarkSlateBlue = 0xFF483D8B, + DarkSlateGray = 0xFF2F4F4F, + DarkTurquoise = 0xFF00CED1, + DarkViolet = 0xFF9400D3, + DeepPink = 0xFFFF1493, + DeepSkyBlue = 0xFF00BFFF, + DimGray = 0xFF696969, + DodgerBlue = 0xFF1E90FF, + FireBrick = 0xFFB22222, + FloralWhite = 0xFFFFFAF0, + ForestGreen = 0xFF228B22, + Fuchsia = 0xFFFF00FF, + Gainsboro = 0xFFDCDCDC, + GhostWhite = 0xFFF8F8FF, + Gold = 0xFFFFD700, + GoldenRod = 0xFFDAA520, + Gray = 0xFF808080, + Green = 0xFF008000, + GreenYellow = 0xFFADFF2F, + HoneyDew = 0xFFF0FFF0, + HotPink = 0xFFFF69B4, + IndianRed = 0xFFCD5C5C, + Indigo = 0xFF4B0082, + Ivory = 0xFFFFFFF0, + Khaki = 0xFFF0E68C, + Lavender = 0xFFE6E6FA, + LavenderBlush = 0xFFFFF0F5, + LawnGreen = 0xFF7CFC00, + LemonChiffon = 0xFFFFFACD, + LightBlue = 0xFFADD8E6, + LightCoral = 0xFFF08080, + LightCyan = 0xFFE0FFFF, + LightGoldenRodYellow = 0xFFFAFAD2, + LightGray = 0xFFD3D3D3, + LightGreen = 0xFF90EE90, + LightPink = 0xFFFFB6C1, + LightSalmon = 0xFFFFA07A, + LightSeaGreen = 0xFF20B2AA, + LightSkyBlue = 0xFF87CEFA, + LightSlateGray = 0xFF778899, + LightSteelBlue = 0xFFB0C4DE, + LightYellow = 0xFFFFFFE0, + Lime = 0xFF00FF00, + LimeGreen = 0xFF32CD32, + Linen = 0xFFFAF0E6, + Magenta = 0xFFFF00FF, + Maroon = 0xFF800000, + MediumAquaMarine = 0xFF66CDAA, + MediumBlue = 0xFF0000CD, + MediumOrchid = 0xFFBA55D3, + MediumPurple = 0xFF9370DB, + MediumSeaGreen = 0xFF3CB371, + MediumSlateBlue = 0xFF7B68EE, + MediumSpringGreen = 
0xFF00FA9A, + MediumTurquoise = 0xFF48D1CC, + MediumVioletRed = 0xFFC71585, + MidnightBlue = 0xFF191970, + MintCream = 0xFFF5FFFA, + MistyRose = 0xFFFFE4E1, + Moccasin = 0xFFFFE4B5, + NavajoWhite = 0xFFFFDEAD, + Navy = 0xFF000080, + OldLace = 0xFFFDF5E6, + Olive = 0xFF808000, + OliveDrab = 0xFF6B8E23, + Orange = 0xFFFFA500, + OrangeRed = 0xFFFF4500, + Orchid = 0xFFDA70D6, + PaleGoldenRod = 0xFFEEE8AA, + PaleGreen = 0xFF98FB98, + PaleTurquoise = 0xFFAFEEEE, + PaleVioletRed = 0xFFDB7093, + PapayaWhip = 0xFFFFEFD5, + PeachPuff = 0xFFFFDAB9, + Peru = 0xFFCD853F, + Pink = 0xFFFFC0CB, + Plum = 0xFFDDA0DD, + PowderBlue = 0xFFB0E0E6, + Purple = 0xFF800080, + Red = 0xFFFF0000, + RosyBrown = 0xFFBC8F8F, + RoyalBlue = 0xFF4169E1, + SaddleBrown = 0xFF8B4513, + Salmon = 0xFFFA8072, + SandyBrown = 0xFFF4A460, + SeaGreen = 0xFF2E8B57, + SeaShell = 0xFFFFF5EE, + Sienna = 0xFFA0522D, + Silver = 0xFFC0C0C0, + SkyBlue = 0xFF87CEEB, + SlateBlue = 0xFF6A5ACD, + SlateGray = 0xFF708090, + Snow = 0xFFFFFAFA, + SpringGreen = 0xFF00FF7F, + SteelBlue = 0xFF4682B4, + Tan = 0xFFD2B48C, + Teal = 0xFF008080, + Thistle = 0xFFD8BFD8, + Tomato = 0xFFFF6347, + Turquoise = 0xFF40E0D0, + Violet = 0xFFEE82EE, + Wheat = 0xFFF5DEB3, + White = 0xFFFFFFFF, + WhiteSmoke = 0xFFF5F5F5, + Yellow = 0xFFFFFF00, + YellowGreen = 0xFF9ACD32, + }; + }; + + struct Filter + { + enum Type : uint32_t + { + None, + + // CPU + AI, + Animation, + Audio, + Debug, + Camera, + Cloth, + GameLogic, + Input, + Navigation, + Network, + Physics, + Rendering, + Scene, + Script, + Streaming, + UI, + VFX, + Visibility, + Wait, + + // IO + IO, + + // GPU + GPU_Cloth, + GPU_Lighting, + GPU_PostFX, + GPU_Reflections, + GPU_Scene, + GPU_Shadows, + GPU_UI, + GPU_VFX, + GPU_Water, + + }; + }; + + #define OPTICK_MAKE_CATEGORY(filter, color) ((Optick::Category::Type)(((uint64_t)(1ull) << (filter + 32)) | (uint64_t)color)) + + struct Category + { + enum Type : uint64_t + { + // CPU + None = OPTICK_MAKE_CATEGORY(Filter::None, Color::Null), + 
AI = OPTICK_MAKE_CATEGORY(Filter::AI, Color::Purple), + Animation = OPTICK_MAKE_CATEGORY(Filter::Animation, Color::LightSkyBlue), + Audio = OPTICK_MAKE_CATEGORY(Filter::Audio, Color::HotPink), + Debug = OPTICK_MAKE_CATEGORY(Filter::Debug, Color::Black), + Camera = OPTICK_MAKE_CATEGORY(Filter::Camera, Color::Black), + Cloth = OPTICK_MAKE_CATEGORY(Filter::Cloth, Color::DarkGreen), + GameLogic = OPTICK_MAKE_CATEGORY(Filter::GameLogic, Color::RoyalBlue), + Input = OPTICK_MAKE_CATEGORY(Filter::Input, Color::Ivory), + Navigation = OPTICK_MAKE_CATEGORY(Filter::Navigation, Color::Magenta), + Network = OPTICK_MAKE_CATEGORY(Filter::Network, Color::Olive), + Physics = OPTICK_MAKE_CATEGORY(Filter::Physics, Color::LawnGreen), + Rendering = OPTICK_MAKE_CATEGORY(Filter::Rendering, Color::BurlyWood), + Scene = OPTICK_MAKE_CATEGORY(Filter::Scene, Color::RoyalBlue), + Script = OPTICK_MAKE_CATEGORY(Filter::Script, Color::Plum), + Streaming = OPTICK_MAKE_CATEGORY(Filter::Streaming, Color::Gold), + UI = OPTICK_MAKE_CATEGORY(Filter::UI, Color::PaleTurquoise), + VFX = OPTICK_MAKE_CATEGORY(Filter::VFX, Color::SaddleBrown), + Visibility = OPTICK_MAKE_CATEGORY(Filter::Visibility, Color::Snow), + Wait = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::Tomato), + WaitEmpty = OPTICK_MAKE_CATEGORY(Filter::Wait, Color::White), + // IO + IO = OPTICK_MAKE_CATEGORY(Filter::IO, Color::Khaki), + // GPU + GPU_Cloth = OPTICK_MAKE_CATEGORY(Filter::GPU_Cloth, Color::DarkGreen), + GPU_Lighting = OPTICK_MAKE_CATEGORY(Filter::GPU_Lighting, Color::Khaki), + GPU_PostFX = OPTICK_MAKE_CATEGORY(Filter::GPU_PostFX, Color::Maroon), + GPU_Reflections = OPTICK_MAKE_CATEGORY(Filter::GPU_Reflections, Color::CadetBlue), + GPU_Scene = OPTICK_MAKE_CATEGORY(Filter::GPU_Scene, Color::RoyalBlue), + GPU_Shadows = OPTICK_MAKE_CATEGORY(Filter::GPU_Shadows, Color::LightSlateGray), + GPU_UI = OPTICK_MAKE_CATEGORY(Filter::GPU_UI, Color::PaleTurquoise), + GPU_VFX = OPTICK_MAKE_CATEGORY(Filter::GPU_VFX, Color::SaddleBrown), + GPU_Water = 
OPTICK_MAKE_CATEGORY(Filter::GPU_Water, Color::SteelBlue), + }; + + static uint32_t GetMask(Type t) { return (uint32_t)(t >> 32); } + static uint32_t GetColor(Type t) { return (uint32_t)(t); } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Mode +{ + enum Type + { + // OFF + OFF = 0x0, + // Collect Categories (top-level events) + INSTRUMENTATION_CATEGORIES = (1 << 0), + // Collect Events + INSTRUMENTATION_EVENTS = (1 << 1), + // Collect Events + Categories + INSTRUMENTATION = (INSTRUMENTATION_CATEGORIES | INSTRUMENTATION_EVENTS), + // Legacy (keep for compatibility reasons) + SAMPLING = (1 << 2), + // Collect Data Tags + TAGS = (1 << 3), + // Enable Autosampling Events (automatic callstacks) + AUTOSAMPLING = (1 << 4), + // Enable Switch-Contexts Events + SWITCH_CONTEXT = (1 << 5), + // Collect I/O Events + IO = (1 << 6), + // Collect GPU Events + GPU = (1 << 7), + END_SCREENSHOT = (1 << 8), + RESERVED_0 = (1 << 9), + RESERVED_1 = (1 << 10), + // Collect HW Events + HW_COUNTERS = (1 << 11), + // Collect Events in Live mode + LIVE = (1 << 12), + RESERVED_2 = (1 << 13), + RESERVED_3 = (1 << 14), + RESERVED_4 = (1 << 15), + // Collect System Calls + SYS_CALLS = (1 << 16), + // Collect Events from Other Processes + OTHER_PROCESSES = (1 << 17), + // Automation + NOGUI = (1 << 18), + + TRACER = AUTOSAMPLING | SWITCH_CONTEXT | SYS_CALLS, + DEFAULT = INSTRUMENTATION | TAGS | AUTOSAMPLING | SWITCH_CONTEXT | IO | GPU | SYS_CALLS | OTHER_PROCESSES, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct FrameType +{ + enum Type + { + CPU, + GPU, + Render, + COUNT, + + NONE = -1, + }; +}; 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API int64_t GetHighPrecisionTime(); +OPTICK_API int64_t GetHighPrecisionFrequency(); +OPTICK_API void Update(); +OPTICK_API uint32_t BeginFrame(FrameType::Type type = FrameType::CPU, int64_t timestamp = -1, uint64_t threadID = (uint64_t)-1); +OPTICK_API uint32_t EndFrame(FrameType::Type type = FrameType::CPU, int64_t timestamp = -1, uint64_t threadID = (uint64_t)-1); +OPTICK_API bool IsActive(Mode::Type mode = Mode::INSTRUMENTATION_EVENTS); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventStorage; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterFiber(uint64_t fiberId, EventStorage** slot); +OPTICK_API bool RegisterThread(const char* name); +OPTICK_API bool RegisterThread(const wchar_t* name); +OPTICK_API bool UnRegisterThread(bool keepAlive); +OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread(); +OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadMask +{ + enum Type + { + None = 0, + Main = 1 << 0, + GPU = 1 << 1, + IO = 1 << 2, + Idle = 1 << 3, + Render = 1 << 4, + }; +}; + +OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID = uint64_t(-1), ThreadMask::Type type = ThreadMask::None); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct State +{ + enum Type + { + // Starting a new capture + START_CAPTURE, + + // Stopping current capture + STOP_CAPTURE, + + // Dumping capture to the GUI + // Useful for attaching summary and screenshot to the capture + 
DUMP_CAPTURE, + + // Cancel current capture + CANCEL_CAPTURE, + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Sets a state change callback +typedef bool (*StateCallback)(State::Type state); +OPTICK_API bool SetStateChangedCallback(StateCallback cb); + +// Attaches a key-value pair to the capture's summary +// Example: AttachSummary("Version", "v12.0.1"); +// AttachSummary("Platform", "Windows"); +// AttachSummary("Config", "Release_x64"); +// AttachSummary("Settings", "Ultra"); +// AttachSummary("Map", "Atlantida"); +// AttachSummary("Position", "123.0,120.0,41.1"); +// AttachSummary("CPU", "Intel(R) Xeon(R) CPU E5410@2.33GHz"); +// AttachSummary("GPU", "NVIDIA GeForce GTX 980 Ti"); +OPTICK_API bool AttachSummary(const char* key, const char* value); + +struct File +{ + enum Type + { + // Supported formats: PNG, JPEG, BMP, TIFF + OPTICK_IMAGE, + + // Text file + OPTICK_TEXT, + + // Any other type + OPTICK_OTHER, + }; +}; +// Attaches a file to the current capture +OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size); +OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path); +OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventDescription; +struct Frame; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventTime +{ + static const int64_t INVALID_TIMESTAMP = (int64_t)-1; + + int64_t start; + int64_t finish; + + OPTICK_INLINE void Start() { start = Optick::GetHighPrecisionTime(); } + OPTICK_INLINE void Stop() { finish = Optick::GetHighPrecisionTime(); } + OPTICK_INLINE bool IsValid() const { return start < finish && start != INVALID_TIMESTAMP && 
finish != INVALID_TIMESTAMP; } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct EventData : public EventTime +{ + const EventDescription* description; + + bool operator<(const EventData& other) const + { + if (start != other.start) + return start < other.start; + + // Reversed order for finish intervals (parent first) + return finish > other.finish; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API SyncData : public EventTime +{ + uint64_t newThreadId; + uint64_t oldThreadId; + uint8_t core; + int8_t reason; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API FiberSyncData : public EventTime +{ + uint64_t threadId; + + static void AttachToThread(EventStorage* storage, uint64_t threadId); + static void DetachFromThread(EventStorage* storage); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template +struct TagData +{ + const EventDescription* description; + int64_t timestamp; + T data; + TagData() {} + TagData(const EventDescription& desc, T d) : description(&desc), timestamp(Optick::GetHighPrecisionTime()), data(d) {} + TagData(const EventDescription& desc, T d, int64_t t) : description(&desc), timestamp(t), data(d) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API EventDescription +{ + enum Flags : uint8_t + { + IS_CUSTOM_NAME = 1 << 0, + COPY_NAME_STRING = 1 << 1, + COPY_FILENAME_STRING = 1 << 2, + }; + + const char* name; + const char* file; + uint32_t line; + uint32_t index; + uint32_t color; + uint32_t filter; + uint8_t flags; + + static EventDescription* Create(const char* 
eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor = Color::Null, const unsigned long filter = 0, const uint8_t eventFlags = 0); + static EventDescription* CreateShared(const char* eventName, const char* fileName = nullptr, const unsigned long fileLine = 0, const unsigned long eventColor = Color::Null, const unsigned long filter = 0); + + EventDescription(); +private: + friend class EventDescriptionBoard; + EventDescription& operator=(const EventDescription&); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API Event +{ + EventData* data; + + static EventData* Start(const EventDescription& description); + static void Stop(EventData& data); + + static void Push(const char* name); + static void Push(const EventDescription& description); + static void Pop(); + + static void Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish); + static void Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart); + static void Pop(EventStorage* storage, int64_t timestampStart); + + + Event(const EventDescription& description) + { + data = Start(description); + } + + ~Event() + { + if (data) + Stop(*data); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const char* eventName = nullptr, const ::Optick::Category::Type category = ::Optick::Category::None, uint8_t flags = 0) +{ + if (eventName != nullptr) + flags |= ::Optick::EventDescription::IS_CUSTOM_NAME; + + return ::Optick::EventDescription::Create(eventName != nullptr ? 
eventName : functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category), flags); +} +OPTICK_INLINE Optick::EventDescription* CreateDescription(const char* functionName, const char* fileName, int fileLine, const ::Optick::Category::Type category) +{ + return ::Optick::EventDescription::Create(functionName, fileName, (unsigned long)fileLine, ::Optick::Category::GetColor(category), ::Optick::Category::GetMask(category)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUEvent +{ + EventData* data; + + static EventData* Start(const EventDescription& description); + static void Stop(EventData& data); + + GPUEvent(const EventDescription& description) + { + data = Start(description); + } + + ~GPUEvent() + { + if (data) + Stop(*data); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API Tag +{ + static void Attach(const EventDescription& description, float val); + static void Attach(const EventDescription& description, int32_t val); + static void Attach(const EventDescription& description, uint32_t val); + static void Attach(const EventDescription& description, uint64_t val); + static void Attach(const EventDescription& description, float val[3]); + static void Attach(const EventDescription& description, const char* val); + static void Attach(const EventDescription& description, const char* val, uint16_t length); + + // Derived + static void Attach(const EventDescription& description, float x, float y, float z) + { + float p[3] = { x, y, z }; Attach(description, p); + } + +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ThreadScope +{ + ThreadScope(const char* name) + { + RegisterThread(name); + } + + 
ThreadScope(const wchar_t* name) + { + RegisterThread(name); + } + + ~ThreadScope() + { + UnRegisterThread(false); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +enum OPTICK_API GPUQueueType +{ + GPU_QUEUE_GRAPHICS, + GPU_QUEUE_COMPUTE, + GPU_QUEUE_TRANSFER, + GPU_QUEUE_VSYNC, + + GPU_QUEUE_COUNT, +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUContext +{ + void* cmdBuffer; + GPUQueueType queue; + int node; + GPUContext(void* c = nullptr, GPUQueueType q = GPU_QUEUE_GRAPHICS, int n = 0) : cmdBuffer(c), queue(q), node(n) {} +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues); +OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions); +OPTICK_API void GpuFlip(void* swapChain); +OPTICK_API GPUContext SetGpuContext(GPUContext context); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OPTICK_API GPUContextScope +{ + GPUContext prevContext; + + GPUContextScope(ID3D12CommandList* cmdList, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) + { + prevContext = SetGpuContext(GPUContext(cmdList, queue, node)); + } + + GPUContextScope(VkCommandBuffer cmdBuffer, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0) + { + prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node)); + } + + ~GPUContextScope() + { + SetGpuContext(prevContext); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame = FrameType::CPU); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef void* (*AllocateFn)(size_t); +typedef void (*DeallocateFn)(void*); +typedef void (*InitThreadCb)(void); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void SetAllocator(AllocateFn allocateFn, DeallocateFn deallocateFn, InitThreadCb initThreadCb); +OPTICK_API void Shutdown(); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef void(*CaptureSaveChunkCb)(const char*,size_t); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool StartCapture(Mode::Type mode = Mode::DEFAULT, int samplingFrequency = 1000, bool force = true); +OPTICK_API bool StopCapture(bool force = true); +OPTICK_API bool SaveCapture(CaptureSaveChunkCb dataCb, bool force = true); +OPTICK_API bool SaveCapture(const char* path, bool force = true); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct OptickApp +{ + const char* m_Name; + OptickApp(const char* name) : m_Name(name) { StartCapture(); } + ~OptickApp() { StopCapture(); SaveCapture(m_Name); } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#define OPTICK_UNUSED(x) (void)(x) +// Workaround for gcc compiler +#define OPTICK_VA_ARGS(...) , ##__VA_ARGS__ + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Scoped profiling event which automatically grabs current function name. 
+// Use this macro 95% of the time. +// Example A: +// void Function() +// { +// OPTICK_EVENT(); +// ... code ... +// } +// or +// void Function() +// { +// OPTICK_EVENT("CustomFunctionName"); +// ... code ... +// } +// Notes: +// Optick captures full name of the function including name space and arguments. +// Full name is usually shortened in the Optick GUI in order to highlight the most important bits. +#define OPTICK_EVENT(...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::CreateDescription(OPTICK_FUNC, __FILE__, __LINE__, ##__VA_ARGS__); \ + ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *(OPTICK_CONCAT(autogen_description_, __LINE__)) ); + +// Backward compatibility with previous versions of Optick +//#if !defined(PROFILE) +//#define PROFILE OPTICK_EVENT() +//#endif + +// Scoped profiling macro with predefined color. +// Use this macro for high-level function calls (e.g. AI, Physics, Audio, Render etc.). +// Example: +// void UpdateAI() +// { +// OPTICK_CATEGORY("UpdateAI", Optick::Category::AI); +// ... code ... +// } +// +// Macro could automatically capture current function name: +// void UpdateAI() +// { +// OPTICK_CATEGORY(OPTICK_FUNC, Optick::Category::AI); +// ... code ... +// } +#define OPTICK_CATEGORY(NAME, CATEGORY) OPTICK_EVENT(NAME, CATEGORY) + +// Profiling event for Main Loop update. +// You need to call this function in the beginning of the each new frame. +// Example: +// while (true) +// { +// OPTICK_FRAME("MainThread"); +// ... code ... +// } +#define OPTICK_FRAME(FRAME_NAME, ...) 
static ::Optick::ThreadScope mainThreadScope(FRAME_NAME); \ + OPTICK_UNUSED(mainThreadScope); \ + ::Optick::EndFrame(__VA_ARGS__); \ + ::Optick::Update(); \ + uint32_t frameNumber = ::Optick::BeginFrame(__VA_ARGS__); \ + ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)(*::Optick::GetFrameDescription(__VA_ARGS__)); \ + OPTICK_TAG("Frame", frameNumber); + +#define OPTICK_UPDATE() ::Optick::Update(); +#define OPTICK_FRAME_FLIP(...) ::Optick::EndFrame(__VA_ARGS__); ::Optick::BeginFrame(__VA_ARGS__); + +// Scoped event for categorized frame types. +// Example: +// void UpdateFrame() +// { +// // Flip "Main/Update" frame +// OPTICK_FRAME_EVENT(Optick::FrameType::CPU); +// +// // Root category event +// OPTICK_CATEGORY("UpdateFrame", Optick::Category::GameLogic); +// +// ... +// } +// +#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...) ::Optick::EndFrame(FRAME_TYPE); \ + switch (FRAME_TYPE) { \ + case Optick::FrameType::CPU: \ + ::Optick::Update(); \ + break; \ + default: \ + break; \ + } \ + ::Optick::BeginFrame(FRAME_TYPE); \ + ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)(*::Optick::GetFrameDescription(FRAME_TYPE)); + + +// Thread registration macro. +// Example: +// void WorkerThread(...) +// { +// OPTICK_THREAD("Worker"); +// while (isRunning) +// { +// ... +// } +// } +#define OPTICK_THREAD(THREAD_NAME) ::Optick::ThreadScope brofilerThreadScope(THREAD_NAME); \ + OPTICK_UNUSED(brofilerThreadScope); \ + + +// Thread registration macros. +// Useful for integration with custom job-managers. +#define OPTICK_START_THREAD(THREAD_NAME) ::Optick::RegisterThread(THREAD_NAME); +#define OPTICK_STOP_THREAD() ::Optick::UnRegisterThread(false); + +// Attaches a custom data-tag. 
+// Supported types: int32, uint32, uint64, vec3, string (cut to 32 characters) +// Example: +// OPTICK_TAG("PlayerName", name[index]); +// OPTICK_TAG("Health", 100); +// OPTICK_TAG("Score", 0x80000000u); +// OPTICK_TAG("Height(cm)", 176.3f); +// OPTICK_TAG("Address", (uint64)*this); +// OPTICK_TAG("Position", 123.0f, 456.0f, 789.0f); +#define OPTICK_TAG(NAME, ...) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_tag_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_tag_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_tag_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::Tag::Attach(*OPTICK_CONCAT(autogen_tag_, __LINE__), __VA_ARGS__); \ + +// Scoped macro with DYNAMIC name. +// Optick holds a copy of the provided name. +// Each scope does a search in hashmap for the name. +// Please use variations with STATIC names where it's possible. +// Use this macro for quick prototyping or intergratoin with other profiling systems (e.g. UE4) +// Example: +// const char* name = ... ; +// OPTICK_EVENT_DYNAMIC(name); +#define OPTICK_EVENT_DYNAMIC(NAME) OPTICK_CUSTOM_EVENT(::Optick::EventDescription::CreateShared(NAME, __FILE__, __LINE__)); +// Push\Pop profiling macro with DYNAMIC name. +#define OPTICK_PUSH_DYNAMIC(NAME) ::Optick::Event::Push(NAME); + +// Push\Pop profiling macro with STATIC name. +// Please avoid using Push\Pop approach in favor for scoped macros. +// For backward compatibility with some engines. +// Example: +// OPTICK_PUSH("ScopeName"); +// ... 
+// OPTICK_POP(); +#define OPTICK_PUSH(NAME) static ::Optick::EventDescription* OPTICK_CONCAT(autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::Event::Push(*OPTICK_CONCAT(autogen_description_, __LINE__)); +#define OPTICK_POP() ::Optick::Event::Pop(); + + +// Scoped macro with predefined Optick::EventDescription. +// Use these events instead of DYNAMIC macros to minimize overhead. +// Common use-case: integrating Optick with internal script languages (e.g. Lua, Actionscript(Scaleform), etc.). +// Example: +// Generating EventDescription once during initialization: +// Optick::EventDescription* description = Optick::EventDescription::CreateShared("FunctionName"); +// +// Then we could just use a pointer to cached description later for profiling: +// OPTICK_CUSTOM_EVENT(description); +#define OPTICK_CUSTOM_EVENT(DESCRIPTION) ::Optick::Event OPTICK_CONCAT(autogen_event_, __LINE__)( *DESCRIPTION ); \ + +// Registration of a custom EventStorage (e.g. GPU, IO, etc.) +// Use it to present any extra information on the timeline. +// Example: +// Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O"); +// Notes: +// Registration of a new storage is thread-safe. +#define OPTICK_STORAGE_REGISTER(STORAGE_NAME) ::Optick::RegisterStorage(STORAGE_NAME); + +// Adding events to the custom storage. +// Helps to integrate Optick into already existing profiling systems (e.g. GPU Profiler, I/O profiler, etc.). +// Example: +// //Registering a storage - should be done once during initialization +// static Optick::EventStorage* IOStorage = Optick::RegisterStorage("I/O"); +// +// int64_t cpuTimestampStart = Optick::GetHighPrecisionTime(); +// ... 
+// int64_t cpuTimestampFinish = Optick::GetHighPrecisionTime(); +// +// //Creating a shared event-description +// static Optick::EventDescription* IORead = Optick::EventDescription::CreateShared("IO Read"); +// +// OPTICK_STORAGE_EVENT(IOStorage, IORead, cpuTimestampStart, cpuTimestampFinish); +// Notes: +// It's not thread-safe to add events to the same storage from multiple threads. +// Please guarantee thread-safety on the higher level if access from multiple threads to the same storage is required. +#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Add(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH); } +#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START) if (::Optick::IsActive()) { ::Optick::Event::Push(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START); } +#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH) if (::Optick::IsActive()) { ::Optick::Event::Pop(STORAGE, CPU_TIMESTAMP_FINISH); } + + +// Registers state change callback +// If callback returns false - the call is repeated the next frame +#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK) ::Optick::SetStateChangedCallback(CALLBACK); + + +// Registers custom memory allocator within Optick core +// Example: +// OPTICK_SET_MEMORY_ALLOCATOR([](size_t size) -> void* { return operator new(size); }, [](void* p) { operator delete(p); }, nullptr); +// Params: +// INIT_THREAD_CALLBACK - callback for internal Optick threads (useful if you need to setup some TLS variables related to the memory allocator for your thread) +// Notes: +// Should be called before the first call to OPTICK_FRAME +// Allocation and deallocation functions should be thread-safe - Optick doesn't do any synchronization for these calls +#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION, INIT_THREAD_CALLBACK) ::Optick::SetAllocator(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION, INIT_THREAD_CALLBACK); + 
+// Shutdown +// Clears all the internal buffers allocated by Optick +// Notes: +// You shouldn't call any Optick functions after shutting down the system (it might lead to the undefined behaviour) +#define OPTICK_SHUTDOWN() ::Optick::Shutdown(); + +// GPU events +#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS) ::Optick::InitGpuD3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS); +#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS) ::Optick::InitGpuVulkan(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS); + +// Setup GPU context: +// Params: +// (CommandBuffer\CommandList, [Optional] Optick::GPUQueue queue, [Optional] int NodeIndex) +// Examples: +// OPTICK_GPU_CONTEXT(cmdBuffer); - all OPTICK_GPU_EVENT will use the same command buffer within the scope +// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE); - all events will use the same command buffer and queue for the scope +// OPTICK_GPU_CONTEXT(cmdBuffer, Optick::GPU_QUEUE_COMPUTE, gpuIndex); - all events will use the same command buffer and queue for the scope +#define OPTICK_GPU_CONTEXT(...) 
::Optick::GPUContextScope OPTICK_CONCAT(gpu_autogen_context_, __LINE__)(__VA_ARGS__); \ + (void)OPTICK_CONCAT(gpu_autogen_context_, __LINE__); + +#define OPTICK_GPU_EVENT(NAME) OPTICK_EVENT(NAME); \ + static ::Optick::EventDescription* OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = nullptr; \ + if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \ + ::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \ + +#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN); + +///////////////////////////////////////////////////////////////////////////////// +// [Automation][Startup] +///////////////////////////////////////////////////////////////////////////////// + +// Starts a new capture +// Params: +// [Optional] Mode::Type mode /*= Mode::DEFAULT*/ +// [Optional] int samplingFrequency /*= 1000*/ +#define OPTICK_START_CAPTURE(...) ::Optick::StartCapture(__VA_ARGS__); + +// Stops a new capture (Keeps data intact in the local buffers) +#define OPTICK_STOP_CAPTURE(...) ::Optick::StopCapture(__VA_ARGS__); + +// Saves capture +// Params: +// const char* FilePath - path to the capture +// or +// CaptureSaveChunkCb dataCb - callback for saving chunks of data +// Example: +// OPTICK_SAVE_CAPTURE("ConsoleApp.opt"); +#define OPTICK_SAVE_CAPTURE(...) ::Optick::SaveCapture(__VA_ARGS__); + +// Generate a capture for the whole scope +// Params: +// NAME - name of the application +// Examples: +// int main() { +// OPTICK_APP("MyGame"); //Optick will automatically save a capture in the working directory with the name "MyGame(2019-09-08.14-30-19).opt" +// ... +// } +#define OPTICK_APP(NAME) OPTICK_THREAD(NAME); \ + ::Optick::OptickApp _optickApp(NAME); \ + OPTICK_UNUSED(_optickApp); + + +#else +#define OPTICK_EVENT(...) 
+#define OPTICK_CATEGORY(NAME, CATEGORY)
+#define OPTICK_FRAME(NAME, ...) // variadic like the enabled OPTICK_FRAME(FRAME_NAME, ...)
+#define OPTICK_THREAD(THREAD_NAME)
+#define OPTICK_START_THREAD(THREAD_NAME)
+#define OPTICK_STOP_THREAD()
+#define OPTICK_TAG(NAME, ...) // variadic: OPTICK_TAG("Position", x, y, z) must also compile when disabled
+#define OPTICK_EVENT_DYNAMIC(NAME)
+#define OPTICK_PUSH_DYNAMIC(NAME)
+#define OPTICK_PUSH(NAME)
+#define OPTICK_POP()
+#define OPTICK_CUSTOM_EVENT(DESCRIPTION)
+#define OPTICK_STORAGE_REGISTER(STORAGE_NAME)
+#define OPTICK_STORAGE_EVENT(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START, CPU_TIMESTAMP_FINISH)
+#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START)
+#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH)
+#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK)
+#define OPTICK_SET_MEMORY_ALLOCATOR(...) // takes the enabled 3-argument form and the former 2-argument one
+#define OPTICK_SHUTDOWN()
+#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS)
+#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS)
+#define OPTICK_GPU_CONTEXT(...)
+#define OPTICK_GPU_EVENT(NAME)
+#define OPTICK_GPU_FLIP(SWAP_CHAIN)
+#define OPTICK_UPDATE()
+#define OPTICK_FRAME_FLIP(...)
+#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...)
+#define OPTICK_START_CAPTURE(...)
+#define OPTICK_STOP_CAPTURE(...) // enabled form forwards optional arguments to StopCapture
+#define OPTICK_SAVE_CAPTURE(...)
+#define OPTICK_APP(NAME) +#endif diff --git a/neo/libs/optick/optick_capi.cpp b/neo/libs/optick/optick_capi.cpp new file mode 100644 index 00000000..d298b43e --- /dev/null +++ b/neo/libs/optick/optick_capi.cpp @@ -0,0 +1,110 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#include "optick_capi.h"
+
+#if USE_OPTICK
+
+#include "optick_core.h"
+
+#if defined(__MACH__)
+#include
+#else
+#include
+#endif
+#include
+
+OPTICK_API void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength)
+{
+    Optick::OptickString<256> threadName(inThreadName, inThreadNameLength);
+    Optick::RegisterThread(threadName.data);
+}
+
+OPTICK_API uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine)
+{
+    Optick::OptickString<128> name(inFunctionName, inFunctionLength);
+    Optick::OptickString<256> file(inFileName, inFileNameLenght);
+    uint8_t flags = Optick::EventDescription::COPY_NAME_STRING | Optick::EventDescription::COPY_FILENAME_STRING | Optick::EventDescription::IS_CUSTOM_NAME;
+    return (uint64_t)::Optick::CreateDescription(name.data, file.data, inFileLine, nullptr, Optick::Category::None, flags);
+}
+OPTICK_API uint64_t OptickAPI_PushEvent(uint64_t inEventDescription)
+{
+    return (uint64_t)Optick::Event::Start(*((Optick::EventDescription*)inEventDescription));
+}
+
+OPTICK_API void OptickAPI_PopEvent(uint64_t inEventData)
+{
+    if (inEventData != 0) Optick::Event::Stop(*((Optick::EventData*)inEventData)); // a 0 handle (PushEvent forwards Event::Start's result, which Optick::Event also null-checks) has nothing to stop
+}
+
+OPTICK_API void OptickAPI_NextFrame()
+{
+    Optick::Event::Pop();
+    Optick::EndFrame();
+    Optick::Update();
+    Optick::BeginFrame();
+    Optick::Event::Push(*Optick::GetFrameDescription());
+}
+
+OPTICK_API void OptickAPI_StartCapture()
+{
+    Optick::StartCapture();
+}
+
+OPTICK_API void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength)
+{
+    Optick::OptickString<256> fileName(inFileName, inFileNameLength);
+    Optick::StopCapture();
+    Optick::SaveCapture(fileName.data);
+}
+
+OPTICK_API void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t inValueLength)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, inValue, inValueLength);
+}
+
+OPTICK_API void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int32_t inValue)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, inValue);
+}
+
+OPTICK_API void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, inValue);
+}
+
+OPTICK_API void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, inValue);
+}
+
+OPTICK_API void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, inValue);
+}
+
+OPTICK_API void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z)
+{
+    Optick::Tag::Attach(*(Optick::EventDescription*)inEventDescription, x, y, z);
+}
+
+#endif //USE_OPTICK
diff --git a/neo/libs/optick/optick_capi.h b/neo/libs/optick/optick_capi.h
new file mode 100644
index 00000000..2b507699
--- /dev/null
+++ b/neo/libs/optick/optick_capi.h
@@ -0,0 +1,80 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Config
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#include "optick.config.h"
+#include <stdint.h> // fixed-width integer types used throughout the C API below
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// EXPORTS
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_EXPORTS) && defined(_MSC_VER)
+#define OPTICK_API __declspec(dllexport)
+#else
+#define OPTICK_API
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if USE_OPTICK
+    OPTICK_API void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength);
+
+    OPTICK_API uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine);
+    OPTICK_API uint64_t OptickAPI_PushEvent(uint64_t inEventDescription);
+    OPTICK_API void OptickAPI_PopEvent(uint64_t inEventData);
+
+    OPTICK_API void OptickAPI_NextFrame();
+
+    OPTICK_API void OptickAPI_StartCapture();
+    OPTICK_API void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength);
+
+    OPTICK_API void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t inValueLength);
+    OPTICK_API void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int32_t inValue);
+    OPTICK_API void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue);
+    OPTICK_API void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue);
+    OPTICK_API void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue);
+    OPTICK_API void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z);
+#else // profiling disabled: no-op stubs with the same names and parameter types as the declarations above; static inline so C translation units need no out-of-line definition
+    static inline void OptickAPI_RegisterThread(const char* inThreadName, uint16_t inThreadNameLength) {}
+    static inline uint64_t OptickAPI_CreateEventDescription(const char* inFunctionName, uint16_t inFunctionLength, const char* inFileName, uint16_t inFileNameLenght, uint32_t inFileLine) { return 0; }
+    static inline uint64_t OptickAPI_PushEvent(uint64_t inEventDescription) { return 0; }
+    static inline void OptickAPI_PopEvent(uint64_t inEventData) {}
+    static inline void OptickAPI_NextFrame() {}
+    static inline void OptickAPI_StartCapture() {}
+    static inline void OptickAPI_StopCapture(const char* inFileName, uint16_t inFileNameLength) {}
+    static inline void OptickAPI_AttachTag_String(uint64_t inEventDescription, const char* inValue, uint16_t inValueLength) {}
+    static inline void OptickAPI_AttachTag_Int32(uint64_t inEventDescription, int32_t inValue) {}
+    static inline void OptickAPI_AttachTag_Float(uint64_t inEventDescription, float inValue) {}
+    static inline void OptickAPI_AttachTag_UInt32(uint64_t inEventDescription, uint32_t inValue) {}
+    static inline void OptickAPI_AttachTag_UInt64(uint64_t inEventDescription, uint64_t inValue) {}
+    static inline void OptickAPI_AttachTag_Point(uint64_t inEventDescription, float x, float y, float z) {}
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
\ No newline at end of file
diff --git a/neo/libs/optick/optick_common.h b/neo/libs/optick/optick_common.h
new file mode 100644
index 00000000..2c52c7be
--- /dev/null
+++ b/neo/libs/optick/optick_common.h
@@ -0,0 +1,187 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick.h" + +#include +#include +#include +#include +#include + +#if defined(OPTICK_MSVC) + +#ifdef OPTICK_UE4 +#include "Core/Public/Windows/AllowWindowsPlatformTypes.h" +#endif + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include + +#ifdef OPTICK_UE4 +#include "Core/Public/Windows/HideWindowsPlatformTypes.h" +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#endif + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Types +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef signed char int8; +typedef unsigned char uint8; +typedef unsigned char byte; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +#if defined(OPTICK_MSVC) +typedef __int64 int64; +typedef unsigned __int64 uint64; +#elif defined(OPTICK_GCC) +typedef int64_t int64; +typedef uint64_t uint64; +#else +#error Compiler is not supported +#endif +static_assert(sizeof(int8) == 1, "Invalid type size, int8"); +static_assert(sizeof(uint8) == 1, "Invalid type size, uint8"); +static_assert(sizeof(byte) == 1, "Invalid type size, byte"); +static_assert(sizeof(int16) == 2, "Invalid type size, int16"); +static_assert(sizeof(uint16) == 2, "Invalid type size, uint16"); +static_assert(sizeof(int32) == 4, "Invalid type size, int32"); +static_assert(sizeof(uint32) == 4, "Invalid type size, uint32"); +static_assert(sizeof(int64) == 8, "Invalid type size, int64"); +static_assert(sizeof(uint64) == 8, "Invalid type size, uint64"); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef uint64 ThreadID; +static const 
ThreadID INVALID_THREAD_ID = (ThreadID)-1; +typedef uint32 ProcessID; +static const ProcessID INVALID_PROCESS_ID = (ProcessID)-1; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Memory +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_ALIGN(N) __declspec( align( N ) ) +#elif defined(OPTICK_GCC) +#define OPTICK_ALIGN(N) __attribute__((aligned(N))) +#else +#error Can not define OPTICK_ALIGN. Unknown platform. +#endif +#define OPTICK_ARRAY_SIZE(ARR) (sizeof(ARR)/sizeof((ARR)[0])) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_NOINLINE __declspec(noinline) +#elif defined(OPTICK_GCC) +#define OPTICK_NOINLINE __attribute__((__noinline__)) +#else +#error Compiler is not supported +#endif +//////////////////////////////////////////////////////////////////////// +// OPTICK_THREAD_LOCAL +//////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_THREAD_LOCAL __declspec(thread) +#elif defined(OPTICK_GCC) +#define OPTICK_THREAD_LOCAL __thread +#else +#error Can not define OPTICK_THREAD_LOCAL. Unknown platform. 
+#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Asserts +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#define OPTICK_DEBUG_BREAK __debugbreak() +#elif defined(OPTICK_GCC) +#define OPTICK_DEBUG_BREAK __builtin_trap() +#else + #error Can not define OPTICK_DEBUG_BREAK. Unknown platform. +#endif +#define OPTICK_UNUSED(x) (void)(x) +#ifdef _DEBUG + #define OPTICK_ASSERT(arg, description) if (!(arg)) { OPTICK_DEBUG_BREAK; } + #define OPTICK_FAILED(description) { OPTICK_DEBUG_BREAK; } +#else + #define OPTICK_ASSERT(arg, description) + #define OPTICK_FAILED(description) +#endif +#define OPTICK_VERIFY(arg, description, operation) if (!(arg)) { OPTICK_DEBUG_BREAK; operation; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Safe functions +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_LINUX) || defined(OPTICK_OSX) +template +inline int sprintf_s(char(&buffer)[sizeOfBuffer], const char* format, ...) 
+{ + va_list ap; + va_start(ap, format); + int result = vsnprintf(buffer, sizeOfBuffer, format, ap); + va_end(ap); + return result; +} +#endif + +#if defined(OPTICK_GCC) +#include +template +inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +{ + return wcstombs(buffer, src, maxCount); +} +#endif + +#if defined(OPTICK_MSVC) +template +inline int wcstombs_s(char(&buffer)[sizeOfBuffer], const wchar_t* src, size_t maxCount) +{ + size_t converted = 0; + return ::wcstombs_s(&converted, buffer, src, maxCount); +} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#endif //USE_OPTICK diff --git a/neo/libs/optick/optick_core.cpp b/neo/libs/optick/optick_core.cpp new file mode 100644 index 00000000..352ee7af --- /dev/null +++ b/neo/libs/optick/optick_core.cpp @@ -0,0 +1,2036 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "optick_core.h" + +#if USE_OPTICK + +#include "optick.h" +#include "optick_server.h" + +#include +#include +#include + +////////////////////////////////////////////////////////////////////////// +// Start of the Platform-specific stuff +////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#include "optick_core.win.h" +#elif defined(OPTICK_LINUX) +#include "optick_core.linux.h" +#elif defined(OPTICK_OSX) +#include "optick_core.macos.h" +#elif defined(OPTICK_PS4) +#include "optick_core.ps4.h" +#elif defined(OPTICK_FREEBSD) +#include "optick_core.freebsd.h" +#endif +////////////////////////////////////////////////////////////////////////// +// End of the Platform-specific stuff +////////////////////////////////////////////////////////////////////////// + +extern "C" Optick::EventData* NextEvent() +{ + if (Optick::EventStorage* storage = Optick::Core::storage) + { + return &storage->NextEvent(); + } + + return nullptr; +} + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void* (*Memory::allocate)(size_t) = [](size_t size)->void* { return operator new(size); }; +void (*Memory::deallocate)(void* p) = [](void* p) { operator delete(p); }; +void (*Memory::initThread)(void) = nullptr; +#if defined(OPTICK_32BIT) + std::atomic Memory::memAllocated; +#else + std::atomic Memory::memAllocated; +#endif 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint64_t MurmurHash64A(const void * key, int len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len / 8); + + while (data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch (len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; // fallthrough + case 6: h ^= uint64_t(data2[5]) << 40; // fallthrough + case 5: h ^= uint64_t(data2[4]) << 32; // fallthrough + case 4: h ^= uint64_t(data2[3]) << 24; // fallthrough + case 3: h ^= uint64_t(data2[2]) << 16; // fallthrough + case 2: h ^= uint64_t(data2[1]) << 8; // fallthrough + case 1: h ^= uint64_t(data2[0]); // fallthrough + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint64_t StringHash::CalcHash(const char* str) +{ + return MurmurHash64A(str, (int)strlen(str), 0); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Base 64 +// https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} +string base64_decode(string const& encoded_string) { + static string base64_chars = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + int in_len = (int)encoded_string.size(); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + string ret; + + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { + char_array_4[i++] = encoded_string[in_]; in_++; + if (i == 4) { + for (i = 0; i < 4; i++) + char_array_4[i] = (unsigned char)base64_chars.find(char_array_4[i]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) + ret += char_array_3[i]; + i = 0; + } + } + + if (i) { + for (j = i; j < 4; j++) + char_array_4[j] = 0; + + for (j = 0; j < 4; j++) + char_array_4[j] = (unsigned char)base64_chars.find(char_array_4[j]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; (j < i - 1); j++) ret += char_array_3[j]; + } + + return ret; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Get current time in milliseconds +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64 GetTimeMilliSeconds() +{ + return Platform::GetTime() * 1000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64 TicksToMs(int64 ticks) +{ + return ticks * 1000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+int64 TicksToUs(int64 ticks) +{ + return ticks * 1000000 / Platform::GetFrequency(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template +OutputDataStream& operator<<(OutputDataStream& stream, const TagData& ob) +{ + return stream << ob.timestamp << ob.description->index << ob.data; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& os, const Symbol * const symbol) +{ + OPTICK_VERIFY(symbol, "Can't serialize NULL symbol!", return os); + return os << symbol->address << symbol->function << symbol->file << symbol->line; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& os, const Module& module) +{ + return os << module.path << (uint64)module.address << (uint64)module.size; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// VS TODO: Replace with random access iterator for MemoryPool +template +void SortMemoryPool(MemoryPool& memoryPool) +{ + size_t count = memoryPool.Size(); + if (count == 0) + return; + + vector memoryArray; + memoryArray.resize(count); + memoryPool.ToArray(&memoryArray[0]); + + std::sort(memoryArray.begin(), memoryArray.end()); + + memoryPool.Clear(true); + + for (const T& item : memoryArray) + memoryPool.Add(item); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescription::Create(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/, const uint8_t eventFlags /*= 0*/) +{ + return 
EventDescriptionBoard::Get().CreateDescription(eventName, fileName, fileLine, eventColor, filter, eventFlags); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescription::CreateShared(const char* eventName, const char* fileName, const unsigned long fileLine, const unsigned long eventColor /*= Color::Null*/, const unsigned long filter /*= 0*/) +{ + return EventDescriptionBoard::Get().CreateSharedDescription(eventName, fileName, fileLine, eventColor, filter); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription::EventDescription() : name(""), file(""), line(0), index((uint32_t)-1), color(0), filter(0), flags(0) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription& EventDescription::operator=(const EventDescription&) +{ + OPTICK_FAILED("It is pointless to copy EventDescription. 
Please, check you logic!"); return *this; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* Event::Start(const EventDescription& description) +{ + EventData* result = nullptr; + + if (EventStorage* storage = Core::storage) + { + result = &storage->NextEvent(); + result->description = &description; + result->Start(); + } + return result; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Stop(EventData& data) +{ + if (Core::storage != nullptr) + { + data.Stop(); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void OPTICK_INLINE PushEvent(EventStorage* pStorage, const EventDescription* description, int64_t timestampStart) +{ + if (EventStorage* storage = pStorage) + { + if (storage->pushPopEventStackIndex++ < storage->pushPopEventStack.size()) + { + EventData& result = storage->NextEvent(); + result.description = description; + result.start = timestampStart; + result.finish = EventTime::INVALID_TIMESTAMP; + storage->pushPopEventStack[storage->pushPopEventStackIndex - 1] = &result; + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void OPTICK_INLINE PopEvent(EventStorage* pStorage, int64_t timestampFinish) +{ + if (EventStorage* storage = pStorage) + if (storage->pushPopEventStackIndex > 0) + if (--(storage->pushPopEventStackIndex) < storage->pushPopEventStack.size()) + storage->pushPopEventStack[storage->pushPopEventStackIndex]->finish = timestampFinish; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(const char* name) +{ + if (EventStorage* storage = Core::storage) + { + EventDescription* desc = 
EventDescription::CreateShared(name); + PushEvent(storage, desc, GetHighPrecisionTime()); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(const EventDescription& description) +{ + PushEvent(Core::storage, &description, GetHighPrecisionTime()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Pop() +{ + PopEvent(Core::storage, GetHighPrecisionTime()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Add(EventStorage* storage, const EventDescription* description, int64_t timestampStart, int64_t timestampFinish) +{ + EventData& data = storage->eventBuffer.Add(); + data.description = description; + data.start = timestampStart; + data.finish = timestampFinish; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Push(EventStorage* storage, const EventDescription* description, int64_t timestampStart) +{ + PushEvent(storage, description, timestampStart); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Event::Pop(EventStorage* storage, int64_t timestampFinish) +{ + PopEvent(storage, timestampFinish); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* GPUEvent::Start(const EventDescription& description) +{ + EventData* result = nullptr; + + if (EventStorage* storage = Core::storage) + result = storage->gpuStorage.Start(description); + + return result; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void 
GPUEvent::Stop(EventData& data) +{ + if (EventStorage* storage = Core::storage) + storage->gpuStorage.Stop(data); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FiberSyncData::AttachToThread(EventStorage* storage, uint64_t threadId) +{ + if (storage) + { + FiberSyncData& data = storage->fiberSyncBuffer.Add(); + data.Start(); + data.finish = EventTime::INVALID_TIMESTAMP; + data.threadId = threadId; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void FiberSyncData::DetachFromThread(EventStorage* storage) +{ + if (storage) + { + if (FiberSyncData* syncData = storage->fiberSyncBuffer.Back()) + { + syncData->Stop(); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, float val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagFloatBuffer.Add(TagFloat(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, int32_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagS32Buffer.Add(TagS32(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, uint32_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagU32Buffer.Add(TagU32(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void 
Tag::Attach(const EventDescription& description, uint64_t val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagU64Buffer.Add(TagU64(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, float val[3]) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagPointBuffer.Add(TagPoint(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, const char* val) +{ + if (EventStorage* storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagStringBuffer.Add(TagString(description, val)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Tag::Attach(const EventDescription& description, const char* val, uint16_t length) +{ + if (EventStorage * storage = Core::storage) + if (storage->currentMode & Mode::TAGS) + storage->tagStringBuffer.Add(TagString(description, val, length)); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const EventDescription &ob) +{ + return stream << ob.name << ob.file << ob.line << ob.filter << ob.color << (float)0.0f << ob.flags; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const EventTime& ob) +{ + return stream << ob.start << ob.finish; +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const EventData& ob) +{ + return stream << (EventTime)(ob) << (ob.description ? ob.description->index : (uint32)-1); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const SyncData& ob) +{ + return stream << (EventTime)(ob) << ob.core << ob.reason << ob.newThreadId; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const FiberSyncData& ob) +{ + return stream << (EventTime)(ob) << ob.threadId; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const FrameData& ob) +{ + return stream << (EventData)(ob) << ob.threadID; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static std::mutex& GetBoardLock() +{ + // Initialize as static local variable to prevent problems with static initialization order + static std::mutex lock; + return lock; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescriptionBoard& EventDescriptionBoard::Get() +{ + static EventDescriptionBoard instance; + return instance; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const EventDescriptionList& 
EventDescriptionBoard::GetEvents() const +{ + return boardDescriptions; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void EventDescriptionBoard::Shutdown() +{ + boardDescriptions.Clear(false); + sharedNames.Clear(false); + sharedDescriptions.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescriptionBoard::CreateDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/, uint8_t flags /*= 0*/) +{ + std::lock_guard lock(GetBoardLock()); + + size_t index = boardDescriptions.Size(); + + EventDescription& desc = boardDescriptions.Add(); + desc.index = (uint32)index; + desc.name = (flags & EventDescription::COPY_NAME_STRING) != 0 ? CacheString(name) : name; + desc.file = (flags & EventDescription::COPY_FILENAME_STRING) != 0 ? 
CacheString(file) : file; + desc.line = line; + desc.color = color; + desc.filter = filter; + desc.flags = flags; + + return &desc; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventDescription* EventDescriptionBoard::CreateSharedDescription(const char* name, const char* file /*= nullptr*/, uint32_t line /*= 0*/, uint32_t color /*= Color::Null*/, uint32_t filter /*= 0*/) +{ + StringHash nameHash(name); + + std::lock_guard lock(sharedLock); + + std::pair cached = sharedDescriptions.insert({ nameHash, nullptr }); + + if (cached.second) + { + const char* nameCopy = CacheString(name); + cached.first->second = CreateDescription(nameCopy, file, line, color, filter); + } + + return cached.first->second; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const char* EventDescriptionBoard::CacheString(const char* name) +{ + return sharedNames.Add(name, strlen(name) + 1, false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob) +{ + std::lock_guard lock(GetBoardLock()); + stream << ob.GetEvents(); + return stream; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ProcessDescription::ProcessDescription(const char* processName, ProcessID pid, uint64 key) : name(processName), processID(pid), uniqueKey(key) +{ +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ThreadDescription::ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 _maxDepth /*= 1*/, int32 _priority /*= 0*/, uint32 _mask /*= 0*/) + : name(threadName), threadID(tid), processID(pid), 
maxDepth(_maxDepth), priority(_priority), mask(_mask) +{ +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64_t GetHighPrecisionTime() +{ + return Platform::GetTime(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +int64_t GetHighPrecisionFrequency() +{ + return Platform::GetFrequency(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const SysCallData &ob) +{ + return stream << (const EventData&)ob << ob.threadID << ob.id; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +SysCallData& SysCallCollector::Add() +{ + return syscallPool.Add(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SysCallCollector::Clear() +{ + syscallPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool SysCallCollector::Serialize(OutputDataStream& stream) +{ + stream << syscallPool; + + if (!syscallPool.IsEmpty()) + { + syscallPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void CallstackCollector::Add(const CallstackDesc& desc) +{ + if (uint64* storage = callstacksPool.TryAdd(desc.count + 3)) + { + storage[0] = desc.threadID; + storage[1] = desc.timestamp; + storage[2] = desc.count; + + for (uint64 i = 0; i < desc.count; ++i) + { + 
storage[3 + i] = desc.callstack[desc.count - i - 1]; + } + } + else + { + uint64& item0 = callstacksPool.Add(); + uint64& item1 = callstacksPool.Add(); + uint64& item2 = callstacksPool.Add(); + + item0 = desc.threadID; + item1 = desc.timestamp; + item2 = desc.count; + + for (uint64 i = 0; i < desc.count; ++i) + { + callstacksPool.Add() = desc.callstack[desc.count - i - 1]; + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void CallstackCollector::Clear() +{ + callstacksPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeModules(OutputDataStream& stream) +{ + if (SymbolEngine* symEngine = Core::Get().symbolEngine) + { + stream << symEngine->GetModules(); + return true; + } + else + { + stream << (int)0; + } + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeSymbols(OutputDataStream& stream) +{ + typedef unordered_set SymbolSet; + SymbolSet symbolSet; + + Core::Get().DumpProgress("Collecting Callstacks..."); + + for (CallstacksPool::const_iterator it = callstacksPool.begin(); it != callstacksPool.end();) + { + CallstacksPool::const_iterator startIt = it; + OPTICK_UNUSED(startIt); + + uint64 threadID = *it; + OPTICK_UNUSED(threadID); + ++it; //Skip ThreadID + uint64 timestamp = *it; + OPTICK_UNUSED + (timestamp); + ++it; //Skip Timestamp + uint64 count = *it; + count = (count & 0xFF); + ++it; //Skip Count + + bool isBadAddrFound = false; + + for (uint64 i = 0; i < count; ++i) + { + uint64 address = *it; + ++it; + + if (address == 0) + { + isBadAddrFound = true; + } + + if (!isBadAddrFound) + { + symbolSet.insert(address); + } + } + } + + SymbolEngine* symEngine = Core::Get().symbolEngine; + + vector symbols; + 
symbols.reserve(symbolSet.size()); + + Core::Get().DumpProgress("Resolving addresses ... "); + + if (symEngine) + { + int total = (int)symbolSet.size(); + const int progressBatchSize = 100; + for (auto it = symbolSet.begin(); it != symbolSet.end(); ++it) + { + uint64 address = *it; + if (const Symbol* symbol = symEngine->GetSymbol(address)) + { + symbols.push_back(symbol); + + if ((symbols.size() % progressBatchSize == 0) && Core::Get().IsTimeToReportProgress()) + { + Core::Get().DumpProgressFormatted("Resolving addresses %d / %d", (int)symbols.size(), total); + } + } + } + } + + stream << symbols; + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::SerializeCallstacks(OutputDataStream& stream) +{ + stream << callstacksPool; + + if (!callstacksPool.IsEmpty()) + { + callstacksPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool CallstackCollector::IsEmpty() const +{ + return callstacksPool.IsEmpty(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream & operator<<(OutputDataStream &stream, const SwitchContextDesc &ob) +{ + return stream << ob.timestamp << ob.oldThreadId << ob.newThreadId << ob.cpuId << ob.reason; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SwitchContextCollector::Add(const SwitchContextDesc& desc) +{ + switchContextPool.Add() = desc; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void 
SwitchContextCollector::Clear() +{ + switchContextPool.Clear(false); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool SwitchContextCollector::Serialize(OutputDataStream& stream) +{ + stream << switchContextPool; + + if (!switchContextPool.IsEmpty()) + { + switchContextPool.Clear(false); + return true; + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#include +#define CPUID(INFO, ID) __cpuid(INFO, ID) +#elif (defined(__ANDROID__) || defined(OPTICK_ARM)) +// Nothing +#elif defined(OPTICK_GCC) +#include +#define CPUID(INFO, ID) __cpuid(ID, INFO[0], INFO[1], INFO[2], INFO[3]) +#else +#error Platform is not supported! +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +string GetCPUName() +{ +#if defined(__ANDROID__) + FILE * fp = popen("cat /proc/cpuinfo | grep -m1 'model name'","r"); + char res[128] = {0}; + fread(res, 1, sizeof(res)-1, fp); + fclose(fp); + char* name = strstr(res, ":"); + if (name && strlen(name) > 2) + { + string s = name + 2; + s.erase(std::remove(s.begin(), s.end(), '\n'), s.end()); + return s; + } + return "Undefined CPU"; +#elif defined(OPTICK_ARM) + #if defined(OPTICK_ARM32) + return "ARM 32-bit"; + #else + return "ARM 64-bit"; + #endif +#else + int cpuInfo[4] = { -1 }; + char cpuBrandString[0x40] = { 0 }; + CPUID(cpuInfo, 0x80000000); + unsigned nExIds = cpuInfo[0]; + for (unsigned i = 0x80000000; i <= nExIds; ++i) + { + CPUID(cpuInfo, i); + if (i == 0x80000002) + memcpy(cpuBrandString, cpuInfo, sizeof(cpuInfo)); + else if (i == 0x80000003) + memcpy(cpuBrandString + 16, cpuInfo, sizeof(cpuInfo)); + else if 
(i == 0x80000004) + memcpy(cpuBrandString + 32, cpuInfo, sizeof(cpuInfo)); + } + return string(cpuBrandString); +#endif +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Core& Core::Get() +{ + static Core instance; + return instance; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::StartCapture() +{ + pendingState = State::START_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::StopCapture() +{ + pendingState = State::STOP_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::CancelCapture() +{ + pendingState = State::CANCEL_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpCapture() +{ + pendingState = State::DUMP_CAPTURE; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpProgress(const char* message) +{ + progressReportedLastTimestampMS = GetTimeMilliSeconds(); + + OutputDataStream stream; + stream << message; + + Server::Get().Send(DataResponse::ReportProgress, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(OPTICK_MSVC) +#pragma warning( push ) +#pragma warning( disable : 4996) +#endif +void Core::DumpProgressFormatted(const char* format, ...) 
+{ + va_list arglist; + char buffer[256] = { 0 }; + va_start(arglist, format); +#ifdef OPTICK_MSVC + vsprintf_s(buffer, format, arglist); +#else + vsprintf(buffer, format, arglist); +#endif + va_end(arglist); + DumpProgress(buffer); +} +#if defined(OPTICK_MSVC) +#pragma warning( pop ) +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool IsFrameDescription(const EventDescription* desc) +{ + for (int i = 0; i < FrameType::COUNT; ++i) + if (GetFrameDescription((FrameType::Type)i) == desc) + return true; + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool IsSleepDescription(const EventDescription* desc) +{ + return desc->color == Color::White; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool IsSleepOnlyScope(const ScopeData& scope) +{ + //if (!scope.categories.empty()) + // return false; + + const vector& events = scope.events; + for (auto it = events.begin(); it != events.end(); ++it) + { + const EventData& data = *it; + + if (!IsSleepDescription(data.description)) + { + return false; + } + } + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope) +{ + if (!entry.eventBuffer.IsEmpty()) + { + const EventData* rootEvent = nullptr; + const int64 batchLimitMs = 3; + + entry.eventBuffer.ForEach([&](const EventData& data) + { + if (data.finish >= data.start && data.start >= timeSlice.start && timeSlice.finish >= data.finish) + { + if (!rootEvent) + { + rootEvent = &data; + scope.InitRootEvent(*rootEvent); + } + else if (rootEvent->finish < data.finish) + { + // Batching together small buckets + // Flushing if we 
hit the following conditions: + // * Frame Description - don't batch frames together + // * SleepOnly scope - we ignore them + // * Sleep Event - flush the previous batch + if (IsFrameDescription(rootEvent->description) || TicksToMs(scope.header.event.finish - scope.header.event.start) > batchLimitMs || IsSleepDescription(data.description) || IsSleepOnlyScope(scope)) + scope.Send(); + + rootEvent = &data; + scope.InitRootEvent(*rootEvent); + } + else + { + scope.AddEvent(data); + } + } + }); + + scope.Send(); + + entry.eventBuffer.Clear(false); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpTags(EventStorage& entry, ScopeData& scope) +{ + if (!entry.tagFloatBuffer.IsEmpty() || + !entry.tagS32Buffer.IsEmpty() || + !entry.tagU32Buffer.IsEmpty() || + !entry.tagU64Buffer.IsEmpty() || + !entry.tagPointBuffer.IsEmpty() || + !entry.tagStringBuffer.IsEmpty()) + { + OutputDataStream tagStream; + tagStream << scope.header.boardNumber << scope.header.threadNumber; + tagStream + << (uint32)0 + << entry.tagFloatBuffer + << entry.tagU32Buffer + << entry.tagS32Buffer + << entry.tagU64Buffer + << entry.tagPointBuffer + << (uint32)0 + << (uint32)0 + << entry.tagStringBuffer; + Server::Get().Send(DataResponse::TagsPack, tagStream); + + entry.ClearTags(false); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope) +{ + // We need to sort events for all the custom thread storages + if (entry.description.threadID == INVALID_THREAD_ID) + entry.Sort(); + + // Events + DumpProgressFormatted("Serializing %s", entry.description.name.c_str()); + DumpEvents(entry.storage, timeSlice, scope); + DumpTags(entry.storage, scope); + OPTICK_ASSERT(entry.storage.fiberSyncBuffer.IsEmpty(), "Fiber switch events in native 
threads?"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope) +{ + // Events + DumpEvents(entry.storage, timeSlice, scope); + + if (!entry.storage.fiberSyncBuffer.IsEmpty()) + { + OutputDataStream fiberSynchronizationStream; + fiberSynchronizationStream << scope.header.boardNumber; + fiberSynchronizationStream << scope.header.fiberNumber; + fiberSynchronizationStream << entry.storage.fiberSyncBuffer; + Server::Get().Send(DataResponse::FiberSynchronizationData, fiberSynchronizationStream); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventTime CalculateRange(const ThreadEntry& entry, const EventDescription* rootDescription) +{ + EventTime timeSlice = { INT64_MAX, INT64_MIN }; + entry.storage.eventBuffer.ForEach([&](const EventData& data) + { + if (data.description == rootDescription) + { + timeSlice.start = std::min(timeSlice.start, data.start); + timeSlice.finish = std::max(timeSlice.finish, data.finish); + } + }); + return timeSlice; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventTime CalculateRange(FrameStorage& frameStorage) +{ + EventTime timeSlice = { INT64_MAX, INT64_MIN }; + frameStorage.m_Frames.ForEach([&](const FrameData& data) + { + timeSlice.start = std::min(timeSlice.start, data.start); + timeSlice.finish = std::max(timeSlice.finish, data.finish); + }); + return timeSlice; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpFrames(uint32 mode) +{ + std::lock_guard lock(threadsLock); + + if (frames.empty() || threads.empty()) + return; + + ++boardNumber; + + Server::Get().SendStart(); + + 
DumpProgress("Generating summary..."); + + GenerateCommonSummary(); + DumpSummary(); + + DumpProgress("Collecting Frame Events..."); + + std::array timeSlice; + for (int i = 0; i < FrameType::COUNT; ++i) + { + timeSlice[i] = CalculateRange(frames[i]); + } + + DumpBoard(mode, timeSlice[FrameType::CPU]); + + { + DumpProgress("Serializing Frames"); + OutputDataStream framesStream; + framesStream << boardNumber; + framesStream << (uint32)frames.size(); + for (size_t i = 0; i < frames.size(); ++i) + framesStream << frames[i].m_Frames; + Server::Get().Send(DataResponse::FramesPack, framesStream); + } + + ScopeData threadScope; + threadScope.header.boardNumber = boardNumber; + threadScope.header.fiberNumber = -1; + + if (gpuProfiler) + gpuProfiler->Dump(mode); + + for (size_t i = 0; i < threads.size(); ++i) + { + threadScope.header.threadNumber = (uint32)i; + + ThreadEntry* entry = threads[i]; + + EventTime range = timeSlice[FrameType::CPU]; + + if ((entry->description.mask & ThreadMask::GPU) != 0 && timeSlice[FrameType::GPU].IsValid()) + range = timeSlice[FrameType::GPU]; + + DumpThread(*entry, range, threadScope); + } + + ScopeData fiberScope; + fiberScope.header.boardNumber = (uint32)boardNumber; + fiberScope.header.threadNumber = -1; + for (size_t i = 0; i < fibers.size(); ++i) + { + fiberScope.header.fiberNumber = (uint32)i; + DumpFiber(*fibers[i], timeSlice[FrameType::CPU], fiberScope); + } + + for (int i = 0; i < FrameType::COUNT; ++i) + frames[i].Clear(false); + + CleanupThreadsAndFibers(); + + { + DumpProgress("Serializing SwitchContexts"); + OutputDataStream switchContextsStream; + switchContextsStream << boardNumber; + switchContextCollector.Serialize(switchContextsStream); + Server::Get().Send(DataResponse::SynchronizationData, switchContextsStream); + } + + { + DumpProgress("Serializing SysCalls"); + OutputDataStream callstacksStream; + callstacksStream << boardNumber; + syscallCollector.Serialize(callstacksStream); + 
Server::Get().Send(DataResponse::SyscallPack, callstacksStream); + } + + if (!callstackCollector.IsEmpty()) + { + OutputDataStream symbolsStream; + symbolsStream << boardNumber; + DumpProgress("Serializing Modules"); + callstackCollector.SerializeModules(symbolsStream); + callstackCollector.SerializeSymbols(symbolsStream); + Server::Get().Send(DataResponse::CallstackDescriptionBoard, symbolsStream); + + // We can free some memory now to unlock space for callstack serialization + DumpProgress("Deallocating memory for SymbolEngine"); + Memory::Delete(symbolEngine); + symbolEngine = nullptr; + + DumpProgress("Serializing callstacks"); + OutputDataStream callstacksStream; + callstacksStream << boardNumber; + callstackCollector.SerializeCallstacks(callstacksStream); + Server::Get().Send(DataResponse::CallstackPack, callstacksStream); + } + + forcedMainThreadIndex = (uint32)-1; + + Server::Get().SendFinish(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpSummary() +{ + OutputDataStream stream; + + // Board Number + stream << boardNumber; + + // Frames + double frequency = (double)Platform::GetFrequency(); + stream << (uint32_t)frames[FrameType::CPU].m_Frames.Size(); + for (const EventTime& frame : frames[FrameType::CPU].m_Frames) + { + double frameTimeMs = 1000.0 * (frame.finish - frame.start) / frequency; + stream << (float)frameTimeMs; + } + + // Summary + stream << (uint32_t)summary.size(); + for (size_t i = 0; i < summary.size(); ++i) + stream << summary[i].first << summary[i].second; + summary.clear(); + + // Attachments + stream << (uint32_t)attachments.size(); + for (const Attachment& att : attachments) + stream << (uint32_t)att.type << att.name << att.data; + attachments.clear(); + + // Send + Server::Get().Send(DataResponse::SummaryPack, stream); +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::CleanupThreadsAndFibers() +{ + std::lock_guard lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end();) + { + if (!(*it)->isAlive) + { + Memory::Delete(*it); + it = threads.erase(it); + } + else + { + ++it; + } + } +} + +void Core::DumpBoard(uint32 mode, EventTime timeSlice) +{ + OutputDataStream boardStream; + + boardStream << boardNumber; + boardStream << Platform::GetFrequency(); + boardStream << (uint64)0; // Origin + boardStream << (uint32)0; // Precision + boardStream << timeSlice; + boardStream << threads; + boardStream << fibers; + boardStream << forcedMainThreadIndex; + boardStream << EventDescriptionBoard::Get(); + boardStream << (uint32)0; // Tags + boardStream << (uint32)0; // Run + boardStream << (uint32)0; // Filters + boardStream << (uint32)0; // ThreadDescs + boardStream << mode; // Mode + boardStream << processDescs; + boardStream << threadDescs; + boardStream << (uint32)Platform::GetProcessID(); + boardStream << (uint32)std::thread::hardware_concurrency(); + Server::Get().Send(DataResponse::FrameDescriptionBoard, boardStream); + + // Cleanup + processDescs.clear(); + threadDescs.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::GenerateCommonSummary() +{ + AttachSummary("Platform", Platform::GetName()); + AttachSummary("CPU", GetCPUName().c_str()); + if (gpuProfiler) + AttachSummary("GPU", gpuProfiler->GetName().c_str()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Core::Core() + : progressReportedLastTimestampMS(0) + , boardNumber(0) + , stateCallback(nullptr) + , currentState(State::DUMP_CAPTURE) + , pendingState(State::DUMP_CAPTURE) + , forcedMainThreadIndex((uint32)-1) + , 
currentMode(Mode::OFF) + , previousMode(Mode::OFF) + , symbolEngine(nullptr) + , tracer(nullptr) + , gpuProfiler(nullptr) +{ + frames[FrameType::CPU].m_Description = EventDescription::Create("CPU Frame", __FILE__, __LINE__); + frames[FrameType::GPU].m_Description = EventDescription::Create("GPU Frame", __FILE__, __LINE__); + frames[FrameType::Render].m_Description = EventDescription::Create("Render Frame", __FILE__, __LINE__); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::UpdateState() +{ + if (currentState != pendingState) + { + State::Type nextState = pendingState; + if (pendingState == State::DUMP_CAPTURE && currentState == State::START_CAPTURE) + nextState = State::STOP_CAPTURE; + + if ((stateCallback != nullptr) && !stateCallback(nextState)) + return false; + + switch (nextState) + { + case State::START_CAPTURE: + Activate((Mode::Type)settings.mode); + break; + + case State::STOP_CAPTURE: + case State::CANCEL_CAPTURE: + Activate(Mode::OFF); + break; + + case State::DUMP_CAPTURE: + DumpFrames(previousMode); + break; + } + currentState = nextState; + return true; + } + return false; +} + + +void Core::Update() +{ + std::lock_guard lock(coreLock); + + if (currentMode != Mode::OFF) + { + FrameBuffer frameBuffer = frames[FrameType::CPU].m_Frames; + + if (frameBuffer.Size() > 0) + { + if (settings.frameLimit > 0 && frameBuffer.Size() >= settings.frameLimit) + DumpCapture(); + + if (settings.timeLimitUs > 0) + { + if (TicksToUs(frameBuffer.Back()->finish - frameBuffer.Front()->start) >= settings.timeLimitUs) + DumpCapture(); + } + + if (settings.spikeLimitUs > 0) + { + if (TicksToUs(frameBuffer.Back()->finish - frameBuffer.Front()->start) >= settings.spikeLimitUs) + DumpCapture(); + } + } + + if (IsTimeToReportProgress()) + DumpCapturingProgress(); + } + + UpdateEvents(); + + while (UpdateState()) {} +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint32_t Core::BeginUpdateFrame(FrameType::Type frameType, int64_t timestamp, uint64_t threadID) +{ + std::lock_guard lock(coreLock); + + if (currentMode != Mode::OFF) + { + FrameData& data = frames[frameType].m_Frames.Add(); + data.description = frames[frameType].m_Description; + data.start = timestamp; + data.finish = timestamp; + data.threadID = threadID; + } + + return ++frames[frameType].m_FrameNumber; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint32_t Core::EndUpdateFrame(FrameType::Type frameType, int64_t timestamp, uint64_t /*threadID*/) +{ + std::lock_guard lock(coreLock); + + if (currentMode != Mode::OFF) + { + if (FrameData* lastFrame = frames[frameType].m_Frames.Back()) + { + lastFrame->finish = timestamp; + } + } + + return frames[frameType].m_FrameNumber; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::UpdateEvents() +{ + Server::Get().Update(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::ReportSwitchContext(const SwitchContextDesc& desc) +{ + switchContextCollector.Add(desc); + return true; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::ReportStackWalk(const CallstackDesc& desc) +{ + callstackCollector.Add(desc); + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::Activate(Mode::Type mode) +{ + if (mode != currentMode) + { + previousMode = currentMode; + currentMode = mode; + + { + std::lock_guard lock(threadsLock); + for(auto it = 
threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + entry->Activate(mode); + } + } + + + if (mode != Mode::OFF) + { + CaptureStatus::Type status = CaptureStatus::ERR_TRACER_NOT_IMPLEMENTED; + +#if OPTICK_ENABLE_TRACING + if (mode & Mode::TRACER) + { + if (tracer == nullptr) + tracer = Platform::CreateTrace(); + + if (tracer) + { + tracer->SetPassword(settings.password.c_str()); + + std::lock_guard lock(threadsLock); + + status = tracer->Start(mode, settings.samplingFrequency, threads); + + // Let's retry with more narrow setup + if (status != CaptureStatus::OK && (mode & Mode::AUTOSAMPLING)) + status = tracer->Start((Mode::Type)(mode & ~Mode::AUTOSAMPLING), settings.samplingFrequency, threads); + } + } + + if (mode & Mode::AUTOSAMPLING) + if (symbolEngine == nullptr) + symbolEngine = Platform::CreateSymbolEngine(); +#endif + + if (gpuProfiler && (mode & Mode::GPU)) + gpuProfiler->Start(mode); + + SendHandshakeResponse(status); + } + else + { + if (tracer) + { + tracer->Stop(); + Memory::Delete(tracer); + tracer = nullptr; + } + + + if (gpuProfiler) + gpuProfiler->Stop(previousMode); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::DumpCapturingProgress() +{ + stringstream stream; + + if (currentMode != Mode::OFF) + { + size_t memUsedKb = Memory::GetAllocatedSize() >> 10; + float memUsedMb = memUsedKb / 1024.0f; + + stream << "Capturing Frame " << (uint32)frames[FrameType::CPU].m_Frames.Size() << "..." 
<< std::endl << "Memory Used: " << std::fixed << std::setprecision(3) << memUsedMb << " Mb"; + } + + DumpProgress(stream.str().c_str()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::IsTimeToReportProgress() const +{ + return GetTimeMilliSeconds() > progressReportedLastTimestampMS + 200; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::SendHandshakeResponse(CaptureStatus::Type status) +{ + OutputDataStream stream; + stream << (uint32)status; + stream << Platform::GetName(); + stream << Server::Get().GetHostName(); + Server::Get().Send(DataResponse::Handshake, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::IsRegistredThread(ThreadID id) +{ + std::lock_guard lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->description.threadID == id) + { + return true; + } + } + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ThreadEntry* Core::RegisterThread(const ThreadDescription& description, EventStorage** slot) +{ + std::lock_guard lock(threadsLock); + + ThreadEntry* entry = nullptr; + + auto it = std::find_if(threads.begin(), threads.end(), [&description](const ThreadEntry* entry) { return entry->description == description; }); + if (it == threads.end()) + { + entry = Memory::New(description, slot); + threads.push_back(entry); + } + else + { + entry = *it; + } + + if ((currentMode != Mode::OFF) && slot != nullptr) + *slot = &entry->storage; + + return entry; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+bool Core::UnRegisterThread(ThreadID threadID, bool keepAlive) +{ + std::lock_guard lock(threadsLock); + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->description.threadID == threadID && entry->isAlive) + { + if ((currentMode == Mode::OFF) && !keepAlive) + { + Memory::Delete(entry); + threads.erase(it); + return true; + } + else + { + entry->isAlive = false; + return true; + } + } + } + + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterFiber(const FiberDescription& description, EventStorage** slot) +{ + std::lock_guard lock(coreLock); + FiberEntry* entry = Memory::New(description); + fibers.push_back(entry); + entry->storage.isFiberStorage = true; + *slot = &entry->storage; + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterProcessDescription(const ProcessDescription& description) +{ + processDescs.push_back(description); + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::RegisterThreadDescription(const ThreadDescription& description) +{ + threadDescs.push_back(description); + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::SetStateChangedCallback(StateCallback cb) +{ + stateCallback = cb; + return stateCallback != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachSummary(const char* key, const char* value) +{ + summary.push_back(make_pair(string(key), string(value))); + return true; +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size) +{ + if (size > 0) + { + attachments.push_back(Attachment(type, name)); + Attachment& attachment = attachments.back(); + attachment.data.resize(size); + memcpy(&attachment.data[0], data, size); + return true; + } + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, std::istream& stream) +{ + std::streampos beg = stream.tellg(); + stream.seekg(0, std::ios::end); + std::streampos end = stream.tellg(); + stream.seekg(beg, std::ios::beg); + + size_t size =(size_t)(end - beg); + void* buffer = Memory::Alloc(size); + + stream.read((char*)buffer, size); + bool result = AttachFile(type, name, (uint8*)buffer, (uint32_t)size); + + Memory::Free(buffer); + return result; + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const char* path) +{ + std::ifstream stream(path, std::ios::binary); + return AttachFile(type, name, stream); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::AttachFile(File::Type type, const char* name, const wchar_t* path) +{ +#if defined(OPTICK_MSVC) + std::ifstream stream(path, std::ios::binary); + return AttachFile(type, name, stream); +#else + char p[256] = { 0 }; + wcstombs(p, path, sizeof(p)); + std::ifstream stream(p, std::ios::binary); + return AttachFile(type, name, stream); +#endif +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void 
Core::InitGPUProfiler(GPUProfiler* profiler) +{ + OPTICK_ASSERT(gpuProfiler == nullptr, "Can't reinitialize GPU profiler! Not supported yet!"); + gpuProfiler = profiler; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Core::SetSettings(const CaptureSettings& captureSettings) +{ + settings = captureSettings; + + //if (tracer) + //{ + // string decoded = base64_decode(encodedPassword); + // tracer->SetPassword(decoded.c_str()); + // return true; + //} + return false; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::SetMainThreadID(uint64_t threadID) +{ + std::lock_guard lock(threadsLock); + + if (threadID == INVALID_THREAD_ID) + { + forcedMainThreadIndex = (uint32)-1; + } + else + { + for (size_t i = 0; i < threads.size(); ++i) + { + ThreadEntry* entry = threads[i]; + if (entry->description.threadID == threadID) + { + forcedMainThreadIndex = (uint32)i; + } + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const EventDescription* Core::GetFrameDescription(FrameType::Type frame) const +{ + return frames[frame].m_Description; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void Core::Shutdown() +{ + std::lock_guard lock(threadsLock); + + Memory::Delete(gpuProfiler); + gpuProfiler = nullptr; + + for (ThreadList::iterator it = threads.begin(); it != threads.end(); ++it) + { + Memory::Delete(*it); + } + threads.clear(); + + for (FiberList::iterator it = fibers.begin(); it != fibers.end(); ++it) + { + Memory::Delete(*it); + } + fibers.clear(); + + Memory::Delete(symbolEngine); + symbolEngine = nullptr; + + EventDescriptionBoard::Get().Shutdown(); +} 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Destroying the core runs the regular shutdown path.
+Core::~Core()
+{
+	this->Shutdown();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Read-only access to the thread collection held by the core.
+const vector& Core::GetThreads() const
+{
+	return this->threads;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Per-thread storage slot; empty until something assigns it.
+OPTICK_THREAD_LOCAL EventStorage* Core::storage = nullptr;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A fresh header has zeroed numbers, no frame type and an invalid time interval.
+ScopeHeader::ScopeHeader() : boardNumber(0), threadNumber(0), fiberNumber(0), type(FrameType::NONE)
+{
+	event.finish = EventTime::INVALID_TIMESTAMP;
+	event.start = EventTime::INVALID_TIMESTAMP;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the header fields in a fixed order: board, thread, fiber, event, type.
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeHeader& header)
+{
+	stream << header.boardNumber;
+	stream << header.threadNumber;
+	stream << header.fiberNumber;
+	stream << header.event;
+	stream << header.type;
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes the header first, then the category list, then the event list.
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob)
+{
+	stream << ob.header;
+	stream << ob.categories;
+	stream << ob.events;
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes thread id, process id, name, max depth, priority and mask, in that order.
+OutputDataStream& operator<<(OutputDataStream& stream, const ThreadDescription& description)
+{
+	stream << description.threadID;
+	stream << description.processID;
+	stream << description.name;
+	stream << description.maxDepth;
+	stream << description.priority;
+	stream << description.mask;
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A thread entry is serialized as its description only.
+OutputDataStream& operator<<(OutputDataStream& stream, const ThreadEntry* entry)
+{
+	const ThreadDescription& threadDesc = entry->description;
+	return stream << threadDesc;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A fiber description is serialized as its id.
+OutputDataStream& operator<<(OutputDataStream& stream, const FiberDescription& description)
+{
+	stream << description.id;
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A fiber entry is serialized as its description only.
+OutputDataStream& operator<<(OutputDataStream& stream, const FiberEntry* entry)
+{
+	const FiberDescription& fiberDesc = entry->description;
+	return stream << fiberDesc;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes process id, name and unique key, in that order.
+OutputDataStream& operator<<(OutputDataStream& stream, const ProcessDescription& description)
+{
+	stream << description.processID;
+	stream << description.name;
+	stream << description.uniqueKey;
+	return stream;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: forwards the callback to the core singleton and returns its result.
+OPTICK_API bool SetStateChangedCallback(StateCallback cb)
+{
+	Core& core = Core::Get();
+	return core.SetStateChangedCallback(cb);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: forwards a key/value summary pair to the core singleton.
+OPTICK_API bool AttachSummary(const char* key, const char* value)
+{
+	Core& core = Core::Get();
+	return core.AttachSummary(key, value);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Public API: forwards an in-memory attachment (pointer + size) to the core singleton.
+OPTICK_API bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size)
+{
+	Core& core = Core::Get();
+	return core.AttachFile(type, name, data, size);
+}
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachFile(File::Type type, const char* name, const char* path) +{ + return Core::Get().AttachFile(type, name, path); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool AttachFile(File::Type type, const char* name, const wchar_t* path) +{ + return Core::Get().AttachFile(type, name, path); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob) +{ + return stream << ob.x << ob.y << ob.z; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void Update() +{ + return Core::Get().Update(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API uint32_t BeginFrame(Optick::FrameType::Type frameType, int64_t timestamp, uint64_t threadID) +{ + return Core::BeginFrame(frameType, timestamp != EventTime::INVALID_TIMESTAMP ? timestamp : Optick::GetHighPrecisionTime(), threadID != INVALID_THREAD_ID ? threadID : Platform::GetThreadID()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API uint32_t EndFrame(Optick::FrameType::Type frameType, int64_t timestamp, uint64_t threadID) +{ + return Core::EndFrame(frameType, timestamp != EventTime::INVALID_TIMESTAMP ? timestamp : Optick::GetHighPrecisionTime(), threadID != INVALID_THREAD_ID ? 
threadID : Platform::GetThreadID()); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool IsActive(Mode::Type mode /*= Mode::INSTRUMENTATION_EVENTS*/) +{ + return (Core::Get().currentMode & mode) != 0; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API EventStorage** GetEventStorageSlotForCurrentThread() +{ + return &Core::Get().storage; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool IsFiberStorage(EventStorage* fiberStorage) +{ + return fiberStorage->isFiberStorage; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterThread(const char* name) +{ + return Core::Get().RegisterThread(ThreadDescription(name, Platform::GetThreadID(), Platform::GetProcessID()), &Core::storage) != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterThread(const wchar_t* name) +{ + const int THREAD_NAME_LENGTH = 128; + char mbName[THREAD_NAME_LENGTH]; + wcstombs_s(mbName, name, THREAD_NAME_LENGTH); + + return Core::Get().RegisterThread(ThreadDescription(mbName, Platform::GetThreadID(), Platform::GetProcessID()), &Core::storage) != nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool UnRegisterThread(bool keepAlive) +{ + return Core::Get().UnRegisterThread(Platform::GetThreadID(), keepAlive); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool RegisterFiber(uint64 fiberId, 
EventStorage** slot) +{ + return Core::Get().RegisterFiber(FiberDescription(fiberId), slot); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, ThreadMask::Type type) +{ + ThreadEntry* entry = Core::Get().RegisterThread(ThreadDescription(name, threadID, Platform::GetProcessID(), 1, 0, type), nullptr); + return entry ? &entry->storage : nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void GpuFlip(void* swapChain) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + gpuProfiler->Flip(swapChain); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API GPUContext SetGpuContext(GPUContext context) +{ + if (EventStorage* storage = Core::storage) + { + GPUContext prevContext = storage->gpuStorage.context; + storage->gpuStorage.context = context; + return prevContext; + } + return GPUContext(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API const EventDescription* GetFrameDescription(FrameType::Type frame) +{ + return Core::Get().GetFrameDescription(frame); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void SetAllocator(AllocateFn allocateFn, DeallocateFn deallocateFn, InitThreadCb initThreadCb) +{ + Memory::SetAllocator(allocateFn, deallocateFn, initThreadCb); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool StartCapture(Mode::Type mode /*= Mode::DEFAULT*/, int samplingFrequency /*= 1000*/, bool force /*= true*/) +{ + if 
(IsActive()) + return false; + + CaptureSettings settings; + settings.mode = mode | Mode::NOGUI; + settings.samplingFrequency = samplingFrequency; + + Core& core = Core::Get(); + core.SetSettings(settings); + + if (!core.IsRegistredThread(Platform::GetThreadID())) + RegisterThread("MainThread"); + + core.StartCapture(); + + if (force) + { + core.Update(); + core.SetMainThreadID(Platform::GetThreadID()); + core.BeginFrame(FrameType::CPU, GetHighPrecisionTime(), Platform::GetThreadID()); + } + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool StopCapture(bool force /*= true*/) +{ + if (!IsActive()) + return false; + + Core& core = Core::Get(); + core.StopCapture(); + + if (force) + { + core.EndFrame(FrameType::CPU, GetHighPrecisionTime(), Platform::GetThreadID()); + core.Update(); + } + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SaveHelper +{ + static void Init(const char* path) + { + GetOutputFile().open(path, std::ios::out | std::ios::binary); + } + + static void Write(const char* data, size_t size) + { + if (data) + GetOutputFile().write(data, size); + else + GetOutputFile().close(); + } + + static fstream& GetOutputFile() + { + static fstream file; + return file; + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool EndsWith(const char* str, const char* substr) +{ + size_t strLength = strlen(str); + size_t substrLength = strlen(substr); + return strLength >= substrLength && strcmp(substr, &str[strLength - substrLength]) == 0; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool SaveCapture(const char* path, bool force /*= true*/) +{ + char filePath[512] = 
{ 0 }; +#if defined(OPTICK_MSVC) + strcpy_s(filePath, 512, path); +#else + strcpy(filePath, path); +#endif + + if (path == nullptr || !EndsWith(path, ".opt")) + { + time_t now = time(0); + struct tm tstruct; +#if defined(OPTICK_MSVC) + localtime_s(&tstruct, &now); +#else + localtime_r(&now, &tstruct); +#endif + char timeStr[80] = { 0 }; + strftime(timeStr, sizeof(timeStr), "(%Y-%m-%d.%H-%M-%S).opt", &tstruct); +#if defined(OPTICK_MSVC) + strcat_s(filePath, 512, timeStr); +#else + strcat(filePath, timeStr); +#endif + } + + SaveHelper::Init(filePath); + return SaveCapture(SaveHelper::Write, force); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API bool SaveCapture(CaptureSaveChunkCb dataCb /*= nullptr*/, bool force /*= true*/) +{ + Server::Get().SetSaveCallback(dataCb); + + Core& core = Core::Get(); + core.DumpCapture(); + if (force) + core.Update(); + + return true; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +OPTICK_API void Shutdown() +{ + Core::Get().Shutdown(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventStorage::EventStorage(): currentMode(Mode::OFF), pushPopEventStackIndex(0), isFiberStorage(false) +{ + +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ThreadEntry::Activate(Mode::Type mode) +{ + if (!isAlive) + return; + + if (mode != Mode::OFF) + storage.Clear(true); + + if (threadTLS != nullptr) + { + storage.currentMode = mode; + *threadTLS = mode != Mode::OFF ? 
&storage : nullptr; + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ThreadEntry::Sort() +{ + SortMemoryPool(storage.eventBuffer); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ScopeData::Send() +{ + if (!events.empty() || !categories.empty()) + { + if (!IsSleepOnlyScope(*this)) + { + OutputDataStream frameStream; + frameStream << *this; + Server::Get().Send(DataResponse::EventFrame, frameStream); + } + } + + Clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ScopeData::ResetHeader() +{ + header.event.start = INT64_MAX; + header.event.finish = INT64_MIN; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ScopeData::Clear() +{ + ResetHeader(); + events.clear(); + categories.clear(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void EventStorage::GPUStorage::Clear(bool preserveMemory) +{ + for (size_t i = 0; i < gpuBuffer.size(); ++i) + for (int j = 0; j < GPU_QUEUE_COUNT; ++j) + gpuBuffer[i][j].Clear(preserveMemory); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +EventData* EventStorage::GPUStorage::Start(const EventDescription &desc) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + { + EventData& result = gpuBuffer[context.node][context.queue].Add(); + result.description = &desc; + result.start = EventTime::INVALID_TIMESTAMP; + result.finish = EventTime::INVALID_TIMESTAMP; + gpuProfiler->QueryTimestamp(context.cmdBuffer, &result.start); + return &result; + } + return nullptr; +} 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void EventStorage::GPUStorage::Stop(EventData& data) +{ + if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler) + { + gpuProfiler->QueryTimestamp(context.cmdBuffer, &data.finish); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK diff --git a/neo/libs/optick/optick_core.freebsd.h b/neo/libs/optick/optick_core.freebsd.h new file mode 100644 index 00000000..18ae79c5 --- /dev/null +++ b/neo/libs/optick/optick_core.freebsd.h @@ -0,0 +1,77 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+#if defined(__FreeBSD__)
+
+#include "optick.config.h"
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+// NOTE(review): the four system #include targets below are missing in this extract - restore from upstream.
+#include
+#include
+#include
+#include
+
+namespace Optick
+{
+	const char* Platform::GetName() // platform label reported for FreeBSD builds
+	{
+		return "PS4";
+	}
+
+	ThreadID Platform::GetThreadID() // pthread_self() cast to a 64-bit id
+	{
+		return (uint64_t)pthread_self();
+	}
+
+	ProcessID Platform::GetProcessID() // getpid() cast to ProcessID
+	{
+		return (ProcessID)getpid();
+	}
+
+	int64 Platform::GetFrequency() // ticks per second: GetTime() counts nanoseconds
+	{
+		return 1000000000;
+	}
+
+	int64 Platform::GetTime() // CLOCK_REALTIME reading converted to nanoseconds
+	{
+		struct timespec ts;
+		clock_gettime(CLOCK_REALTIME, &ts);
+		return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+	}
+
+	Trace* Platform::CreateTrace() // always nullptr in this platform file
+	{
+		return nullptr;
+	}
+
+	SymbolEngine* Platform::CreateSymbolEngine() // always nullptr in this platform file
+	{
+		return nullptr;
+	}
+}
+
+#endif //USE_OPTICK
+#endif //__FreeBSD__
\ No newline at end of file
diff --git a/neo/libs/optick/optick_core.h b/neo/libs/optick/optick_core.h
new file mode 100644
index 00000000..9066433d
--- /dev/null
+++ b/neo/libs/optick/optick_core.h
@@ -0,0 +1,653 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+// NOTE(review): the bare #include lines in this header have lost their <...> targets in this extract.
+#include
+#include
+
+#include "optick_common.h"
+
+#include "optick_memory.h"
+#include "optick_message.h"
+#include "optick_serialization.h"
+
+#include "optick_gpu.h"
+
+#include
+
+// We expect to have 1k unique strings going through Optick at once
+// The chances to hit a collision are 1 in 10 trillion (odds of a meteor landing on your house)
+// We should be quite safe here :)
+// https://preshing.com/20110504/hash-collision-probabilities/
+// Feel free to add a seed and wait for another strike if armageddon starts
+namespace Optick
+{
+	// A string represented only by its 64-bit hash; equality and ordering compare hashes,
+	// never the original text.
+	struct StringHash
+	{
+		uint64 hash;
+
+		// Wraps an already computed hash value.
+		StringHash(size_t h) : hash(h) {}
+		// Hashes `str` with CalcHash.
+		StringHash(const char* str) : hash(CalcHash(str)) {}
+
+		bool operator==(const StringHash& other) const { return hash == other.hash; }
+		bool operator<(const StringHash& other) const { return hash < other.hash; }
+
+		// Hash function; declared here, defined elsewhere.
+		static uint64 CalcHash(const char* str);
+	};
+}
+
+// Overriding default hash function to return hash value directly
+// NOTE(review): the specialization's template argument is missing in this extract; operator()
+// below takes Optick::StringHash.
+namespace std
+{
+	template<>
+	struct hash
+	{
+		size_t operator()(const Optick::StringHash& x) const
+		{
+			return (size_t)x.hash;
+		}
+	};
+}
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct Trace;
+struct SymbolEngine;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Header serialized in front of a scope: its time interval plus board/thread/fiber numbers and frame type.
+struct ScopeHeader
+{
+	EventTime event;
+	uint32 boardNumber;
+	int32 threadNumber;
+	int32 fiberNumber;
+	FrameType::Type type;
+
+	ScopeHeader();
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream& operator << ( OutputDataStream& stream, const ScopeHeader& ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A batch of events collected for one scope, together with its header.
+// NOTE(review): the element types of the two vectors are missing in this extract; both receive
+// EventData through push_back below.
+struct ScopeData
+{
+	ScopeHeader header;
+	vector categories;
+	vector events;
+
+	// Starts with a reset header interval.
+	ScopeData()
+	{
+		ResetHeader();
+	}
+
+	// Records `data` as an event; events whose description has a non-Null color are also
+	// recorded as categories.
+	void AddEvent(const EventData& data)
+	{
+		events.push_back(data);
+		if (data.description->color != Color::Null)
+		{
+			categories.push_back(data);
+		}
+	}
+
+	// Widens the header interval to include `data`, records it as an event, and sets the header
+	// type to the frame type whose description is `data.description` (NONE when none matches;
+	// the last matching index wins).
+	void InitRootEvent(const EventData& data)
+	{
+		header.event.start = std::min(data.start, header.event.start);
+		header.event.finish = std::max(data.finish, header.event.finish);
+		AddEvent(data);
+
+		header.type = FrameType::NONE;
+		for (int i = 0; i < FrameType::COUNT; ++i)
+			if (GetFrameDescription((FrameType::Type)i) == data.description)
+				header.type = (FrameType::Type)i;
+	}
+
+	void ResetHeader();
+	void Send();
+	void Clear();
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(OPTICK_MSVC)
+#pragma warning( push )
+#pragma warning( disable : 4996 )
+#endif //OPTICK_MSVC
+// Fixed-capacity, NUL-terminated character buffer of N bytes.
+// NOTE(review): the template parameter list is missing in this extract; the body uses a size `N`.
+template
+struct OptickString
+{
+	char data[N];
+	// Leaves `data` uninitialized.
+	OptickString() {}
+	// Copies at most N-1 characters and always terminates; a null `text` is stored as "null".
+	OptickString& operator=(const char* text) { strncpy(data, text ? text : "null", N - 1); data[N - 1] = 0; return *this; }
+	OptickString(const char* text) { *this = text; }
+	// Copies at most min(N-1, length) characters and terminates right after them.
+	OptickString(const char* text, uint16_t length) { uint16_t maxLength = std::min((uint16_t)(N - 1), length); strncpy(data, text ? text : "null", maxLength); data[maxLength] = 0; }
+};
+#if defined(OPTICK_MSVC)
+#pragma warning( pop )
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Three-component float point.
+struct Point
+{
+	float x, y, z;
+	// Leaves the components uninitialized.
+	Point() {}
+	Point(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
+	// Reads the first three floats of `pos`.
+	Point(float pos[3]) : x(pos[0]), y(pos[1]), z(pos[2]) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Serializes an OptickString as a uint32 length followed by that many bytes (no terminator).
+// NOTE(review): template parameter list and the OptickString argument are missing in this extract.
+template
+OutputDataStream& operator<<(OutputDataStream &stream, const OptickString& ob)
+{
+	size_t length = strnlen(ob.data, N);
+	stream << (uint32)length;
+	return stream.Write(ob.data, length);
+}
+OutputDataStream& operator<<(OutputDataStream& stream, const Point& ob);
+OutputDataStream& operator<<(OutputDataStream& stream, const ScopeData& ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE(review): every MemoryPool / TagData typedef below has lost its template arguments in this
+// extract; restore them from upstream before compiling.
+typedef MemoryPool EventBuffer;
+typedef MemoryPool CategoryBuffer;
+typedef MemoryPool SynchronizationBuffer;
+typedef MemoryPool FiberSyncBuffer;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef OptickString<32> ShortString;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef TagData TagFloat;
+typedef TagData TagS32;
+typedef TagData TagU32;
+typedef TagData TagU64;
+typedef TagData TagPoint;
+typedef TagData TagString;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+typedef MemoryPool TagFloatBuffer;
+typedef MemoryPool TagS32Buffer;
+typedef MemoryPool TagU32Buffer;
+typedef MemoryPool TagU64Buffer;
+typedef MemoryPool TagPointBuffer;
+typedef MemoryPool TagStringBuffer;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Base64
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Declared here, defined elsewhere; presumably returns the decoded bytes of a Base64 string - confirm at the definition.
+string base64_decode(string const& encoded_string);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Board
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE(review): MemoryPool's template arguments are missing in this extract.
+typedef MemoryPool EventDescriptionList;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Registry of event descriptions, reached through the static Get() accessor.
+// Holds a list of descriptions plus a separate map of "shared" descriptions with its own name
+// buffer and mutex. All member functions are only declared here.
+class EventDescriptionBoard
+{
+	// List of stored Event Descriptions
+	EventDescriptionList boardDescriptions;
+
+	// Shared Descriptions
+	// NOTE(review): unordered_map's key/value types are missing in this extract.
+	typedef unordered_map DescriptionMap;
+	DescriptionMap sharedDescriptions;
+	// 64 KiB buffer; presumably backs the strings returned by CacheString - confirm at the definition.
+	MemoryBuffer<64 * 1024> sharedNames;
+	std::mutex sharedLock;
+
+	const char* CacheString(const char* text);
+public:
+	// Creates a description; everything except `name` is optional (no file, line 0, Color::Null, filter 0, flags 0).
+	EventDescription* CreateDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0, uint8_t flags = 0);
+	// Same defaults as CreateDescription but without a `flags` parameter; presumably goes through
+	// sharedDescriptions under sharedLock - confirm at the definition.
+	EventDescription* CreateSharedDescription(const char* name, const char* file = nullptr, uint32_t line = 0, uint32_t color = Color::Null, uint32_t filter = 0);
+
+
+	// Accessor for the board instance.
+	static EventDescriptionBoard& Get();
+
+	// Read-only access to the description list.
+	const EventDescriptionList& GetEvents() const;
+
+	void Shutdown();
+
+	// Serialization needs access to the private members.
+	friend OutputDataStream& operator << (OutputDataStream& stream, const EventDescriptionBoard& ob);
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Per-thread / per-fiber container for everything recorded during a capture: CPU events, fiber
+// sync records, typed tags, GPU events and the push/pop event stack.
+struct EventStorage
+{
+	// Capture mode this storage is currently recording in (Mode::OFF when idle).
+	Mode::Type currentMode;
+	EventBuffer eventBuffer;
+	FiberSyncBuffer fiberSyncBuffer;
+
+	// One buffer per tag value type.
+	TagFloatBuffer tagFloatBuffer;
+	TagS32Buffer tagS32Buffer;
+	TagU32Buffer tagU32Buffer;
+	TagU64Buffer tagU64Buffer;
+	TagPointBuffer tagPointBuffer;
+	TagStringBuffer tagStringBuffer;
+
+	// GPU events, stored per GPU node and per queue, plus the context used to select a buffer.
+	struct GPUStorage
+	{
+		static const int MAX_GPU_NODES = 2;
+		// NOTE(review): the inner array type is missing in this extract; it is indexed as
+		// gpuBuffer[node][queue] in the implementation.
+		array, MAX_GPU_NODES> gpuBuffer;
+		GPUContext context;
+
+		void Clear(bool preserveMemory);
+
+		EventData* Start(const EventDescription& desc);
+		void Stop(EventData& data);
+	};
+	GPUStorage gpuStorage;
+
+	// Number of entries currently on the push/pop stack.
+	uint32 pushPopEventStackIndex;
+	// NOTE(review): element type and capacity are missing in this extract; entries are reset to nullptr in Clear().
+	array pushPopEventStack;
+
+	bool isFiberStorage;
+
+	EventStorage();
+
+	// Appends a new event slot to the event buffer and returns it.
+	OPTICK_INLINE EventData& NextEvent()
+	{
+		return eventBuffer.Add();
+	}
+
+	// Free all temporary memory
+	// Also switches the storage to Mode::OFF and unwinds the push/pop stack. `preserveContent` is
+	// passed straight to each buffer's Clear(); GPUStorage::Clear names the same flag `preserveMemory`.
+	void Clear(bool preserveContent)
+	{
+		currentMode = Mode::OFF;
+		eventBuffer.Clear(preserveContent);
+		fiberSyncBuffer.Clear(preserveContent);
+		gpuStorage.Clear(preserveContent);
+		ClearTags(preserveContent);
+
+		// The index may exceed the array size, so only in-range slots are reset.
+		while (pushPopEventStackIndex)
+		{
+			if (--pushPopEventStackIndex < pushPopEventStack.size())
+				pushPopEventStack[pushPopEventStackIndex] = nullptr;
+		}
+	}
+
+	// Clears all six tag buffers.
+	void ClearTags(bool preserveContent)
+	{
+		tagFloatBuffer.Clear(preserveContent);
+		tagS32Buffer.Clear(preserveContent);
+		tagU32Buffer.Clear(preserveContent);
+		tagU64Buffer.Clear(preserveContent);
+		tagPointBuffer.Clear(preserveContent);
+		tagStringBuffer.Clear(preserveContent);
+	}
+
+	// Shorthand for Clear(true).
+	void Reset()
+	{
+		Clear(true);
+	}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Name, id and unique key of a process; the constructor is defined elsewhere.
+struct ProcessDescription
+{
+	string name;
+	ProcessID processID;
+	uint64 uniqueKey;
+	ProcessDescription(const char* processName, ProcessID pid, uint64 key);
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Identity and display attributes of a profiled thread.
+struct ThreadDescription
+{
+	string name;
+	ThreadID threadID;
+	ProcessID processID;
+	int32 maxDepth;
+	int32 priority;
+	uint32 mask;
+
+	// Two descriptions are equal when name, thread id and process id match; depth, priority and
+	// mask are not compared.
+	bool operator==(const ThreadDescription& other) const { return name == other.name && threadID == other.threadID && processID == other.processID; }
+	ThreadDescription(const char* threadName, ThreadID tid, ProcessID pid, int32 maxDepth = 1, int32 priority = 0, uint32 mask = 0);
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Identity of a fiber: just its 64-bit id.
+struct FiberDescription
+{
+	uint64 id;
+
+	FiberDescription(uint64 _id)
+		: id(_id)
+	{}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A registered thread: its description, its event storage and an optional pointer to the
+// thread-local slot that should point at that storage while a capture is active.
+struct ThreadEntry
+{
+	ThreadDescription description;
+	EventStorage storage;
+	// May be nullptr: storages registered without a TLS slot (see RegisterStorage) pass a null slot.
+	EventStorage** threadTLS;
+
+	bool isAlive;
+
+	ThreadEntry(const ThreadDescription& desc, EventStorage** tls) : description(desc), threadTLS(tls), isAlive(true) {}
+	// RB: see Fix for crash on stop capture #1
+	// https://github.com/ulricheck/optick/pull/1/commits/1e5e1919816a64f235caa0f4b0bf20495225b1fa
+	// Detaches the TLS slot from the storage that is about to be destroyed.
+	// The slot pointer itself is checked first: entries created without a slot would otherwise
+	// dereference nullptr here, while Activate() already guards against that case.
+	~ThreadEntry()
+	{
+		if (threadTLS != nullptr && (*threadTLS) != nullptr)
+		{
+			*threadTLS = nullptr;
+		}
+	}
+	void Activate(Mode::Type mode);
+	void Sort();
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// A registered fiber: its description and its event storage.
+struct FiberEntry
+{
+	FiberDescription description;
+	EventStorage storage;
+
+	FiberEntry(const FiberDescription& desc) : description(desc) {}
+};
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE(review): the vector element types are missing in this extract; restore them from upstream.
+typedef vector ThreadList;
+typedef vector FiberList;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// An event that additionally carries a system call id and the id of the thread that issued it.
+struct SysCallData : EventData
+{
+	uint64 id;
+	uint64 threadID;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream &operator << (OutputDataStream &stream, const SysCallData &ob);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Collects SysCallData records in a public pool; member functions are defined elsewhere.
+class SysCallCollector
+{
+	// NOTE(review): MemoryPool's template arguments are missing in this extract.
+	typedef MemoryPool SysCallPool;
+public:
+	SysCallPool syscallPool;
+
+	SysCallData& Add();
+	void Clear();
+
+	bool Serialize(OutputDataStream& stream);
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// One sampled callstack: owning thread, timestamp and `count` addresses starting at `callstack`.
+// `count` is 8-bit, so a single record holds at most 255 frames.
+struct CallstackDesc
+{
+	uint64 threadID;
+	uint64 timestamp;
+	uint64* callstack;
+	uint8 count;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Collects sampled callstacks and serializes modules, symbols and callstacks; member functions
+// are defined elsewhere.
+class CallstackCollector
+{
+	// Packed callstack list: {ThreadID, Timestamp, Count, Callstack[Count]}
+	// NOTE(review): MemoryPool's template arguments are missing in this extract.
+	typedef MemoryPool CallstacksPool;
+	CallstacksPool callstacksPool;
+public:
+	void Add(const CallstackDesc& desc);
+	void Clear();
+
+	bool SerializeModules(OutputDataStream& stream);
+	bool SerializeSymbols(OutputDataStream& stream);
+	bool SerializeCallstacks(OutputDataStream& stream);
+
+	bool IsEmpty() const;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// One context switch: when it happened, the outgoing and incoming thread ids, the CPU and a reason code.
+struct SwitchContextDesc
+{
+	int64_t timestamp;
+	uint64 oldThreadId;
+	uint64 newThreadId;
+	uint8 cpuId;
+	uint8 reason;
+};
+//////////////////////////////////////////////////////////////////////////
+OutputDataStream &operator << (OutputDataStream &stream, const SwitchContextDesc &ob);
+//////////////////////////////////////////////////////////////////////////
+// Collects context-switch records; member functions are defined elsewhere.
+class SwitchContextCollector
+{
+	// NOTE(review): MemoryPool's template arguments are missing in this extract.
+	typedef MemoryPool SwitchContextPool;
+	SwitchContextPool switchContextPool;
+public:
+	void Add(const SwitchContextDesc& desc);
+	void Clear();
+	bool Serialize(OutputDataStream& stream);
+};
+//////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Capture status codes; the explicit numeric values are fixed, OK is 0.
+struct CaptureStatus
+{
+	enum Type
+	{
+		OK = 0,
+		ERR_TRACER_ALREADY_EXISTS = 1,
+		ERR_TRACER_ACCESS_DENIED = 2,
+		ERR_TRACER_FAILED = 3,
+		ERR_TRACER_INVALID_PASSWORD = 4,
+		ERR_TRACER_NOT_IMPLEMENTED = 5,
+	};
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// An event tagged with a thread id; the id starts out as INVALID_THREAD_ID.
+struct FrameData : public EventData
+{
+	uint64_t threadID;
+	FrameData() : threadID(INVALID_THREAD_ID) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE(review): MemoryPool's template arguments are missing in this extract.
+typedef MemoryPool FrameBuffer;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Frames recorded for one frame type, with the description they belong to and a frame counter.
+struct FrameStorage
+{
+	const EventDescription* m_Description;
+	FrameBuffer m_Frames;
+	// NOTE(review): the atomic's value type is missing in this extract, and the constructor below
+	// does not initialize this counter - confirm where it receives its initial value.
+	std::atomic m_FrameNumber;
+
+	// Clears the recorded frames only; the description and the frame counter are left untouched.
+	void Clear(bool preserveMemory = true)
+	{
+		m_Frames.Clear(preserveMemory);
+	}
+
+	FrameStorage() : m_Description(nullptr) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Central profiler object: owns the registered threads/fibers, the per-frame-type storage,
+// the collectors and the capture state. Accessed through Core::Get().
+// NOTE(review): several declarations below ('array frames', 'vector> summary', 'vector data',
+// 'list attachments', 'vector processDescs', ...) appear to have lost their template argument
+// lists in this copy of the patch - restore them from upstream before applying.
+class Core
+{
+	std::recursive_mutex coreLock;
+	std::recursive_mutex threadsLock;
+
+	ThreadList threads;
+	FiberList fibers;
+
+	int64 progressReportedLastTimestampMS;
+
+	array frames;
+	uint32 boardNumber;
+
+	CallstackCollector callstackCollector;
+	SwitchContextCollector switchContextCollector;
+
+	vector> summary;
+
+	// Named blob attached to a capture
+	struct Attachment
+	{
+		string name;
+		vector data;
+		File::Type type;
+		Attachment(File::Type t, const char* n) : name(n), type(t) {}
+	};
+	list attachments;
+
+	StateCallback stateCallback;
+
+	vector processDescs;
+	vector threadDescs;
+
+	State::Type currentState;
+	State::Type pendingState;
+
+	CaptureSettings settings;
+
+	uint32 forcedMainThreadIndex;
+
+	void UpdateEvents();
+	bool UpdateState();
+
+	// Instance-side implementation of the static BeginFrame / EndFrame entry points
+	uint32_t BeginUpdateFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID);
+	uint32_t EndUpdateFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID);
+
+	// Construction is private; use Core::Get()
+	Core();
+	~Core();
+
+	void DumpCapturingProgress();
+	void SendHandshakeResponse(CaptureStatus::Type status);
+
+
+	void DumpEvents(EventStorage& entry, const EventTime& timeSlice, ScopeData& scope);
+	void DumpTags(EventStorage& entry, ScopeData& scope);
+	void DumpThread(ThreadEntry& entry, const EventTime& timeSlice, ScopeData& scope);
+	void DumpFiber(FiberEntry& entry, const EventTime& timeSlice, ScopeData& scope);
+
+	void CleanupThreadsAndFibers();
+
+	void DumpBoard(uint32 mode, EventTime timeSlice);
+
+	void GenerateCommonSummary();
+public:
+	void Activate(Mode::Type mode);
+	volatile Mode::Type currentMode;
+	volatile Mode::Type previousMode;
+
+	// Active Frame (is used as buffer)
+	static OPTICK_THREAD_LOCAL EventStorage* storage;
+
+	// Resolves symbols
+	SymbolEngine* symbolEngine;
+
+	// Controls GPU activity
+	// Graphics graphics;
+
+	// System scheduler trace
+	Trace* tracer;
+
+	// SysCall Collector
+	SysCallCollector syscallCollector;
+
+	// GPU Profiler
+	GPUProfiler* gpuProfiler;
+
+	// Returns thread collection
+	const vector& GetThreads() const;
+
+	// Request to start a new capture
+	void StartCapture();
+
+	// Request to stop an active capture
+	void StopCapture();
+
+	// Request to cancel an active capture
+	void CancelCapture();
+
+	// Requests to dump current capture
+	void DumpCapture();
+
+	// Report switch context event
+	bool ReportSwitchContext(const SwitchContextDesc& desc);
+
+	// Report stack walk (callstack) event
+	bool ReportStackWalk(const CallstackDesc& desc);
+
+	// Serialize and send current profiling progress
+	void DumpProgress(const char* message = "");
+	void DumpProgressFormatted(const char* format, ...);
+
+	// Too much time from last report
+	bool IsTimeToReportProgress() const;
+
+	// Serialize and send frames
+	void DumpFrames(uint32 mode = Mode::DEFAULT);
+
+	// Serialize and send summary
+	void DumpSummary();
+
+	// Registers thread and creates EventStorage
+	ThreadEntry* RegisterThread(const ThreadDescription& description, EventStorage** slot);
+
+	// Unregisters thread
+	bool UnRegisterThread(ThreadID threadId, bool keepAlive = false);
+
+	// Checks whether the thread is registered
+	bool IsRegistredThread(ThreadID id);
+
+	// Registers fiber and creates EventStorage
+	bool RegisterFiber(const FiberDescription& description, EventStorage** slot);
+
+	// Registers ProcessDescription
+	bool RegisterProcessDescription(const ProcessDescription& description);
+
+	// Registers ThreadDescription (used for threads from other processes)
+	bool RegisterThreadDescription(const ThreadDescription& description);
+
+	// Sets state change callback
+	bool SetStateChangedCallback(StateCallback cb);
+
+	// Attaches a key-value pair to the next capture
+	bool AttachSummary(const char* key, const char* value);
+
+	// Attaches a file (from memory, a stream or a path) to the current capture
+	bool AttachFile(File::Type type, const char* name, const uint8_t* data, uint32_t size);
+	bool AttachFile(File::Type type, const char* name, std::istream& stream);
+	bool AttachFile(File::Type type, const char* name, const char* path);
+	bool AttachFile(File::Type type, const char* name, const wchar_t* path);
+
+	// Initializes GPU profiler
+	void InitGPUProfiler(GPUProfiler* profiler);
+
+	// Initializes root password for the device
+	bool SetSettings(const CaptureSettings& settings);
+
+	// Current Frame Number (since the game started)
+	uint32_t GetCurrentFrame(FrameType::Type frameType) const { return frames[frameType].m_FrameNumber; }
+
+	// Returns Frame Description
+	const EventDescription* GetFrameDescription(FrameType::Type frame) const;
+
+	// Main Update Function
+	void Update();
+
+	// Full Destruction
+	void Shutdown();
+
+	// Frame Flip functions (forward to the singleton)
+	static uint32_t BeginFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID) { return Get().BeginUpdateFrame(frame, timestamp, threadID); }
+	static uint32_t EndFrame(FrameType::Type frame, int64_t timestamp, uint64_t threadID) { return Get().EndUpdateFrame(frame, timestamp, threadID); }
+
+	// Initialize Main ThreadID
+	void SetMainThreadID(uint64_t threadID);
+
+	// NOT Thread Safe singleton (performance)
+	static Core& Get();
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_core.linux.h b/neo/libs/optick/optick_core.linux.h
new file mode 100644
index 00000000..87c8781e
--- /dev/null
+++ b/neo/libs/optick/optick_core.linux.h
@@ -0,0 +1,446 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+#if defined(__linux__)
+
+#include "optick.config.h"
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+
+// NOTE(review): this copy of the patch carried five '#include' directives without a header
+// name. The list below is rebuilt from the calls made in this file (syscall/SYS_gettid,
+// getpid, clock_gettime) - verify it against upstream Optick.
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <time.h>
+#include <unistd.h>
+
+namespace Optick
+{
+	// Platform name reported to the profiler.
+	const char* Platform::GetName()
+	{
+#if defined(__ANDROID__)
+		return "Android";
+#else
+		return "Linux";
+#endif
+	}
+
+	// Kernel thread id of the calling thread (gettid system call).
+	ThreadID Platform::GetThreadID()
+	{
+		return syscall(SYS_gettid);
+	}
+
+	ProcessID Platform::GetProcessID()
+	{
+		return (ProcessID)getpid();
+	}
+
+	// Ticks per second: GetTime() reports nanoseconds.
+	int64 Platform::GetFrequency()
+	{
+		return 1000000000;
+	}
+
+	// CLOCK_MONOTONIC time in nanoseconds.
+	int64 Platform::GetTime()
+	{
+		struct timespec ts;
+		clock_gettime(CLOCK_MONOTONIC, &ts);
+		return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+	}
+}
+
+#if OPTICK_ENABLE_TRACING
+
+#include "optick_memory.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Plain-data mirror of the ftrace events parsed by this file.
+namespace ft
+{
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Fields common to every event; 'common_type' selects the concrete event struct.
+	struct base_event
+	{
+		int64_t timestamp;
+		short common_type;
+		uint8_t cpu_id;
+		base_event(short type) : timestamp(-1), common_type(type), cpu_id(uint8_t(-1)) {}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Tags an event struct with its numeric type id.
+	// The parameter list was missing in this copy of the patch; 'short TYPE' follows from
+	// 'static const short type = TYPE', 'base_event(short)' and the use 'event<305>'.
+	template <short TYPE>
+	struct event : public base_event
+	{
+		static const short type = TYPE;
+		event() : base_event(TYPE) {}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	struct process_state
+	{
+		enum type
+		{
+			Unknown,
+			//D Uninterruptible sleep(usually IO)
+			UninterruptibleSleep,
+			//R Running or runnable(on run queue)
+			Running,
+			//S Interruptible sleep(waiting for an event to complete)
+			InterruptibleSleep,
+			//T Stopped, either by a job control signal or because it is being traced.
+			Stopped,
+			//X dead(should never be seen)
+			Dead,
+			//Z Defunct(“zombie”) process, terminated but not reaped by its parent.
+			Zombie,
+		};
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Payload of a 'sched_switch' trace line (prev_* is switched out, next_* is switched in).
+	struct sched_switch : public event<305>
+	{
+		char prev_comm[16];
+		pid_t prev_pid;
+		int prev_prio;
+		process_state::type prev_state;
+		char next_comm[16];
+		pid_t next_pid;
+		int next_prio;
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+} // namespace ft
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Root of the ftrace control files, followed by the entries (relative to it) used by FTrace.
+static const char* KERNEL_TRACING_PATH = "/sys/kernel/debug/tracing";
+static const char* FTRACE_TRACE = "trace";
+static const char* FTRACE_TRACING_ON = "tracing_on";
+static const char* FTRACE_TRACE_CLOCK = "trace_clock";
+static const char* FTRACE_OPTIONS_IRQ_INFO = "options/irq-info";
+static const char* FTRACE_SCHED_SWITCH = "events/sched/sched_switch/enable";
+// Offset added to ft::process_state::type to build SwitchContextDesc::reason.
+static const uint8_t PROCESS_STATE_REASON_START = 38;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Context-switch tracer driven through the ftrace control files under KERNEL_TRACING_PATH.
+// Every access is executed as a shell command through 'sudo -S', fed with the stored password.
+// NOTE(review): the password and the command text are interpolated into the shell command
+// unescaped and formatted into 256-byte buffers - a password containing a single quote breaks
+// the command line. Confirm where the password comes from before relying on this.
+class FTrace : public Trace
+{
+	bool isActive;
+	string password;
+	// Thread ids already reported through RegisterThreadDescription.
+	// (Element type restored: the argument list was missing in this copy of the patch and
+	// the only values stored are pid_t.)
+	unordered_set<pid_t> pidCache;
+
+	bool Parse(const char* line);
+	bool ProcessEvent(const ft::base_event& ev);
+
+	bool Set(const char* name, bool value);
+	bool Set(const char* name, const char* value);
+	bool Exec(const char* cmd);
+public:
+
+	FTrace();
+	~FTrace();
+
+	virtual void SetPassword(const char* pwd) override { password = pwd; }
+	virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
+	virtual bool Stop() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Minimal forward-only cursor over a NUL-terminated line of text.
+struct Parser
+{
+	const char* cursor;
+	const char* finish;
+	size_t length;
+
+	// 'length' used to be left uninitialised; it now holds the length of the input line.
+	Parser(const char* b) : cursor(b), finish(b + strlen(b)), length((size_t)(finish - cursor)) {}
+
+	// Advances by 'count' characters; fails unless more than 'count' characters remain.
+	bool Skip(size_t count)
+	{
+		if ((size_t)(finish - cursor) > count)
+		{
+			cursor += count;
+			return true;
+		}
+		return false;
+	}
+
+	// Moves the cursor just past the next occurrence of 'text'.
+	// When 'output' is given, the characters skipped before 'text' are copied into it
+	// (truncated to size - 1 and always NUL-terminated).
+	bool Skip(const char* text, char* output = nullptr, size_t size = 0)
+	{
+		if (const char* ptr = strstr(cursor, text))
+		{
+			// size == 0 would make 'size - 1' wrap around and overflow 'output'
+			if (output != nullptr && size > 0)
+			{
+				size_t count = std::min(size - 1, (size_t)(ptr - cursor));
+				strncpy(output, cursor, count);
+				output[count] = '\0';
+			}
+			cursor = ptr + strlen(text);
+			return true;
+		}
+		return false;
+	}
+
+	void SkipSpaces()
+	{
+		while (cursor != finish && (*cursor == ' ' || *cursor == '\t' || *cursor == '\n'))
+			++cursor;
+	}
+
+	bool Starts(const char* text) const
+	{
+		return strncmp(cursor, text, strlen(text)) == 0;
+	}
+
+	int GetInt() const
+	{
+		return atoi(cursor);
+	}
+
+	char GetChar() const
+	{
+		return *cursor;
+	}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Resets ftrace and switches tracing on. Only the first write is checked: when it fails the
+// tracer reports ERR_TRACER_INVALID_PASSWORD.
+CaptureStatus::Type FTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
+{
+	if (!isActive)
+	{
+		// Disable tracing
+		if (!Set(FTRACE_TRACING_ON, false))
+			return CaptureStatus::ERR_TRACER_INVALID_PASSWORD;
+
+		// Cleanup old data
+		Set(FTRACE_TRACE, "");
+		// Set clock type
+		Set(FTRACE_TRACE_CLOCK, "mono");
+		// Disable irq info
+		Set(FTRACE_OPTIONS_IRQ_INFO, false);
+		// Enable switch events
+		Set(FTRACE_SCHED_SWITCH, (mode & Mode::SWITCH_CONTEXT) != 0);
+
+		// Enable tracing
+		Set(FTRACE_TRACING_ON, true);
+
+		isActive = true;
+	}
+
+	return CaptureStatus::OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Switches tracing off, parses the collected trace and clears it.
+// Returns false when the tracer was not active.
+bool FTrace::Stop()
+{
+	if (!isActive)
+	{
+		return false;
+	}
+
+	// Reset variables
+	Set(FTRACE_TRACING_ON, false);
+	Set(FTRACE_SCHED_SWITCH, false);
+
+	// Parsing the output
+	char buffer[256] = { 0 };
+	sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'cat %s/%s\'", password.c_str(), KERNEL_TRACING_PATH, FTRACE_TRACE);
+	if (FILE* pipe = popen(buffer, "r"))
+	{
+		char* line = NULL;
+		size_t len = 0;
+		while ((getline(&line, &len, pipe)) != -1)
+			Parse(line);
+		// getline() allocates the buffer; the caller owns it
+		free(line);
+		pclose(pipe);
+	}
+
+	// Cleanup data
+	Set(FTRACE_TRACE, "");
+
+	pidCache.clear();
+
+	isActive = false;
+
+	return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Parses one line of the trace file. Comment lines and events other than sched_switch are
+// accepted and ignored; false is returned when an expected field is missing.
+bool FTrace::Parse(const char * line)
+{
+	// sched_switch:
+	// ConsoleApp-8687 [000] 181944.352057: sched_switch: prev_comm=ConsoleApp prev_pid=8687 prev_prio=120 prev_state=S ==> next_comm=ConsoleApp next_pid=8686 next_prio=120
+
+	Parser p(line);
+	if (p.Starts("#"))
+		return true;
+
+	if (!p.Skip(16))
+		return false;
+
+	if (!p.Skip("["))
+		return false;
+
+	int cpu = p.GetInt();
+	if (!p.Skip("]"))
+		return false;
+
+	int64 timestampInt = p.GetInt();
+	if (!p.Skip("."))
+		return false;
+
+	int64 timestampFraq = p.GetInt();
+	if (!p.Skip(": "))
+		return false;
+
+	// seconds + microseconds -> nanoseconds
+	int64 timestamp = ((timestampInt * 1000000) + timestampFraq) * 1000;
+
+	if (p.Starts("sched_switch:"))
+	{
+		ft::sched_switch ev;
+		ev.cpu_id = cpu;
+		ev.timestamp = timestamp;
+
+		if (!p.Skip("prev_comm="))
+			return false;
+
+		if (!p.Skip(" prev_pid=", ev.prev_comm, OPTICK_ARRAY_SIZE(ev.prev_comm)))
+			return false;
+
+		ev.prev_pid = p.GetInt();
+
+		if (!p.Skip(" prev_prio="))
+			return false;
+
+		ev.prev_prio = p.GetInt();
+
+		if (!p.Skip(" prev_state="))
+			return false;
+
+		switch (p.GetChar())
+		{
+		case 'D':
+			ev.prev_state = ft::process_state::UninterruptibleSleep;
+			break;
+
+		case 'R':
+			ev.prev_state = ft::process_state::Running;
+			break;
+
+		case 'S':
+			ev.prev_state = ft::process_state::InterruptibleSleep;
+			break;
+
+		case 'T':
+			ev.prev_state = ft::process_state::Stopped;
+			break;
+
+		case 'X':
+			ev.prev_state = ft::process_state::Dead;
+			break;
+
+		case 'Z':
+			ev.prev_state = ft::process_state::Zombie;
+			break;
+
+		default:
+			ev.prev_state = ft::process_state::Unknown;
+			break;
+		}
+
+		if (!p.Skip("==> next_comm="))
+			return false;
+
+		// the destination buffer is next_comm, so it is sized with next_comm (was prev_comm)
+		if (!p.Skip(" next_pid=", ev.next_comm, OPTICK_ARRAY_SIZE(ev.next_comm)))
+			return false;
+
+		ev.next_pid = p.GetInt();
+
+		if (!p.Skip(" next_prio="))
+			return false;
+
+		ev.next_prio = p.GetInt();
+
+		return ProcessEvent(ev);
+	}
+	return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Forwards a parsed event to Core. Only sched_switch is handled; other types return false.
+bool FTrace::ProcessEvent(const ft::base_event& ev)
+{
+	switch (ev.common_type)
+	{
+	case ft::sched_switch::type:
+		{
+			const ft::sched_switch& switchEv = (const ft::sched_switch&)ev;
+			SwitchContextDesc desc;
+			desc.reason = switchEv.prev_state + PROCESS_STATE_REASON_START;
+			desc.cpuId = switchEv.cpu_id;
+			desc.oldThreadId = (uint64)switchEv.prev_pid;
+			desc.newThreadId = (uint64)switchEv.next_pid;
+			desc.timestamp = switchEv.timestamp;
+			Core::Get().ReportSwitchContext(desc);
+
+			// Describe every thread the first time it is switched in
+			if (pidCache.find(switchEv.next_pid) == pidCache.end())
+			{
+				pidCache.insert(switchEv.next_pid);
+				Core::Get().RegisterThreadDescription(ThreadDescription(switchEv.next_comm, (ThreadID)switchEv.next_pid, (ProcessID)switchEv.next_pid, switchEv.next_prio));
+			}
+
+			return true;
+		}
+		break;
+	}
+
+	return false;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes "1" or "0" to the control file 'name'.
+bool FTrace::Set(const char * name, bool value)
+{
+	return Set(name, value ? "1" : "0");
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Writes 'value' to the control file 'name' (relative to KERNEL_TRACING_PATH).
+bool FTrace::Set(const char* name, const char* value)
+{
+	char buffer[256] = { 0 };
+	sprintf_s(buffer, "echo %s > %s/%s", value, KERNEL_TRACING_PATH, name);
+	return Exec(buffer);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Runs 'cmd' in a root shell through sudo; true when the command exits with status 0.
+bool FTrace::Exec(const char* cmd)
+{
+	char buffer[256] = { 0 };
+	sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\' 2> /dev/null", password.c_str(), cmd);
+	return std::system(buffer) == 0;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+FTrace::FTrace() : isActive(false)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+FTrace::~FTrace()
+{
+	Stop();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// (Template argument restored: FTrace is the only Trace implementation in this file.)
+Trace* Platform::CreateTrace()
+{
+	return Memory::New<FTrace>();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// No symbol engine on this platform.
+SymbolEngine* Platform::CreateSymbolEngine()
+{
+	return nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+#endif //OPTICK_ENABLE_TRACING
+#endif
//USE_OPTICK +#endif //__linux__ diff --git a/neo/libs/optick/optick_core.macos.h b/neo/libs/optick/optick_core.macos.h new file mode 100644 index 00000000..e4b53f8a --- /dev/null +++ b/neo/libs/optick/optick_core.macos.h @@ -0,0 +1,309 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+#if defined(__APPLE_CC__)
+
+#include "optick.config.h"
+#if USE_OPTICK
+
+#include "optick_core.platform.h"
+
+// NOTE(review): this copy of the patch carried five '#include' directives without a header
+// name. The list below is rebuilt from the calls made in this file - verify it against
+// upstream Optick.
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <thread>
+
+namespace Optick
+{
+	// Platform name reported to the profiler.
+	const char* Platform::GetName()
+	{
+		return "MacOS";
+	}
+
+	ThreadID Platform::GetThreadID()
+	{
+		uint64_t tid;
+		pthread_threadid_np(pthread_self(), &tid);
+		return tid;
+	}
+
+	ProcessID Platform::GetProcessID()
+	{
+		return (ProcessID)getpid();
+	}
+
+	// Ticks per second: GetTime() reports nanoseconds.
+	int64 Platform::GetFrequency()
+	{
+		return 1000000000;
+	}
+
+	// CLOCK_REALTIME time in nanoseconds. DTrace::Parse compares the 'walltimestamp' values
+	// printed by dtrace against a deadline taken from this clock.
+	int64 Platform::GetTime()
+	{
+		struct timespec ts;
+		clock_gettime(CLOCK_REALTIME, &ts);
+		return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+	}
+}
+
+#if OPTICK_ENABLE_TRACING
+
+#include "optick_core.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Context-switch tracer: runs 'dtrace' through sudo on a worker thread and converts every
+// printed thread dispatch into a SwitchContextDesc.
+class DTrace : public Trace
+{
+	static const bool isSilent = true;
+
+	std::thread processThread;
+	string password;
+
+	enum State
+	{
+		STATE_IDLE,
+		STATE_RUNNING,
+		STATE_ABORT,
+	};
+
+	volatile State state;
+	// Events stamped later than this make the worker stop (INT64_MAX while capturing)
+	volatile int64 timeout;
+
+	// Last thread seen on a CPU core
+	struct CoreState
+	{
+		ProcessID pid;
+		ThreadID tid;
+		int prio;
+		bool IsValid() const { return tid != INVALID_THREAD_ID; }
+		CoreState() : pid(INVALID_PROCESS_ID), tid(INVALID_THREAD_ID), prio(0) {}
+	};
+	static const int MAX_CPU_CORES = 256;
+	// (Template arguments restored: the list was missing in this copy of the patch; the array
+	// is filled with CoreState and indexed by cpu id.)
+	array<CoreState, MAX_CPU_CORES> cores;
+
+	static void AsyncProcess(DTrace* trace);
+	void Process();
+
+	bool CheckRootAccess();
+
+	enum ParseResult
+	{
+		PARSE_OK,
+		PARSE_TIMEOUT,
+		PARSE_FAILED,
+	};
+	ParseResult Parse(const char* line);
+public:
+
+	DTrace();
+	~DTrace();
+
+	virtual void SetPassword(const char* pwd) override { password = pwd; }
+	virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override;
+	virtual bool Stop() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+DTrace::DTrace() : state(STATE_IDLE), timeout(0)
+{
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Destroying a std::thread that is still joinable calls std::terminate, so a running
+// capture is stopped (and its worker joined) first.
+DTrace::~DTrace()
+{
+	Stop();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// True when 'sudo' accepts the stored password.
+bool DTrace::CheckRootAccess()
+{
+	char cmd[256] = { 0 };
+	sprintf_s(cmd, "echo \'%s\' | sudo -S echo %s", password.c_str(), isSilent ? "2> /dev/null" : "");
+	return system(cmd) == 0;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Starts the worker thread when idle and switch-context tracing is requested.
+CaptureStatus::Type DTrace::Start(Mode::Type mode, int /*frequency*/, const ThreadList& /*threads*/)
+{
+	if (state == STATE_IDLE && (mode & Mode::SWITCH_CONTEXT) != 0)
+	{
+		if (!CheckRootAccess())
+			return CaptureStatus::ERR_TRACER_INVALID_PASSWORD;
+
+		state = STATE_RUNNING;
+		timeout = INT64_MAX;
+		cores.fill(CoreState());
+		processThread = std::thread(AsyncProcess, this);
+	}
+
+	return CaptureStatus::OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sets the deadline to "now" and waits for the worker, which exits on the first event
+// stamped after the deadline. Returns false when no capture is running.
+bool DTrace::Stop()
+{
+	if (state != STATE_RUNNING)
+	{
+		return false;
+	}
+
+	timeout = Platform::GetTime();
+	processThread.join();
+	state = STATE_IDLE;
+
+	return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// popen() variant that also reports the pid of the spawned shell through 'outPid'.
+// 'type' must be "r" or "w".
+FILE* popen2(const char *program, const char *type, pid_t* outPid)
+{
+	FILE *iop;
+	int pdes[2];
+	pid_t pid;
+	if ((*type != 'r' && *type != 'w') || type[1] != '\0') {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	if (pipe(pdes) < 0) {
+		return (NULL);
+	}
+
+	switch (pid = fork()) {
+	case -1: /* Error. */
+		(void)close(pdes[0]);
+		(void)close(pdes[1]);
+		return (NULL);
+		/* NOTREACHED */
+	case 0: /* Child. */
+	{
+		if (*type == 'r') {
+			(void)close(pdes[0]);
+			if (pdes[1] != STDOUT_FILENO) {
+				(void)dup2(pdes[1], STDOUT_FILENO);
+				(void)close(pdes[1]);
+			}
+		}
+		else {
+			(void)close(pdes[1]);
+			if (pdes[0] != STDIN_FILENO) {
+				(void)dup2(pdes[0], STDIN_FILENO);
+				(void)close(pdes[0]);
+			}
+		}
+		execl("/bin/sh", "sh", "-c", program, NULL);
+		perror("execl");
+		/* _exit: the forked child must not run the parent's atexit handlers */
+		_exit(1);
+		/* NOTREACHED */
+	}
+	}
+	/* Parent; assume fdopen can't fail. */
+	if (*type == 'r') {
+		iop = fdopen(pdes[0], type);
+		(void)close(pdes[1]);
+	}
+	else {
+		iop = fdopen(pdes[1], type);
+		(void)close(pdes[0]);
+	}
+
+	if (outPid)
+		*outPid = pid;
+
+	return (iop);
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Worker body: runs dtrace through sudo and parses its output line by line until the
+// capture leaves STATE_RUNNING, the stream ends or Parse reports PARSE_TIMEOUT.
+void DTrace::Process()
+{
+	const char* command = "dtrace -n fbt::thread_dispatch:return'\\''{printf(\"@%d %d %d %d\", pid, tid, curthread->sched_pri, walltimestamp)}'\\''";
+
+	char buffer[256] = { 0 };
+	sprintf_s(buffer, "echo \'%s\' | sudo -S sh -c \'%s\' %s", password.c_str(), command, isSilent ? "2> /dev/null" : "");
+	pid_t pid;
+	if (FILE* pipe = popen2(buffer, "r", &pid))
+	{
+		char* line = NULL;
+		size_t len = 0;
+		while (state == STATE_RUNNING && (getline(&line, &len, pipe)) != -1)
+		{
+			if (Parse(line) == PARSE_TIMEOUT)
+				break;
+		}
+		// getline() allocates the buffer; the caller owns it
+		free(line);
+		fclose(pipe);
+
+		int internal_stat;
+		waitpid(pid, &internal_stat, 0);
+	}
+	else
+	{
+		OPTICK_FAILED("Failed to open communication pipe!");
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Parses one dtrace output line: a leading cpu number, then "@pid tid prio timestamp".
+// Returns PARSE_OK for a handled event, PARSE_TIMEOUT when the event is newer than the
+// deadline, and PARSE_FAILED for lines without a well-formed payload.
+DTrace::ParseResult DTrace::Parse(const char* line)
+{
+	const char* cmd = strchr(line, '@');
+	if (cmd == nullptr)
+		return PARSE_FAILED;
+
+	int cpu = atoi(line);
+	// 'cores' has MAX_CPU_CORES slots; reject anything outside
+	if (cpu < 0 || cpu >= MAX_CPU_CORES)
+		return PARSE_FAILED;
+
+	CoreState currState;
+
+	currState.pid = atoi(cmd + 1);
+	cmd = strchr(cmd, ' ');
+	if (cmd == nullptr)
+		return PARSE_FAILED;
+	++cmd;
+
+	currState.tid = atoi(cmd);
+	cmd = strchr(cmd, ' ');
+	if (cmd == nullptr)
+		return PARSE_FAILED;
+	++cmd;
+
+	currState.prio = atoi(cmd);
+	cmd = strchr(cmd, ' ');
+	if (cmd == nullptr)
+		return PARSE_FAILED;
+	++cmd;
+
+	int64_t timestamp = (int64_t)atoll(cmd);
+
+	if (timestamp > timeout)
+		return PARSE_TIMEOUT;
+
+	const CoreState& prevState = cores[cpu];
+
+	// A switch is reported only once the previous thread on this core is known
+	if (prevState.IsValid())
+	{
+		SwitchContextDesc desc;
+		desc.reason = 0;
+		desc.cpuId = cpu;
+		desc.oldThreadId = prevState.tid;
+		desc.newThreadId = currState.tid;
+		desc.timestamp = timestamp;
+		Core::Get().ReportSwitchContext(desc);
+	}
+
+	cores[cpu] = currState;
+
+	return PARSE_OK;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Thread entry point.
+void DTrace::AsyncProcess(DTrace *trace) {
+	trace->Process();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// (Template argument restored: DTrace is the only Trace implementation in this file.)
+Trace* Platform::CreateTrace()
+{
+	return Memory::New<DTrace>();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// No symbol engine on this platform.
+SymbolEngine* Platform::CreateSymbolEngine()
+{
+	return nullptr;
+}
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} +#endif //OPTICK_ENABLE_TRACING +#endif //USE_OPTICK +#endif //__APPLE_CC__ \ No newline at end of file diff --git a/neo/libs/optick/optick_core.platform.h b/neo/libs/optick/optick_core.platform.h new file mode 100644 index 00000000..4c2a1e51 --- /dev/null +++ b/neo/libs/optick/optick_core.platform.h @@ -0,0 +1,114 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+#include "optick_memory.h"
+
+//////////////////////////////////////////////////////////////////////////
+// Platform-specific stuff
+//////////////////////////////////////////////////////////////////////////
+namespace Optick
+{
+	struct Trace;
+	struct Module;
+	struct Symbol;
+	struct SymbolEngine;
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Platform API
+	// Declarations only: each optick_core.<platform>.h header supplies the definitions.
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	struct Platform
+	{
+		// Platform Name
+		static OPTICK_INLINE const char* GetName();
+		// Thread ID (system thread id)
+		static OPTICK_INLINE ThreadID GetThreadID();
+		// Process ID
+		static OPTICK_INLINE ProcessID GetProcessID();
+		// CPU Frequency (ticks per second of GetTime)
+		static OPTICK_INLINE int64 GetFrequency();
+		// CPU Time (Ticks)
+		static OPTICK_INLINE int64 GetTime();
+		// System Tracer
+		static OPTICK_INLINE Trace* CreateTrace();
+		// Symbol Resolver
+		static OPTICK_INLINE SymbolEngine* CreateSymbolEngine();
+	};
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Tracing API
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	struct Trace
+	{
+		// Optional: tracers that need elevated rights override this; the default ignores it
+		virtual void SetPassword(const char* /*pwd*/) {}
+		virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) = 0;
+		virtual bool Stop() = 0;
+		virtual ~Trace() {}
+	};
+
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Symbol API
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// A loaded module: path plus the address range [address, address + size)
+	struct Module
+	{
+		string path;
+		void* address;
+		size_t size;
+		Module(const char* p, void* a, size_t s) : path(p), address(a), size(s) {}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// A resolved code address
+	struct Symbol
+	{
+		uint64 address;
+		uint64 offset;
+		wstring file;
+		wstring function;
+		uint32 line;
+		Symbol()
+			: address(0)
+			, offset(0)
+			, line(0)
+		{}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	struct SymbolEngine
+	{
+		// Get list of loaded modules
+		// (Element type restored: the template argument list was missing in this copy of the patch.)
+		virtual const vector<Module>& GetModules() = 0;
+
+		// Get Symbol from address
+		virtual const Symbol* GetSymbol(uint64 dwAddress) = 0;
+
+		virtual ~SymbolEngine() {}
+	};
+}
+//////////////////////////////////////////////////////////////////////////
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_core.win.h b/neo/libs/optick/optick_core.win.h
new file mode 100644
index 00000000..07f0b370
--- /dev/null
+++ b/neo/libs/optick/optick_core.win.h
@@ -0,0 +1,1737 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#if defined(_MSC_VER) + +#include "optick.config.h" + +#if USE_OPTICK + +#include "optick_core.platform.h" + +namespace Optick +{ + const char* Platform::GetName() + { + #if OPTICK_PC + return "Windows"; + #else + return "XBox"; + #endif + } + + ThreadID Platform::GetThreadID() + { + return GetCurrentThreadId(); + } + + ProcessID Platform::GetProcessID() + { + return GetCurrentProcessId(); + } + + int64 Platform::GetFrequency() + { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return frequency.QuadPart; + } + + int64 Platform::GetTime() + { + LARGE_INTEGER largeInteger; + QueryPerformanceCounter(&largeInteger); + return largeInteger.QuadPart; + } +} + +#if OPTICK_ENABLE_TRACING +#include +#include "optick_core.h" + +/* +Event Tracing Functions - API +https://msdn.microsoft.com/en-us/library/windows/desktop/aa363795(v=vs.85).aspx +*/ + +#define DECLARE_ETW (!OPTICK_PC) + +#if DECLARE_ETW +// Copied from Windows SDK +#ifndef WMIAPI +#ifndef MIDL_PASS +#ifdef _WMI_SOURCE_ +#define WMIAPI __stdcall +#else +#define WMIAPI DECLSPEC_IMPORT __stdcall +#endif // _WMI_SOURCE +#endif // MIDL_PASS +#endif // WMIAPI +#define INITGUID +#include +#if defined(_NTDDK_) || defined(_NTIFS_) || defined(_WMIKM_) +#define _EVNTRACE_KERNEL_MODE +#endif +#if !defined(_EVNTRACE_KERNEL_MODE) +#include +#endif + +#if _MSC_VER <= 1600 +#define EVENT_DESCRIPTOR_DEF +#define EVENT_HEADER_DEF +#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +#define EVENT_RECORD_DEF +#endif + 
+// 64-bit handle type used for trace sessions (guarded so an SDK definition wins if present).
+#ifndef _TRACEHANDLE_DEFINED
+#define _TRACEHANDLE_DEFINED
+typedef ULONG64 TRACEHANDLE, *PTRACEHANDLE;
+#endif
+
+//
+// EventTraceGuid is used to identify an event tracing session
+//
+DEFINE_GUID( /* 68fdd900-4a3e-11d1-84f4-0000f80464e3 */
+    EventTraceGuid,
+    0x68fdd900,
+    0x4a3e,
+    0x11d1,
+    0x84, 0xf4, 0x00, 0x00, 0xf8, 0x04, 0x64, 0xe3
+);
+
+//
+// SystemTraceControlGuid. Used to specify event tracing for kernel
+//
+DEFINE_GUID( /* 9e814aad-3204-11d2-9a82-006008a86939 */
+    SystemTraceControlGuid,
+    0x9e814aad,
+    0x3204,
+    0x11d2,
+    0x9a, 0x82, 0x00, 0x60, 0x08, 0xa8, 0x69, 0x39
+);
+
+//
+// EventTraceConfigGuid. Used to report system configuration records
+//
+DEFINE_GUID( /* 01853a65-418f-4f36-aefc-dc0f1d2fd235 */
+    EventTraceConfigGuid,
+    0x01853a65,
+    0x418f,
+    0x4f36,
+    0xae, 0xfc, 0xdc, 0x0f, 0x1d, 0x2f, 0xd2, 0x35
+);
+
+//
+// DefaultTraceSecurityGuid. Specifies the default event tracing security
+//
+DEFINE_GUID( /* 0811c1af-7a07-4a06-82ed-869455cdf713 */
+    DefaultTraceSecurityGuid,
+    0x0811c1af,
+    0x7a07,
+    0x4a06,
+    0x82, 0xed, 0x86, 0x94, 0x55, 0xcd, 0xf7, 0x13
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Values for the ProcessTraceMode field of EVENT_TRACE_LOGFILE
+#define PROCESS_TRACE_MODE_REAL_TIME 0x00000100
+#define PROCESS_TRACE_MODE_RAW_TIMESTAMP 0x00001000
+#define PROCESS_TRACE_MODE_EVENT_RECORD 0x10000000
+///////////////////////////////////////////////////////////////////////////////
+// Bits of EVENT_HEADER::Flags
+#define EVENT_HEADER_FLAG_EXTENDED_INFO 0x0001
+#define EVENT_HEADER_FLAG_PRIVATE_SESSION 0x0002
+#define EVENT_HEADER_FLAG_STRING_ONLY 0x0004
+#define EVENT_HEADER_FLAG_TRACE_MESSAGE 0x0008
+#define EVENT_HEADER_FLAG_NO_CPUTIME 0x0010
+#define EVENT_HEADER_FLAG_32_BIT_HEADER 0x0020
+#define EVENT_HEADER_FLAG_64_BIT_HEADER 0x0040
+#define EVENT_HEADER_FLAG_CLASSIC_HEADER 0x0100
+#define EVENT_HEADER_FLAG_PROCESSOR_INDEX 0x0200
+///////////////////////////////////////////////////////////////////////////////
+// Wide-string name of the kernel logger session
+#define KERNEL_LOGGER_NAMEW L"NT Kernel Logger"
+/////////////////////////////////////////////////////////////////////////////// +#define EVENT_TRACE_REAL_TIME_MODE 0x00000100 // Real time mode on +/////////////////////////////////////////////////////////////////////////////// +#define EVENT_TRACE_CONTROL_STOP 1 +/////////////////////////////////////////////////////////////////////////////// + +// +// Enable flags for Kernel Events +// +#define EVENT_TRACE_FLAG_PROCESS 0x00000001 // process start & end +#define EVENT_TRACE_FLAG_THREAD 0x00000002 // thread start & end +#define EVENT_TRACE_FLAG_IMAGE_LOAD 0x00000004 // image load + +#define EVENT_TRACE_FLAG_DISK_IO 0x00000100 // physical disk IO +#define EVENT_TRACE_FLAG_DISK_FILE_IO 0x00000200 // requires disk IO + +#define EVENT_TRACE_FLAG_MEMORY_PAGE_FAULTS 0x00001000 // all page faults +#define EVENT_TRACE_FLAG_MEMORY_HARD_FAULTS 0x00002000 // hard faults only + +#define EVENT_TRACE_FLAG_NETWORK_TCPIP 0x00010000 // tcpip send & receive + +#define EVENT_TRACE_FLAG_REGISTRY 0x00020000 // registry calls +#define EVENT_TRACE_FLAG_DBGPRINT 0x00040000 // DbgPrint(ex) Calls + +// +// Enable flags for Kernel Events on Vista and above +// +#define EVENT_TRACE_FLAG_PROCESS_COUNTERS 0x00000008 // process perf counters +#define EVENT_TRACE_FLAG_CSWITCH 0x00000010 // context switches +#define EVENT_TRACE_FLAG_DPC 0x00000020 // deffered procedure calls +#define EVENT_TRACE_FLAG_INTERRUPT 0x00000040 // interrupts +#define EVENT_TRACE_FLAG_SYSTEMCALL 0x00000080 // system calls + +#define EVENT_TRACE_FLAG_DISK_IO_INIT 0x00000400 // physical disk IO initiation +#define EVENT_TRACE_FLAG_ALPC 0x00100000 // ALPC traces +#define EVENT_TRACE_FLAG_SPLIT_IO 0x00200000 // split io traces (VolumeManager) + +#define EVENT_TRACE_FLAG_DRIVER 0x00800000 // driver delays +#define EVENT_TRACE_FLAG_PROFILE 0x01000000 // sample based profiling +#define EVENT_TRACE_FLAG_FILE_IO 0x02000000 // file IO +#define EVENT_TRACE_FLAG_FILE_IO_INIT 0x04000000 // file IO initiation + +#define 
EVENT_TRACE_FLAG_PMC_PROFILE 0x80000000 // sample based profiling (PMC) - NOT CONFIRMED! + +// +// Enable flags for Kernel Events on Win7 and above +// +#define EVENT_TRACE_FLAG_DISPATCHER 0x00000800 // scheduler (ReadyThread) +#define EVENT_TRACE_FLAG_VIRTUAL_ALLOC 0x00004000 // VM operations + +// +// Enable flags for Kernel Events on Win8 and above +// +#define EVENT_TRACE_FLAG_VAMAP 0x00008000 // map/unmap (excluding images) +#define EVENT_TRACE_FLAG_NO_SYSCONFIG 0x10000000 // Do not do sys config rundown + +/////////////////////////////////////////////////////////////////////////////// + +#pragma warning(push) +#pragma warning (disable:4201) + +#ifndef EVENT_DESCRIPTOR_DEF +#define EVENT_DESCRIPTOR_DEF +typedef struct _EVENT_DESCRIPTOR { + + USHORT Id; + UCHAR Version; + UCHAR Channel; + UCHAR Level; + UCHAR Opcode; + USHORT Task; + ULONGLONG Keyword; + +} EVENT_DESCRIPTOR, *PEVENT_DESCRIPTOR; +typedef const EVENT_DESCRIPTOR *PCEVENT_DESCRIPTOR; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_HEADER_DEF +#define EVENT_HEADER_DEF +typedef struct _EVENT_HEADER { + + USHORT Size; + USHORT HeaderType; + USHORT Flags; + USHORT EventProperty; + ULONG ThreadId; + ULONG ProcessId; + LARGE_INTEGER TimeStamp; + GUID ProviderId; + EVENT_DESCRIPTOR EventDescriptor; + union { + struct { + ULONG KernelTime; + ULONG UserTime; + } DUMMYSTRUCTNAME; + ULONG64 ProcessorTime; + + } DUMMYUNIONNAME; + GUID ActivityId; + +} EVENT_HEADER, *PEVENT_HEADER; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +#define EVENT_HEADER_EXTENDED_DATA_ITEM_DEF +typedef struct _EVENT_HEADER_EXTENDED_DATA_ITEM { + + USHORT Reserved1; // Reserved for internal use + USHORT ExtType; // Extended info type + struct { + USHORT Linkage : 1; // Indicates additional extended + // data item + USHORT Reserved2 : 15; + }; + USHORT DataSize; // Size of extended info data + 
ULONGLONG DataPtr; // Pointer to extended info data + +} EVENT_HEADER_EXTENDED_DATA_ITEM, *PEVENT_HEADER_EXTENDED_DATA_ITEM; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef ETW_BUFFER_CONTEXT_DEF +#define ETW_BUFFER_CONTEXT_DEF +typedef struct _ETW_BUFFER_CONTEXT { + union { + struct { + UCHAR ProcessorNumber; + UCHAR Alignment; + } DUMMYSTRUCTNAME; + USHORT ProcessorIndex; + } DUMMYUNIONNAME; + USHORT LoggerId; +} ETW_BUFFER_CONTEXT, *PETW_BUFFER_CONTEXT; +#endif +/////////////////////////////////////////////////////////////////////////////// +#ifndef EVENT_RECORD_DEF +#define EVENT_RECORD_DEF +typedef struct _EVENT_RECORD { + EVENT_HEADER EventHeader; + ETW_BUFFER_CONTEXT BufferContext; + USHORT ExtendedDataCount; + + USHORT UserDataLength; + PEVENT_HEADER_EXTENDED_DATA_ITEM ExtendedData; + PVOID UserData; + PVOID UserContext; +} EVENT_RECORD, *PEVENT_RECORD; +#endif +/////////////////////////////////////////////////////////////////////////////// +typedef struct _EVENT_TRACE_PROPERTIES { + WNODE_HEADER Wnode; + // + // data provided by caller + ULONG BufferSize; // buffer size for logging (kbytes) + ULONG MinimumBuffers; // minimum to preallocate + ULONG MaximumBuffers; // maximum buffers allowed + ULONG MaximumFileSize; // maximum logfile size (in MBytes) + ULONG LogFileMode; // sequential, circular + ULONG FlushTimer; // buffer flush timer, in seconds + ULONG EnableFlags; // trace enable flags + union { + LONG AgeLimit; // unused + LONG FlushThreshold; // Number of buffers to fill before flushing + } DUMMYUNIONNAME; + + // data returned to caller + ULONG NumberOfBuffers; // no of buffers in use + ULONG FreeBuffers; // no of buffers free + ULONG EventsLost; // event records lost + ULONG BuffersWritten; // no of buffers written to file + ULONG LogBuffersLost; // no of logfile write failures + ULONG RealTimeBuffersLost; // no of rt delivery failures + HANDLE LoggerThreadId; // thread id of Logger + ULONG 
LogFileNameOffset; // Offset to LogFileName + ULONG LoggerNameOffset; // Offset to LoggerName +} EVENT_TRACE_PROPERTIES, *PEVENT_TRACE_PROPERTIES; + +typedef struct _EVENT_TRACE_HEADER { // overlays WNODE_HEADER + USHORT Size; // Size of entire record + union { + USHORT FieldTypeFlags; // Indicates valid fields + struct { + UCHAR HeaderType; // Header type - internal use only + UCHAR MarkerFlags; // Marker - internal use only + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + union { + ULONG Version; + struct { + UCHAR Type; // event type + UCHAR Level; // trace instrumentation level + USHORT Version; // version of trace record + } Class; + } DUMMYUNIONNAME2; + ULONG ThreadId; // Thread Id + ULONG ProcessId; // Process Id + LARGE_INTEGER TimeStamp; // time when event happens + union { + GUID Guid; // Guid that identifies event + ULONGLONG GuidPtr; // use with WNODE_FLAG_USE_GUID_PTR + } DUMMYUNIONNAME3; + union { + struct { + ULONG KernelTime; // Kernel Mode CPU ticks + ULONG UserTime; // User mode CPU ticks + } DUMMYSTRUCTNAME; + ULONG64 ProcessorTime; // Processor Clock + struct { + ULONG ClientContext; // Reserved + ULONG Flags; // Event Flags + } DUMMYSTRUCTNAME2; + } DUMMYUNIONNAME4; +} EVENT_TRACE_HEADER, *PEVENT_TRACE_HEADER; + +typedef struct _EVENT_TRACE { + EVENT_TRACE_HEADER Header; // Event trace header + ULONG InstanceId; // Instance Id of this event + ULONG ParentInstanceId; // Parent Instance Id. 
+ GUID ParentGuid; // Parent Guid; + PVOID MofData; // Pointer to Variable Data + ULONG MofLength; // Variable Datablock Length + union { + ULONG ClientContext; + ETW_BUFFER_CONTEXT BufferContext; + } DUMMYUNIONNAME; +} EVENT_TRACE, *PEVENT_TRACE; + +typedef struct _TRACE_LOGFILE_HEADER { + ULONG BufferSize; // Logger buffer size in Kbytes + union { + ULONG Version; // Logger version + struct { + UCHAR MajorVersion; + UCHAR MinorVersion; + UCHAR SubVersion; + UCHAR SubMinorVersion; + } VersionDetail; + } DUMMYUNIONNAME; + ULONG ProviderVersion; // defaults to NT version + ULONG NumberOfProcessors; // Number of Processors + LARGE_INTEGER EndTime; // Time when logger stops + ULONG TimerResolution; // assumes timer is constant!!! + ULONG MaximumFileSize; // Maximum in Mbytes + ULONG LogFileMode; // specify logfile mode + ULONG BuffersWritten; // used to file start of Circular File + union { + GUID LogInstanceGuid; // For RealTime Buffer Delivery + struct { + ULONG StartBuffers; // Count of buffers written at start. 
+ ULONG PointerSize; // Size of pointer type in bits + ULONG EventsLost; // Events losts during log session + ULONG CpuSpeedInMHz; // Cpu Speed in MHz + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME2; +#if defined(_WMIKM_) + PWCHAR LoggerName; + PWCHAR LogFileName; + RTL_TIME_ZONE_INFORMATION TimeZone; +#else + LPWSTR LoggerName; + LPWSTR LogFileName; + TIME_ZONE_INFORMATION TimeZone; +#endif + LARGE_INTEGER BootTime; + LARGE_INTEGER PerfFreq; // Reserved + LARGE_INTEGER StartTime; // Reserved + ULONG ReservedFlags; // ClockType + ULONG BuffersLost; +} TRACE_LOGFILE_HEADER, *PTRACE_LOGFILE_HEADER; + +typedef enum _TRACE_QUERY_INFO_CLASS { + TraceGuidQueryList, + TraceGuidQueryInfo, + TraceGuidQueryProcess, + TraceStackTracingInfo, // Win7 + TraceSystemTraceEnableFlagsInfo, + TraceSampledProfileIntervalInfo, + TraceProfileSourceConfigInfo, + TraceProfileSourceListInfo, + TracePmcEventListInfo, + TracePmcCounterListInfo, + MaxTraceSetInfoClass +} TRACE_QUERY_INFO_CLASS, TRACE_INFO_CLASS; + +typedef struct _CLASSIC_EVENT_ID { + GUID EventGuid; + UCHAR Type; + UCHAR Reserved[7]; +} CLASSIC_EVENT_ID, *PCLASSIC_EVENT_ID; + +typedef struct _TRACE_PROFILE_INTERVAL { + ULONG Source; + ULONG Interval; +} TRACE_PROFILE_INTERVAL, *PTRACE_PROFILE_INTERVAL; + +typedef struct _EVENT_TRACE_LOGFILEW +EVENT_TRACE_LOGFILEW, *PEVENT_TRACE_LOGFILEW; + +typedef ULONG(WINAPI * PEVENT_TRACE_BUFFER_CALLBACKW) +(PEVENT_TRACE_LOGFILEW Logfile); + +typedef VOID(WINAPI *PEVENT_CALLBACK)(PEVENT_TRACE pEvent); + +typedef struct _EVENT_RECORD +EVENT_RECORD, *PEVENT_RECORD; + +typedef VOID(WINAPI *PEVENT_RECORD_CALLBACK) (PEVENT_RECORD EventRecord); + +struct _EVENT_TRACE_LOGFILEW { + LPWSTR LogFileName; // Logfile Name + LPWSTR LoggerName; // LoggerName + LONGLONG CurrentTime; // timestamp of last event + ULONG BuffersRead; // buffers read to date + union { + // Mode of the logfile + ULONG LogFileMode; + // Processing flags used on Vista and above + ULONG ProcessTraceMode; + } DUMMYUNIONNAME; + 
EVENT_TRACE CurrentEvent; // Current Event from this stream. + TRACE_LOGFILE_HEADER LogfileHeader; // logfile header structure + PEVENT_TRACE_BUFFER_CALLBACKW // callback before each buffer + BufferCallback; // is read + // + // following variables are filled for BufferCallback. + // + ULONG BufferSize; + ULONG Filled; + ULONG EventsLost; + // + // following needs to be propaged to each buffer + // + union { + // Callback with EVENT_TRACE + PEVENT_CALLBACK EventCallback; + // Callback with EVENT_RECORD on Vista and above + PEVENT_RECORD_CALLBACK EventRecordCallback; + } DUMMYUNIONNAME2; + + ULONG IsKernelTrace; // TRUE for kernel logfile + + PVOID Context; // reserved for internal use +}; + +#pragma warning(pop) + +#define PEVENT_TRACE_BUFFER_CALLBACK PEVENT_TRACE_BUFFER_CALLBACKW +#define EVENT_TRACE_LOGFILE EVENT_TRACE_LOGFILEW +#define PEVENT_TRACE_LOGFILE PEVENT_TRACE_LOGFILEW +#define KERNEL_LOGGER_NAME KERNEL_LOGGER_NAMEW +#define GLOBAL_LOGGER_NAME GLOBAL_LOGGER_NAMEW +#define EVENT_LOGGER_NAME EVENT_LOGGER_NAMEW + +EXTERN_C +ULONG +WMIAPI +ProcessTrace( + _In_reads_(HandleCount) PTRACEHANDLE HandleArray, + _In_ ULONG HandleCount, + _In_opt_ LPFILETIME StartTime, + _In_opt_ LPFILETIME EndTime +); + +EXTERN_C +ULONG +WMIAPI +StartTraceW( + _Out_ PTRACEHANDLE TraceHandle, + _In_ LPCWSTR InstanceName, + _Inout_ PEVENT_TRACE_PROPERTIES Properties +); + +EXTERN_C +ULONG +WMIAPI +ControlTraceW( + _In_ TRACEHANDLE TraceHandle, + _In_opt_ LPCWSTR InstanceName, + _Inout_ PEVENT_TRACE_PROPERTIES Properties, + _In_ ULONG ControlCode +); + +EXTERN_C +TRACEHANDLE +WMIAPI +OpenTraceW( + _Inout_ PEVENT_TRACE_LOGFILEW Logfile +); + +EXTERN_C +ULONG +WMIAPI +CloseTrace( + _In_ TRACEHANDLE TraceHandle +); + +EXTERN_C +ULONG +WMIAPI +TraceSetInformation( + _In_ TRACEHANDLE SessionHandle, + _In_ TRACE_INFO_CLASS InformationClass, + _In_reads_bytes_(InformationLength) PVOID TraceInformation, + _In_ ULONG InformationLength +); + +EXTERN_C +ULONG +WMIAPI +TraceQueryInformation( + 
_In_ TRACEHANDLE SessionHandle, + _In_ TRACE_INFO_CLASS InformationClass, + _Out_writes_bytes_(InformationLength) PVOID TraceInformation, + _In_ ULONG InformationLength, + _Out_opt_ PULONG ReturnLength +); + +////////////////////////////////////////////////////////////////////////// +#define RegisterTraceGuids RegisterTraceGuidsW +#define StartTrace StartTraceW +#define ControlTrace ControlTraceW +#define StopTrace StopTraceW +#define QueryTrace QueryTraceW +#define UpdateTrace UpdateTraceW +#define FlushTrace FlushTraceW +#define QueryAllTraces QueryAllTracesW +#define OpenTrace OpenTraceW +////////////////////////////////////////////////////////////////////////// +#else +#define INITGUID // Causes definition of SystemTraceControlGuid in evntrace.h. +#include +#include +#include +#include +#endif //DECLARE_ETW + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const int MAX_CPU_CORES = 256; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct ETWRuntime +{ + array activeCores; + vector> activeSyscalls; + unordered_set activeThreadsIDs; + ProcessID currentProcessId; + + ETWRuntime() + { + Reset(); + } + + void Reset() + { + currentProcessId = INVALID_PROCESS_ID; + activeCores.fill(INVALID_THREAD_ID); + activeSyscalls.resize(0); + activeThreadsIDs.clear(); + } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class ETW : public Trace +{ + static const int ETW_BUFFER_SIZE = 1024 << 10; // 1Mb + static const int ETW_BUFFER_COUNT = 32; + static const int ETW_MAXIMUM_SESSION_NAME = 1024; + + EVENT_TRACE_PROPERTIES *traceProperties; + EVENT_TRACE_LOGFILE logFile; + TRACEHANDLE traceSessionHandle; + TRACEHANDLE openedHandle; + + HANDLE processThreadHandle; + DWORD currentProcessId; + + bool isActive; + + 
static DWORD WINAPI RunProcessTraceThreadFunction(LPVOID parameter); + static void AdjustPrivileges(); + + unordered_map syscallDescriptions; +public: + + ETWRuntime runtime; + + ETW(); + ~ETW(); + + virtual CaptureStatus::Type Start(Mode::Type mode, int frequency, const ThreadList& threads) override; + virtual bool Stop() override; + + DWORD GetProcessID() const { return currentProcessId; } +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct CSwitch +{ + // New thread ID after the switch. + uint32 NewThreadId; + + // Previous thread ID. + uint32 OldThreadId; + + // Thread priority of the new thread. + int8 NewThreadPriority; + + // Thread priority of the previous thread. + int8 OldThreadPriority; + + //The index of the C-state that was last used by the processor. A value of 0 represents the lightest idle state with higher values representing deeper C-states. + uint8 PreviousCState; + + // Not used. + int8 SpareByte; + + // Wait reason for the previous thread. 
The following are the possible values: + // 0 Executive + // 1 FreePage + // 2 PageIn + // 3 PoolAllocation + // 4 DelayExecution + // 5 Suspended + // 6 UserRequest + // 7 WrExecutive + // 8 WrFreePage + // 9 WrPageIn + // 10 WrPoolAllocation + // 11 WrDelayExecution + // 12 WrSuspended + // 13 WrUserRequest + // 14 WrEventPair + // 15 WrQueue + // 16 WrLpcReceive + // 17 WrLpcReply + // 18 WrVirtualMemory + // 19 WrPageOut + // 20 WrRendezvous + // 21 WrKeyedEvent + // 22 WrTerminated + // 23 WrProcessInSwap + // 24 WrCpuRateControl + // 25 WrCalloutStack + // 26 WrKernel + // 27 WrResource + // 28 WrPushLock + // 29 WrMutex + // 30 WrQuantumEnd + // 31 WrDispatchInt + // 32 WrPreempted + // 33 WrYieldExecution + // 34 WrFastMutex + // 35 WrGuardedMutex + // 36 WrRundown + // 37 MaximumWaitReason + int8 OldThreadWaitReason; + + // Wait mode for the previous thread. The following are the possible values: + // 0 KernelMode + // 1 UserMode + int8 OldThreadWaitMode; + + // State of the previous thread. The following are the possible state values: + // 0 Initialized + // 1 Ready + // 2 Running + // 3 Standby + // 4 Terminated + // 5 Waiting + // 6 Transition + // 7 DeferredReady (added for Windows Server 2003) + int8 OldThreadState; + + // Ideal wait time of the previous thread. + int8 OldThreadWaitIdealProcessor; + + // Wait time for the new thread. + uint32 NewThreadWaitTime; + + // Reserved. 
+ uint32 Reserved; + + static const byte OPCODE = 36; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct StackWalk_Event +{ + // Original event time stamp from the event header + uint64 EventTimeStamp; + + // The process identifier of the original event + uint32 StackProcess; + + // The thread identifier of the original event + uint32 StackThread; + + // Callstack head + uint64 Stack0; + + static const byte OPCODE = 32; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct Thread_TypeGroup1 +{ + // Process identifier of the thread involved in the event. + uint32 ProcessId; + // Thread identifier of the thread involved in the event. + uint32 TThreadId; + // Base address of the thread's stack. + uint64 StackBase; + // Limit of the thread's stack. + uint64 StackLimit; + // Base address of the thread's user-mode stack. + uint64 UserStackBase; + // Limit of the thread's user-mode stack. + uint64 UserStackLimit; + // The set of processors on which the thread is allowed to run. + uint32 Affinity; + // Starting address of the function to be executed by this thread. + uint64 Win32StartAddr; + // Thread environment block base address. + uint64 TebBase; + // Identifies the service if the thread is owned by a service; otherwise, zero. + uint32 SubProcessTag; + // The scheduler priority of the thread + uint8 BasePriority; + // A memory page priority hint for memory pages accessed by the thread. + uint8 PagePriority; + // An IO priority hint for scheduling IOs generated by the thread. + uint8 IoPriority; + // Not used. 
+ uint8 ThreadFlags; + + enum Opcode : uint8 + { + Start = 1, + End = 2, + DCStart = 3, + DCEnd = 4, + }; +}; + +size_t GetSIDSize(uint8* ptr) +{ + size_t result = 0; + + int sid = *((int*)ptr); + + if (sid != 0) + { + size_t tokenSize = 16; + ptr += tokenSize; + result += tokenSize; + result += 8 + (4 * ((SID*)ptr)->SubAuthorityCount); + } + else + { + result = 4; + } + + return result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// https://github.com/Microsoft/perfview/blob/688a8564062d51321bbab53cd71d9e174a77d2ce/src/TraceEvent/TraceEvent.cs +struct Process_TypeGroup1 +{ + // The address of the process object in the kernel. + uint64 UniqueProcessKey; + // Global process identifier that you can use to identify a process. + uint32 ProcessId; + // Unique identifier of the process that creates this process. + uint32 ParentId; + // Unique identifier that an operating system generates when it creates a new session. + uint32 SessionId; + // Exit status of the stopped process. + int32 ExitStatus; + // The physical address of the page table of the process. + uint64 DirectoryTableBase; + // (?) uint8 Flags; + // object UserSID; + // string ImageFileName; + // wstring CommandLine; + + static size_t GetSIDOffset(PEVENT_RECORD pEvent) + { + if (pEvent->EventHeader.EventDescriptor.Version >= 4) + return 36; + + if (pEvent->EventHeader.EventDescriptor.Version == 3) + return 32; + + return 24; + } + + const char* GetProcessName(PEVENT_RECORD pEvent) const + { + OPTICK_ASSERT((pEvent->EventHeader.Flags & EVENT_HEADER_FLAG_64_BIT_HEADER) != 0, "32-bit is not supported! 
Disable OPTICK_ENABLE_TRACING on 32-bit platform if needed!"); + size_t sidOffset = GetSIDOffset(pEvent); + size_t sidSize = GetSIDSize((uint8*)this + sidOffset); + return (char*)this + sidOffset + sidSize; + } + + enum Opcode + { + Start = 1, + End = 2, + DCStart = 3, + DCEnd = 4, + Defunct = 39, + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SampledProfile +{ + uint32 InstructionPointer; + uint32 ThreadId; + uint32 Count; + + static const byte OPCODE = 46; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SysCallEnter +{ + uintptr_t SysCallAddress; + + static const byte OPCODE = 51; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct SysCallExit +{ + uint32 SysCallNtStatus; + + static const byte OPCODE = 52; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +constexpr uint32 GuidHash(uint32 u0, uint32 u1, uint32 u2, uint32 u3) +{ + return u0 ^ u1 ^ u2 ^ u3; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +uint32 GuidHash(GUID guid) +{ + return GuidHash(guid.Data1, (guid.Data3 << 16) | guid.Data2, ((uint32*)guid.Data4)[0], ((uint32*)guid.Data4)[1]); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define ETW_GUID(NAME, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) DEFINE_GUID(NAME, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8); \ + const uint32 NAME##Hash = GuidHash((uint32)l, (uint32)((w2 << 16) | w1), (uint32)((b4 << 24) | (b3 << 16) | (b2 << 8) | b1), (uint32)((b8 << 24) | (b7 << 16) | (b6 << 8) | b5)); + 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// ce1dbfb4-137e-4da6-87b0-3f59aa102cbc
+// (same GUID value as PerfInfoGuid below, so SampledProfileGuidHash == PerfInfoGuidHash)
+ETW_GUID(SampledProfileGuid, 0xce1dbfb4, 0x137e, 0x4da6, 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3d6fa8d1-fe05-11d0-9dda-00c04fd7ba7c
+// https://docs.microsoft.com/en-us/windows/desktop/etw/thread
+ETW_GUID(ThreadGuid, 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3d6fa8d0-fe05-11d0-9dda-00c04fd7ba7c
+// https://docs.microsoft.com/en-us/windows/desktop/etw/process
+ETW_GUID(ProcessGuid, 0x3d6fa8d0, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// def2fe46-7bd6-4b80-bd94-f57fe20d0ce3
+// https://docs.microsoft.com/en-us/windows/win32/etw/stackwalk
+ETW_GUID(StackWalkGuid, 0xdef2fe46, 0x7bd6, 0x4b80, 0xbd, 0x94, 0xf5, 0x7f, 0xe2, 0x0d, 0x0c, 0xe3);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// https://docs.microsoft.com/en-us/windows/win32/etw/perfinfo
+// ce1dbfb4-137e-4da6-87b0-3f59aa102cbc
+ETW_GUID(PerfInfoGuid, 0xce1dbfb4, 0x137e, 0x4da6, 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Tracer instance used by the event handlers below; they dereference it without a null check,
+// so it must point to a live ETW object while events are being delivered.
+ETW* g_ETW;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Handles thread-provider events: context switches and thread start / rundown notifications.
+void OnThreadEvent(PEVENT_RECORD eventRecord)
+{
+    ETWRuntime& runtime = g_ETW->runtime;
+
+    switch (eventRecord->EventHeader.EventDescriptor.Opcode)
+    {
+    case CSwitch::OPCODE:
+        // Only payloads whose size matches CSwitch exactly are interpreted.
+        if (sizeof(CSwitch) == eventRecord->UserDataLength)
+        {
+            CSwitch* pSwitchEvent = (CSwitch*)eventRecord->UserData;
+
+            SwitchContextDesc desc;
+            desc.reason = pSwitchEvent->OldThreadWaitReason;
+            desc.cpuId = eventRecord->BufferContext.ProcessorNumber;
+            desc.oldThreadId = (uint64)pSwitchEvent->OldThreadId;
+            desc.newThreadId = (uint64)pSwitchEvent->NewThreadId;
+            desc.timestamp = eventRecord->EventHeader.TimeStamp.QuadPart;
+            Core::Get().ReportSwitchContext(desc);
+
+
+            // Assign ThreadID to the cores
+            // (a core is marked active only while one of the profiled process' threads runs on it)
+            if (runtime.activeThreadsIDs.find(desc.newThreadId) != runtime.activeThreadsIDs.end())
+            {
+                runtime.activeCores[desc.cpuId] = desc.newThreadId;
+            }
+            else if (runtime.activeThreadsIDs.find(desc.oldThreadId) != runtime.activeThreadsIDs.end())
+            {
+                runtime.activeCores[desc.cpuId] = INVALID_THREAD_ID;
+            }
+        }
+        break;
+
+    case Thread_TypeGroup1::Start:
+    case Thread_TypeGroup1::DCStart:
+        if (eventRecord->UserDataLength >= sizeof(Thread_TypeGroup1))
+        {
+            const Thread_TypeGroup1* pThreadEvent = (const Thread_TypeGroup1*)eventRecord->UserData;
+            Core::Get().RegisterThreadDescription(ThreadDescription("", pThreadEvent->TThreadId, pThreadEvent->ProcessId, 1, pThreadEvent->BasePriority));
+
+            // Remember the threads that belong to the profiled process.
+            if (pThreadEvent->ProcessId == runtime.currentProcessId)
+                runtime.activeThreadsIDs.insert(pThreadEvent->TThreadId);
+        }
+        break;
+
+    default:
+        break;
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Handles process-provider events: registers a description for every started / already running process.
+void OnProcessEvent(PEVENT_RECORD eventRecord)
+{
+    switch (eventRecord->EventHeader.EventDescriptor.Opcode)
+    {
+    case Process_TypeGroup1::Start:
+    case Process_TypeGroup1::DCStart:
+        if (eventRecord->UserDataLength >= sizeof(Process_TypeGroup1))
+        {
+            const Process_TypeGroup1* pProcessEvent = (const Process_TypeGroup1*)eventRecord->UserData;
+            Core::Get().RegisterProcessDescription(ProcessDescription(pProcessEvent->GetProcessName(eventRecord), pProcessEvent->ProcessId, pProcessEvent->UniqueProcessKey));
+        }
+        break;
+
+    default:
+        break;
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Handles stack-walk events: reports the callstacks captured for threads of the profiled process.
+void OnStackWalkEvent(PEVENT_RECORD eventRecord)
+{
+    switch (eventRecord->EventHeader.EventDescriptor.Opcode)
+    {
+    case StackWalk_Event::OPCODE:
+        if (eventRecord->UserData && eventRecord->UserDataLength >= sizeof(StackWalk_Event))
+        {
+            //TODO: Support x86 windows kernels
+            const size_t osKernelPtrSize = sizeof(uint64);
+
+            StackWalk_Event* pStackWalkEvent = (StackWalk_Event*)eventRecord->UserData;
+            // Stack0 is already part of StackWalk_Event, hence the "1 +" for the frames that follow it.
+            uint32 count = 1 + (eventRecord->UserDataLength - sizeof(StackWalk_Event)) / osKernelPtrSize;
+
+            if (count && pStackWalkEvent->StackThread != 0)
+            {
+                if (pStackWalkEvent->StackProcess == g_ETW->GetProcessID())
+                {
+                    CallstackDesc desc;
+                    desc.threadID = pStackWalkEvent->StackThread;
+                    desc.timestamp = pStackWalkEvent->EventTimeStamp;
+
+                    static_assert(osKernelPtrSize == sizeof(uint64), "Incompatible types!");
+                    desc.callstack = &pStackWalkEvent->Stack0;
+
+                    // The count is narrowed to 8 bits; clamp it so a stack with more than 255 frames
+                    // is truncated to its first 255 entries instead of wrapping modulo 256.
+                    desc.count = (uint8)(count < 0xFF ? count : 0xFF);
+                    Core::Get().ReportStackWalk(desc);
+                }
+            }
+        }
+        break;
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Handles perf-info events: pairs syscall enter / exit notifications per CPU core.
+void OnPerfInfoEvent(PEVENT_RECORD eventRecord)
+{
+    ETWRuntime& runtime = g_ETW->runtime;
+
+    switch (eventRecord->EventHeader.EventDescriptor.Opcode)
+    {
+    case SysCallEnter::OPCODE:
+        if (eventRecord->UserDataLength >= sizeof(SysCallEnter))
+        {
+            uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber;
+            uint64_t threadId = runtime.activeCores[cpuId];
+
+            // Only record syscalls issued while a thread of the profiled process runs on this core.
+            if (threadId != INVALID_THREAD_ID)
+            {
+                SysCallEnter* pEventEnter = (SysCallEnter*)eventRecord->UserData;
+
+                SysCallData& sysCall = Core::Get().syscallCollector.Add();
+                sysCall.start = eventRecord->EventHeader.TimeStamp.QuadPart;
+                sysCall.finish = EventTime::INVALID_TIMESTAMP;
+                sysCall.threadID = threadId;
+                sysCall.id = pEventEnter->SysCallAddress;
+                sysCall.description = nullptr;
+
+                runtime.activeSyscalls.push_back(std::make_pair(cpuId, &sysCall));
+            }
+        }
+        break;
+
+    case SysCallExit::OPCODE:
+        if (eventRecord->UserDataLength >= sizeof(SysCallExit))
+        {
+            uint8_t cpuId = eventRecord->BufferContext.ProcessorNumber;
+            if (runtime.activeCores[cpuId] != INVALID_THREAD_ID)
+            {
+                // Close the most recently entered syscall on this core (search from the back).
+                for (int i = (int)runtime.activeSyscalls.size() - 1; i >= 0; --i)
+                {
+                    if (runtime.activeSyscalls[i].first == cpuId)
+                    {
+                        runtime.activeSyscalls[i].second->finish = eventRecord->EventHeader.TimeStamp.QuadPart;
+                        runtime.activeSyscalls.erase(runtime.activeSyscalls.begin() + i);
+                        break;
+                    }
+                }
+            }
+        }
+        break;
+
+    default:
+        break;
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Event-record callback: dispatches on the hash of the provider GUID.
+// PerfInfoGuidHash and SampledProfileGuidHash are the same value, so only one of them can be a case label.
+void WINAPI OnRecordEvent(PEVENT_RECORD eventRecord)
+{
+    const uint32 eventHash = GuidHash(eventRecord->EventHeader.ProviderId);
+
+    switch (eventHash)
+    {
+    case ThreadGuidHash:
+        OnThreadEvent(eventRecord);
+        break;
+
+    case ProcessGuidHash:
+        OnProcessEvent(eventRecord);
+        break;
+
+    case StackWalkGuidHash:
+        OnStackWalkEvent(eventRecord);
+        break;
+
+    case PerfInfoGuidHash:
+        OnPerfInfoEvent(eventRecord);
+        break;
+
+    default:
+        break;
+    }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Buffer callback: ignores the buffer and always returns a non-zero value (true converted to ULONG).
+static ULONG WINAPI OnBufferRecord(_In_ PEVENT_TRACE_LOGFILE Buffer)
+{
+    OPTICK_UNUSED(Buffer);
+    return true;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Sentinel for "no trace handle" (all bits set).
+const TRACEHANDLE INVALID_TRACEHANDLE = (TRACEHANDLE)-1;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+DWORD WINAPI ETW::RunProcessTraceThreadFunction(LPVOID parameter) +{ + Memory::InitThread(); + Core::Get().RegisterThreadDescription(ThreadDescription("[Optick] ETW", GetCurrentThreadId(), GetCurrentProcessId())); + ETW* etw = (ETW*)parameter; + ULONG status = ProcessTrace(&etw->openedHandle, 1, 0, 0); + OPTICK_UNUSED(status); + return 0; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void ETW::AdjustPrivileges() +{ +#if OPTICK_PC + HANDLE token = 0; + if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token)) + { + TOKEN_PRIVILEGES tokenPrivileges; + memset(&tokenPrivileges, 0, sizeof(tokenPrivileges)); + tokenPrivileges.PrivilegeCount = 1; + tokenPrivileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + LookupPrivilegeValue(NULL, SE_SYSTEM_PROFILE_NAME, &tokenPrivileges.Privileges[0].Luid); + + AdjustTokenPrivileges(token, FALSE, &tokenPrivileges, 0, (PTOKEN_PRIVILEGES)NULL, 0); + CloseHandle(token); + } +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +ETW::ETW() + : traceProperties(nullptr) + , traceSessionHandle(INVALID_TRACEHANDLE) + , openedHandle(INVALID_TRACEHANDLE) + , processThreadHandle(INVALID_HANDLE_VALUE) + , currentProcessId((DWORD)-1) + , isActive(false) +{ + currentProcessId = GetCurrentProcessId(); + + OPTICK_ASSERT(g_ETW == nullptr, "Can't create more than one ETW session"); + g_ETW = this; +} + +CaptureStatus::Type ETW::Start(Mode::Type mode, int frequency, const ThreadList& threads) +{ + if (!isActive) + { + AdjustPrivileges(); + + runtime.Reset(); + + for (auto it = threads.begin(); it != threads.end(); ++it) + { + ThreadEntry* entry = *it; + if (entry->isAlive) + { + runtime.activeThreadsIDs.insert(entry->description.threadID); + } + } + + + ULONG bufferSize = sizeof(EVENT_TRACE_PROPERTIES) + (ETW_MAXIMUM_SESSION_NAME + 
MAX_PATH) * sizeof(WCHAR); + if (traceProperties == nullptr) + traceProperties = (EVENT_TRACE_PROPERTIES*)Memory::Alloc(bufferSize); + ZeroMemory(traceProperties, bufferSize); + traceProperties->Wnode.BufferSize = bufferSize; + traceProperties->LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + traceProperties->EnableFlags = 0; + +#if OPTICK_PC + traceProperties->BufferSize = ETW_BUFFER_SIZE; + traceProperties->MinimumBuffers = ETW_BUFFER_COUNT; +#endif + + if (mode & Mode::SWITCH_CONTEXT) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_CSWITCH; + } + + if (mode & Mode::AUTOSAMPLING) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROFILE; + } + + if (mode & Mode::SYS_CALLS) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_SYSTEMCALL; + } + + if (mode & Mode::OTHER_PROCESSES) + { + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_PROCESS; + traceProperties->EnableFlags |= EVENT_TRACE_FLAG_THREAD; + } + + traceProperties->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + traceProperties->Wnode.Flags = WNODE_FLAG_TRACED_GUID; + // + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa364160(v=vs.85).aspx + // Clock resolution = QPC + traceProperties->Wnode.ClientContext = 1; + traceProperties->Wnode.Guid = SystemTraceControlGuid; + + // ERROR_BAD_LENGTH(24): The Wnode.BufferSize member of Properties specifies an incorrect size. Properties does not have sufficient space allocated to hold a copy of SessionName. + // ERROR_ALREADY_EXISTS(183): A session with the same name or GUID is already running. + // ERROR_ACCESS_DENIED(5): Only users with administrative privileges, users in the Performance Log Users group, and services running as LocalSystem, LocalService, NetworkService can control event tracing sessions. 
+ // ERROR_INVALID_PARAMETER(87) + // ERROR_BAD_PATHNAME(161) + // ERROR_DISK_FULL(112) + // ERROR_NO_SUCH_PRIVILEGE(1313) + int retryCount = 4; + ULONG status = CaptureStatus::OK; + + while (--retryCount >= 0) + { + status = StartTrace(&traceSessionHandle, KERNEL_LOGGER_NAME, traceProperties); + + switch (status) + { + case ERROR_NO_SUCH_PRIVILEGE: + AdjustPrivileges(); + break; + + case ERROR_ALREADY_EXISTS: + ControlTrace(0, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP); + break; + + case ERROR_ACCESS_DENIED: + return CaptureStatus::ERR_TRACER_ACCESS_DENIED; + + case ERROR_SUCCESS: + retryCount = 0; + break; + + default: + return CaptureStatus::ERR_TRACER_FAILED; + } + } + + if (status != ERROR_SUCCESS) + { + return CaptureStatus::ERR_TRACER_FAILED; + } + + CLASSIC_EVENT_ID callstackSamples[4]; + int callstackCountSamplesCount = 0; + + if (mode & Mode::AUTOSAMPLING) + { + callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = SampledProfile::OPCODE; + ++callstackCountSamplesCount; + } + + if (mode & Mode::SYS_CALLS) + { + callstackSamples[callstackCountSamplesCount].EventGuid = SampledProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = SysCallEnter::OPCODE; + ++callstackCountSamplesCount; + } + + /* + callstackSamples[callstackCountSamplesCount].EventGuid = CSwitchProfileGuid; + callstackSamples[callstackCountSamplesCount].Type = CSwitch::OPCODE; + ++callstackCountSamplesCount; + */ + + + /* + https://msdn.microsoft.com/en-us/library/windows/desktop/dd392328%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396 + Typically, on 64-bit computers, you cannot capture the kernel stack in certain contexts when page faults are not allowed. To enable walking the kernel stack on x64, set + the DisablePagingExecutive Memory Management registry value to 1. 
The DisablePagingExecutive registry value is located under the following registry key: + HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Session Manager\Memory Management + */ + if (callstackCountSamplesCount > 0) + { + status = TraceSetInformation(traceSessionHandle, TraceStackTracingInfo, &callstackSamples[0], sizeof(CLASSIC_EVENT_ID) * callstackCountSamplesCount); + if (status != ERROR_SUCCESS) + { + OPTICK_FAILED("TraceSetInformation - failed"); + return CaptureStatus::ERR_TRACER_FAILED; + } + } + + if (mode & Mode::AUTOSAMPLING) + { + TRACE_PROFILE_INTERVAL itnerval = { 0 }; + memset(&itnerval, 0, sizeof(TRACE_PROFILE_INTERVAL)); + int step = 10000 * 1000 / frequency; // 1ms = 10000 steps + itnerval.Interval = step; // std::max(1221, std::min(step, 10000)); + // The SessionHandle is irrelevant for this information class and must be zero, else the function returns ERROR_INVALID_PARAMETER. + status = TraceSetInformation(0, TraceSampledProfileIntervalInfo, &itnerval, sizeof(TRACE_PROFILE_INTERVAL)); + OPTICK_ASSERT(status == ERROR_SUCCESS, "TraceSetInformation - failed"); + } + + ZeroMemory(&logFile, sizeof(EVENT_TRACE_LOGFILE)); + logFile.LoggerName = const_cast(KERNEL_LOGGER_NAME); + logFile.ProcessTraceMode = (PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP); + logFile.EventRecordCallback = OnRecordEvent; + logFile.BufferCallback = OnBufferRecord; + openedHandle = OpenTrace(&logFile); + if (openedHandle == INVALID_TRACEHANDLE) + { + OPTICK_FAILED("OpenTrace - failed"); + return CaptureStatus::ERR_TRACER_FAILED; + } + + DWORD threadID; + processThreadHandle = CreateThread(0, 0, RunProcessTraceThreadFunction, this, 0, &threadID); + + isActive = true; + } + + return CaptureStatus::OK; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool ETW::Stop() +{ + if (!isActive) + { + return false; + } + + ULONG controlTraceResult = 
ControlTrace(openedHandle, KERNEL_LOGGER_NAME, traceProperties, EVENT_TRACE_CONTROL_STOP); + + // ERROR_CTX_CLOSE_PENDING(7007L): The call was successful. The ProcessTrace function will stop after it has processed all real-time events in its buffers (it will not receive any new events). + // ERROR_BUSY(170L): Prior to Windows Vista, you cannot close the trace until the ProcessTrace function completes. + // ERROR_INVALID_HANDLE(6L): One of the following is true: TraceHandle is NULL. TraceHandle is INVALID_HANDLE_VALUE. + ULONG closeTraceStatus = CloseTrace(openedHandle); + + // Wait for ProcessThread to finish + WaitForSingleObject(processThreadHandle, INFINITE); + BOOL wasThreadClosed = CloseHandle(processThreadHandle); + + isActive = false; + + runtime.activeThreadsIDs.clear(); + + return wasThreadClosed && (closeTraceStatus == ERROR_SUCCESS) && (controlTraceResult == ERROR_SUCCESS); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +ETW::~ETW() +{ + Stop(); + Memory::Free(traceProperties); + traceProperties = nullptr; + g_ETW = nullptr; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Trace* Platform::CreateTrace() +{ + return Memory::New(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Symbol Resolving +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define USE_DBG_HELP (OPTICK_PC) + +#if USE_DBG_HELP +#include +#pragma comment( lib, "DbgHelp.Lib" ) +#endif + +#include "optick_serialization.h" + +#if OPTICK_PC +#include +#else +// Forward declare kernel functions +#pragma pack(push,8) 
+typedef struct _MODULEINFO { + LPVOID lpBaseOfDll; + DWORD SizeOfImage; + LPVOID EntryPoint; +} MODULEINFO, *LPMODULEINFO; +#pragma pack(pop) +#ifndef EnumProcessModulesEx +#define EnumProcessModulesEx K32EnumProcessModulesEx +EXTERN_C DWORD WINAPI K32EnumProcessModulesEx(HANDLE hProcess, HMODULE *lphModule, DWORD cb, LPDWORD lpcbNeeded, DWORD dwFilterFlag); +#endif +#ifndef GetModuleInformation +#define GetModuleInformation K32GetModuleInformation +EXTERN_C DWORD WINAPI K32GetModuleInformation(HANDLE hProcess, HMODULE hModule, LPMODULEINFO lpmodinfo, DWORD cb); +#endif + +#ifndef GetModuleFileNameExA +#define GetModuleFileNameExA K32GetModuleFileNameExA +EXTERN_C DWORD WINAPI K32GetModuleFileNameExA(HANDLE hProcess, HMODULE hModule, LPSTR lpFilename, DWORD nSize); +#endif +#endif + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//void ReportLastError() +//{ +// LPVOID lpMsgBuf; +// DWORD dw = GetLastError(); +// +// FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, +// NULL, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), +// (LPTSTR)&lpMsgBuf, 0, NULL); +// +// MessageBox(NULL, (LPCTSTR)lpMsgBuf, TEXT("Error"), MB_OK); +// LocalFree(lpMsgBuf); +//} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef array CallStackBuffer; +typedef unordered_map SymbolCache; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class WinSymbolEngine : public SymbolEngine +{ + HANDLE hProcess; + + bool isInitialized; + + bool needRestorePreviousSettings; + uint32 previousOptions; + static const size_t MAX_SEARCH_PATH_LENGTH = 2048; 
+ char previousSearchPath[MAX_SEARCH_PATH_LENGTH]; + + SymbolCache cache; + vector modules; + + void InitSystemModules(); + void InitApplicationModules(); +public: + WinSymbolEngine(); + ~WinSymbolEngine(); + + void Init(); + void Close(); + + // Get Symbol from PDB file + virtual const Symbol * GetSymbol(uint64 dwAddress) override; + virtual const vector& GetModules() override; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +WinSymbolEngine::WinSymbolEngine() : hProcess(GetCurrentProcess()), isInitialized(false), needRestorePreviousSettings(false), previousOptions(0) +{ +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +WinSymbolEngine::~WinSymbolEngine() +{ + Close(); +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const Symbol* WinSymbolEngine::GetSymbol(uint64 address) +{ + if (address == 0) + return nullptr; + + Init(); + + Symbol& symbol = cache[address]; + + if (symbol.address != 0) + return &symbol; + + if (!isInitialized) + return nullptr; + + symbol.address = address; + +#if USE_DBG_HELP + DWORD64 dwAddress = static_cast(address); + + // FileName and Line + IMAGEHLP_LINEW64 lineInfo; + memset(&lineInfo, 0, sizeof(IMAGEHLP_LINEW64)); + lineInfo.SizeOfStruct = sizeof(lineInfo); + DWORD dwDisp; + if (SymGetLineFromAddrW64(hProcess, dwAddress, &dwDisp, &lineInfo)) + { + symbol.file = lineInfo.FileName; + symbol.line = lineInfo.LineNumber; + } + + const size_t length = (sizeof(SYMBOL_INFOW) + MAX_SYM_NAME * sizeof(WCHAR) + sizeof(ULONG64) - 1) / sizeof(ULONG64) + 1; + + // Function Name + ULONG64 buffer[length]; + PSYMBOL_INFOW dbgSymbol = (PSYMBOL_INFOW)buffer; + memset(dbgSymbol, 0, sizeof(buffer)); + dbgSymbol->SizeOfStruct = sizeof(SYMBOL_INFOW); + dbgSymbol->MaxNameLen = MAX_SYM_NAME; + + DWORD64 offset 
= 0; + if (SymFromAddrW(hProcess, dwAddress, &offset, dbgSymbol)) + { + symbol.function.resize(dbgSymbol->NameLen); + memcpy(&symbol.function[0], &dbgSymbol->Name[0], sizeof(WCHAR) * dbgSymbol->NameLen); + } + + symbol.offset = static_cast(offset); +#endif + + return &symbol; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +const vector& WinSymbolEngine::GetModules() +{ + if (modules.empty()) + { + InitSystemModules(); + InitApplicationModules(); + } + return modules; +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// const char* USER_SYMBOL_SEARCH_PATH = "http://msdl.microsoft.com/download/symbols"; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::Init() +{ + if (!isInitialized) + { +#if USE_DBG_HELP + previousOptions = SymGetOptions(); + + memset(previousSearchPath, 0, MAX_SEARCH_PATH_LENGTH); + SymGetSearchPath(hProcess, previousSearchPath, MAX_SEARCH_PATH_LENGTH); + + SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_DEFERRED_LOADS | SYMOPT_UNDNAME | SYMOPT_INCLUDE_32BIT_MODULES | SYMOPT_LOAD_ANYTHING); + if (!SymInitialize(hProcess, NULL, TRUE)) + { + needRestorePreviousSettings = true; + SymCleanup(hProcess); + + if (SymInitialize(hProcess, NULL, TRUE)) + isInitialized = true; + } + else + { + isInitialized = true; + } + + const vector& loadedModules = GetModules(); + for (size_t i = 0; i < loadedModules.size(); ++i) + { + const Module& module = loadedModules[i]; + SymLoadModule64(hProcess, NULL, module.path.c_str(), NULL, (DWORD64)module.address, (DWORD)module.size); + } + +#else + isInitialized = true; +#endif + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +typedef DWORD(__stdcall 
*pZwQuerySystemInformation)(DWORD, LPVOID, DWORD, DWORD*); +#define SystemModuleInformation 11 // SYSTEMINFOCLASS +#define MAXIMUM_FILENAME_LENGTH 256 + +struct SYSTEM_MODULE_INFORMATION +{ + DWORD reserved1; + DWORD reserved2; + PVOID mappedBase; + PVOID imageBase; + DWORD imageSize; + DWORD flags; + WORD loadOrderIndex; + WORD initOrderIndex; + WORD loadCount; + WORD moduleNameOffset; + CHAR imageName[MAXIMUM_FILENAME_LENGTH]; +}; + +#pragma warning (push) +#pragma warning(disable : 4200) +struct MODULE_LIST +{ + DWORD dwModules; + SYSTEM_MODULE_INFORMATION pModulesInfo[]; +}; +#pragma warning (pop) + +void WinSymbolEngine::InitSystemModules() +{ + ULONG returnLength = 0; + ULONG systemInformationLength = 0; + MODULE_LIST* pModuleList = nullptr; + +#pragma warning (push) +#pragma warning(disable : 4191) + pZwQuerySystemInformation ZwQuerySystemInformation = (pZwQuerySystemInformation)GetProcAddress(GetModuleHandle(TEXT("ntdll.dll")), "ZwQuerySystemInformation"); +#pragma warning (pop) + + ZwQuerySystemInformation(SystemModuleInformation, pModuleList, systemInformationLength, &returnLength); + systemInformationLength = returnLength; + pModuleList = (MODULE_LIST*)Memory::Alloc(systemInformationLength); + DWORD status = ZwQuerySystemInformation(SystemModuleInformation, pModuleList, systemInformationLength, &returnLength); + if (status == ERROR_SUCCESS) + { + char systemRootPath[MAXIMUM_FILENAME_LENGTH] = { 0 }; +#if OPTICK_PC + ExpandEnvironmentStringsA("%SystemRoot%", systemRootPath, MAXIMUM_FILENAME_LENGTH); +#else + strcpy_s(systemRootPath, "C:\\Windows"); +#endif + + const char* systemRootPattern = "\\SystemRoot"; + + modules.reserve(modules.size() + pModuleList->dwModules); + + for (uint32_t i = 0; i < pModuleList->dwModules; ++i) + { + SYSTEM_MODULE_INFORMATION& module = pModuleList->pModulesInfo[i]; + + char path[MAXIMUM_FILENAME_LENGTH] = { 0 }; + + if (strstr(module.imageName, systemRootPattern) == module.imageName) + { + strcpy_s(path, systemRootPath); + 
strcat_s(path, module.imageName + strlen(systemRootPattern)); + } + else + { + strcpy_s(path, module.imageName); + } + + modules.push_back(Module(path, (void*)module.imageBase, module.imageSize)); + } + } + else + { + OPTICK_FAILED("Can't query System Module Information!"); + } + + if (pModuleList) + { + Memory::Free(pModuleList); + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::InitApplicationModules() +{ + HANDLE processHandle = GetCurrentProcess(); + HMODULE hModules[256]; + DWORD modulesSize = 0; + EnumProcessModulesEx(processHandle, hModules, sizeof(hModules), &modulesSize, 0); + + int moduleCount = modulesSize / sizeof(HMODULE); + + modules.reserve(modules.size() + moduleCount); + + for (int i = 0; i < moduleCount; ++i) + { + MODULEINFO info = { 0 }; + if (GetModuleInformation(processHandle, hModules[i], &info, sizeof(MODULEINFO))) + { + char name[MAX_PATH] = "UnknownModule"; + GetModuleFileNameExA(processHandle, hModules[i], name, MAX_PATH); + + modules.push_back(Module(name, info.lpBaseOfDll, info.SizeOfImage)); + } + } +} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void WinSymbolEngine::Close() +{ + if (isInitialized) + { +#if USE_DBG_HELP + SymCleanup(hProcess); + if (needRestorePreviousSettings) + { + HANDLE currentProcess = GetCurrentProcess(); + + SymSetOptions(previousOptions); + SymSetSearchPath(currentProcess, previousSearchPath); + SymInitialize(currentProcess, NULL, TRUE); + + needRestorePreviousSettings = false; + } +#endif + modules.clear(); + isInitialized = false; + } +} +////////////////////////////////////////////////////////////////////////// +SymbolEngine* Platform::CreateSymbolEngine() +{ + return Memory::New(); +} +////////////////////////////////////////////////////////////////////////// +} +#endif //OPTICK_ENABLE_TRACING +#endif //USE_OPTICK 
+#endif //_MSC_VER \ No newline at end of file diff --git a/neo/libs/optick/optick_gpu.cpp b/neo/libs/optick/optick_gpu.cpp new file mode 100644 index 00000000..e2779419 --- /dev/null +++ b/neo/libs/optick/optick_gpu.cpp @@ -0,0 +1,157 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "optick_gpu.h" + +#if USE_OPTICK +#include "optick_core.h" +#include "optick_memory.h" + +#include + +namespace Optick +{ + static_assert((1ULL << 32) % GPUProfiler::MAX_QUERIES_COUNT == 0, "(1 << 32) should be a multiple of MAX_QUERIES_COUNT to handle query index overflow!"); + + + GPUProfiler::GPUProfiler() : currentState(STATE_OFF), currentNode(0), frameNumber(0) + { + + } + + void GPUProfiler::InitNode(const char *nodeName, uint32_t nodeIndex) + { + Node* node = Memory::New(); + for (int i = 0; i < GPU_QUEUE_COUNT; ++i) + { + char name[128] = { 0 }; + sprintf_s(name, "%s [%s]", nodeName, GetGPUQueueName((GPUQueueType)i)); + node->gpuEventStorage[i] = RegisterStorage(name, uint64_t(-1), ThreadMask::GPU); + node->name = nodeName; + } + nodes[nodeIndex] = node; + } + + void GPUProfiler::Start(uint32 /*mode*/) + { + std::lock_guard lock(updateLock); + Reset(); + currentState = STATE_STARTING; + } + + void GPUProfiler::Stop(uint32 /*mode*/) + { + std::lock_guard lock(updateLock); + currentState = STATE_OFF; + } + + void GPUProfiler::Dump(uint32 /*mode*/) + { + for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) + { + Node* node = nodes[nodeIndex]; + + for (int queueIndex = 0; queueIndex < GPU_QUEUE_COUNT; ++queueIndex) + { + EventBuffer& gpuBuffer = node->gpuEventStorage[queueIndex]->eventBuffer; + + const vector& threads = Core::Get().GetThreads(); + for (size_t threadIndex = 0; threadIndex < threads.size(); ++threadIndex) + { + ThreadEntry* thread = threads[threadIndex]; + thread->storage.gpuStorage.gpuBuffer[nodeIndex][queueIndex].ForEachChunk([&gpuBuffer](const EventData* events, int count) + { + gpuBuffer.AddRange(events, count); + }); + } + } + } + } + + string GPUProfiler::GetName() const + { + return !nodes.empty() ? 
nodes[0]->name : string(); + } + + GPUProfiler::~GPUProfiler() + { + for (Node* node : nodes) + Memory::Delete(node); + nodes.clear(); + } + + void GPUProfiler::Reset() + { + for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex) + { + Node& node = *nodes[nodeIndex]; + node.Reset(); + node.clock = GetClockSynchronization(nodeIndex); + } + } + + EventData& GPUProfiler::AddFrameEvent() + { + static const EventDescription* GPUFrameDescription = EventDescription::Create("GPU Frame", __FILE__, __LINE__); + EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS]->eventBuffer.Add(); + event.description = GPUFrameDescription; + event.start = EventTime::INVALID_TIMESTAMP; + event.finish = EventTime::INVALID_TIMESTAMP; + return event; + } + + EventData& GPUProfiler::AddVSyncEvent() + { + static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__); + EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer.Add(); + event.description = VSyncDescription; + event.start = EventTime::INVALID_TIMESTAMP; + event.finish = EventTime::INVALID_TIMESTAMP; + return event; + } + + TagData& GPUProfiler::AddFrameTag() + { + static const EventDescription* FrameTagDescription = EventDescription::CreateShared("Frame"); + TagData& tag = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_GRAPHICS]->tagU32Buffer.Add(); + tag.description = FrameTagDescription; + tag.timestamp = EventTime::INVALID_TIMESTAMP; + tag.data = Core::Get().GetCurrentFrame(FrameType::CPU); + return tag; + } + + const char * GetGPUQueueName(GPUQueueType queue) + { + const char* GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" }; + return GPUQueueToName[queue]; + } + + void GPUProfiler::Node::Reset() + { + queryIndex = 0; + + for (size_t frameIndex = 0; frameIndex < queryGpuframes.size(); ++frameIndex) + queryGpuframes[frameIndex].Reset(); + } +} +#endif //USE_OPTICK + diff --git 
a/neo/libs/optick/optick_gpu.d3d12.cpp b/neo/libs/optick/optick_gpu.d3d12.cpp new file mode 100644 index 00000000..081649cd --- /dev/null +++ b/neo/libs/optick/optick_gpu.d3d12.cpp @@ -0,0 +1,402 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "optick.config.h" +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_D3D12 + +#include "optick_common.h" +#include "optick_memory.h" +#include "optick_core.h" +#include "optick_gpu.h" + +#include +#include + +#include +#include +#include + + +#define OPTICK_CHECK(args) do { HRESULT __hr = args; (void)__hr; OPTICK_ASSERT(__hr == S_OK, "Failed check"); } while(false); + +namespace Optick +{ + class GPUProfilerD3D12 : public GPUProfiler + { + struct Frame + { + ID3D12CommandAllocator* commandAllocator; + ID3D12GraphicsCommandList* commandList; + + Frame() : commandAllocator(nullptr), commandList(nullptr) + { + Reset(); + } + + void Reset() + { + } + + void Shutdown(); + + ~Frame() + { + Shutdown(); + } + }; + + struct NodePayload + { + ID3D12CommandQueue* commandQueue; + ID3D12QueryHeap* queryHeap; + ID3D12Fence* syncFence; + array frames; + + NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {} + ~NodePayload(); + }; + vector nodePayloads; + + ID3D12Resource* queryBuffer; + ID3D12Device* device; + + // VSync Stats + DXGI_FRAME_STATISTICS prevFrameStatistics; + + //void UpdateRange(uint32_t start, uint32_t finish) + void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue); + + void ResolveTimestamps(uint32_t startIndex, uint32_t count); + + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerD3D12(); + ~GPUProfilerD3D12(); + + void InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues); + + void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp); + + void Flip(IDXGISwapChain* swapChain); + + + // Interface implementation + ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override + { + Flip(static_cast(swapChain)); 
+ } + }; + + template void SafeRelease(T **ppT) + { + if (*ppT) + { + (*ppT)->Release(); + *ppT = NULL; + } + } + + void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues) + { + GPUProfilerD3D12* gpuProfiler = Memory::New(); + gpuProfiler->InitDevice(device, cmdQueues, numQueues); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerD3D12::GPUProfilerD3D12() : queryBuffer(nullptr), device(nullptr) + { + prevFrameStatistics = { 0 }; + } + + GPUProfilerD3D12::~GPUProfilerD3D12() + { + for (NodePayload* payload : nodePayloads) + Memory::Delete(payload); + nodePayloads.clear(); + + for (Node* node : nodes) + Memory::Delete(node); + nodes.clear(); + + SafeRelease(&queryBuffer); + } + + void GPUProfilerD3D12::InitDevice(ID3D12Device* pDevice, ID3D12CommandQueue** pCommandQueues, uint32_t numCommandQueues) + { + device = pDevice; + + uint32_t nodeCount = numCommandQueues; // device->GetNodeCount(); + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + D3D12_HEAP_PROPERTIES heapDesc; + heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapDesc.CreationNodeMask = 0; + heapDesc.VisibleNodeMask = (1u << nodeCount) - 1u; + heapDesc.Type = D3D12_HEAP_TYPE_READBACK; + + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = MAX_QUERIES_COUNT * sizeof(int64_t); + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + OPTICK_CHECK(device->CreateCommittedResource( + &heapDesc, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&queryBuffer))); + + // 
Get Device Name + LUID adapterLUID = pDevice->GetAdapterLuid(); + + IDXGIFactory4* factory; + OPTICK_CHECK(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory))); + + IDXGIAdapter1* adapter; + factory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&adapter)); + + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + adapter->Release(); + factory->Release(); + + char deviceName[128] = { 0 }; + wcstombs_s(deviceName, desc.Description, OPTICK_ARRAY_SIZE(deviceName) - 1); + + for (uint32_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) + InitNodeInternal(deviceName, nodeIndex, pCommandQueues[nodeIndex]); + } + + void GPUProfilerD3D12::InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue) + { + GPUProfiler::InitNode(nodeName, nodeIndex); + + NodePayload* node = Memory::New(); + nodePayloads[nodeIndex] = node; + node->commandQueue = pCmdQueue; + + D3D12_QUERY_HEAP_DESC queryHeapDesc; + queryHeapDesc.Count = MAX_QUERIES_COUNT; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.NodeMask = 1u << nodeIndex; + OPTICK_CHECK(device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&node->queryHeap))); + + OPTICK_CHECK(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&node->syncFence))); + + for (Frame& frame : node->frames) + { + OPTICK_CHECK(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frame.commandAllocator))); + OPTICK_CHECK(device->CreateCommandList(1u << nodeIndex, D3D12_COMMAND_LIST_TYPE_DIRECT, frame.commandAllocator, nullptr, IID_PPV_ARGS(&frame.commandList))); + OPTICK_CHECK(frame.commandList->Close()); + } + } + + void GPUProfilerD3D12::QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + context->EndQuery(nodePayloads[currentNode]->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, index); + } + } + + void 
GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) }; + void* pData = nullptr; + queryBuffer->Map(0, &range, &pData); + memcpy(&node->queryGpuTimestamps[startIndex], (uint64_t*)pData + startIndex, sizeof(uint64_t) * count); + queryBuffer->Unmap(0, 0); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + NodePayload* payload = nodePayloads[currentNode]; + while (frameNumberToWait > payload->syncFence->GetCompletedValue()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain) + { + OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug); + + std::lock_guard lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + //Frame& currentFrame = frames[frameNumber % NUM_FRAMES_DELAY]; + //Frame& nextFrame = frames[(frameNumber + 1) % NUM_FRAMES_DELAY]; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + ID3D12GraphicsCommandList* commandList = payload.frames[currentFrameIndex].commandList; + ID3D12CommandAllocator* commandAllocator = payload.frames[currentFrameIndex].commandAllocator; + commandAllocator->Reset(); + commandList->Reset(commandAllocator, nullptr); + + if (EventData* 
frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandList, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandList, &event.start); + QueryTimestamp(commandList, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!"); + currentFrame.queryIndexCount = queryEnd - queryBegin; + + uint32_t startIndex = queryBegin % MAX_QUERIES_COUNT; + uint32_t finishIndex = queryEnd % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, queryEnd - queryBegin, queryBuffer, startIndex * sizeof(int64_t)); + } + else + { + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, startIndex, MAX_QUERIES_COUNT - startIndex, queryBuffer, startIndex * sizeof(int64_t)); + commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0); + } + } + + commandList->Close(); + + payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList); + payload.commandQueue->Signal(payload.syncFence, frameNumber); + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount) + { + WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY); + + uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount; + ResolveTimestamps(resolveStart, std::min(resolveFinish, MAX_QUERIES_COUNT) - resolveStart); + if (resolveFinish > MAX_QUERIES_COUNT) + ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT); + } + + 
nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + + // Process VSync + DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 }; + HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics); + if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount)) + { + EventData& data = AddVSyncEvent(); + data.start = prevFrameStatistics.SyncQPCTime.QuadPart; + data.finish = currentFrameStatistics.SyncQPCTime.QuadPart; + } + prevFrameStatistics = currentFrameStatistics; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerD3D12::GetClockSynchronization(uint32_t nodeIndex) + { + ClockSynchronization clock; + clock.frequencyCPU = GetHighPrecisionFrequency(); + nodePayloads[nodeIndex]->commandQueue->GetTimestampFrequency((uint64_t*)&clock.frequencyGPU); + nodePayloads[nodeIndex]->commandQueue->GetClockCalibration((uint64_t*)&clock.timestampGPU, (uint64_t*)&clock.timestampCPU); + return clock; + } + + GPUProfilerD3D12::NodePayload::~NodePayload() + { + SafeRelease(&queryHeap); + SafeRelease(&syncFence); + } + + void GPUProfilerD3D12::Frame::Shutdown() + { + SafeRelease(&commandAllocator); + SafeRelease(&commandList); + } +} + +#else +#include "optick_common.h" + +namespace Optick +{ + void InitGpuD3D12(ID3D12Device* /*device*/, ID3D12CommandQueue** /*cmdQueues*/, uint32_t /*numQueues*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_D3D12 is disabled! 
Can't initialize GPU Profiler!"); + } +} + +#endif //OPTICK_ENABLE_GPU_D3D12 +#endif //USE_OPTICK \ No newline at end of file diff --git a/neo/libs/optick/optick_gpu.h b/neo/libs/optick/optick_gpu.h new file mode 100644 index 00000000..747bdc77 --- /dev/null +++ b/neo/libs/optick/optick_gpu.h @@ -0,0 +1,151 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK + +#include +#include + +#include "optick_common.h" +#include "optick_memory.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +namespace Optick +{ + const char* GetGPUQueueName(GPUQueueType queue); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class GPUProfiler + { + public: + static const int MAX_FRAME_EVENTS = 1024; + static const int NUM_FRAMES_DELAY = 4; + static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY; + protected: + + enum State + { + STATE_OFF, + STATE_STARTING, + STATE_RUNNING, + STATE_FINISHING, + }; + + struct ClockSynchronization + { + int64_t frequencyCPU; + int64_t frequencyGPU; + int64_t timestampCPU; + int64_t timestampGPU; + + int64_t GetCPUTimestamp(int64_t gpuTimestamp) + { + return timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU; + } + + ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {} + }; + + struct QueryFrame + { + EventData* frameEvent; + uint32_t queryIndexStart; + uint32_t queryIndexCount; + + QueryFrame() + { + Reset(); + } + + void Reset() + { + frameEvent = nullptr; + queryIndexStart = (uint32_t)-1; + queryIndexCount = 0; + } + }; + + struct Node + { + array queryGpuframes; + array queryGpuTimestamps; + array queryCpuTimestamps; + std::atomic queryIndex; + + ClockSynchronization clock; + + array gpuEventStorage; + + uint32_t QueryTimestamp(int64_t* outCpuTimestamp) + { + uint32_t index = queryIndex.fetch_add(1) % MAX_QUERIES_COUNT; + queryCpuTimestamps[index] = outCpuTimestamp; + return index; + } + + string name; + + void Reset(); + + Node() : queryIndex(0) { gpuEventStorage.fill(nullptr); } + }; + + std::recursive_mutex updateLock; + volatile State currentState; + + vector nodes; + uint32_t 
currentNode; + + uint32_t frameNumber; + + void Reset(); + + EventData& AddFrameEvent(); + EventData& AddVSyncEvent(); + TagData& AddFrameTag(); + + public: + GPUProfiler(); + + // Init + virtual void InitNode(const char* nodeName, uint32_t nodeIndex); + + // Capture Controls + virtual void Start(uint32 mode); + virtual void Stop(uint32 mode); + virtual void Dump(uint32 mode); + + virtual string GetName() const; + + // Interface to implement + virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0; + virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0; + virtual void Flip(void* swapChain) = 0; + + virtual ~GPUProfiler(); + }; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK diff --git a/neo/libs/optick/optick_gpu.vulkan.cpp b/neo/libs/optick/optick_gpu.vulkan.cpp new file mode 100644 index 00000000..ba46cfb2 --- /dev/null +++ b/neo/libs/optick/optick_gpu.vulkan.cpp @@ -0,0 +1,422 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "optick.config.h" + +#if USE_OPTICK +#if OPTICK_ENABLE_GPU_VULKAN +#include + +#include "optick_core.h" +#include "optick_gpu.h" + +#define OPTICK_VK_CHECK(args) do { VkResult __hr = args; OPTICK_ASSERT(__hr == VK_SUCCESS, "Failed check"); (void)__hr; } while(false); + +namespace Optick +{ + class GPUProfilerVulkan : public GPUProfiler + { + private: + VulkanFunctions vulkanFunctions = {}; + + protected: + struct Frame + { + VkCommandBuffer commandBuffer; + VkFence fence; + Frame() : commandBuffer(VK_NULL_HANDLE), fence(VK_NULL_HANDLE) {} + }; + + struct NodePayload + { + VulkanFunctions* vulkanFunctions; + VkDevice device; + VkPhysicalDevice physicalDevice; + VkQueue queue; + VkQueryPool queryPool; + VkCommandPool commandPool; + + array frames; + + NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {} + ~NodePayload(); + }; + vector nodePayloads; + + void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count); + void WaitForFrame(uint64_t frameNumber); + + public: + GPUProfilerVulkan(); + ~GPUProfilerVulkan(); + + void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions); + void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp); + + + // Interface implementation + ClockSynchronization 
GetClockSynchronization(uint32_t nodeIndex) override; + + void QueryTimestamp(void* context, int64_t* outCpuTimestamp) override + { + QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp); + } + + void Flip(void* swapChain) override; + }; + + void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions) + { + GPUProfilerVulkan* gpuProfiler = Memory::New(); + gpuProfiler->InitDevice(vkDevices, vkPhysicalDevices, vkQueues, cmdQueuesFamily, numQueues, functions); + Core::Get().InitGPUProfiler(gpuProfiler); + } + + GPUProfilerVulkan::GPUProfilerVulkan() + { + } + + void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions) + { + if (functions != nullptr) + { + vulkanFunctions = *functions; + } + else + { + vulkanFunctions = { + vkGetPhysicalDeviceProperties, + (PFN_vkCreateQueryPool_)vkCreateQueryPool, + (PFN_vkCreateCommandPool_)vkCreateCommandPool, + (PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers, + (PFN_vkCreateFence_)vkCreateFence, + vkCmdResetQueryPool, + (PFN_vkQueueSubmit_)vkQueueSubmit, + (PFN_vkWaitForFences_)vkWaitForFences, + (PFN_vkResetCommandBuffer_)vkResetCommandBuffer, + (PFN_vkCmdWriteTimestamp_)vkCmdWriteTimestamp, + (PFN_vkGetQueryPoolResults_)vkGetQueryPoolResults, + (PFN_vkBeginCommandBuffer_)vkBeginCommandBuffer, + (PFN_vkEndCommandBuffer_)vkEndCommandBuffer, + (PFN_vkResetFences_)vkResetFences, + vkDestroyCommandPool, + vkDestroyQueryPool, + vkDestroyFence, + vkFreeCommandBuffers, + }; + } + + VkQueryPoolCreateInfo queryPoolCreateInfo; + queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + queryPoolCreateInfo.pNext = 0; + queryPoolCreateInfo.flags = 0; + queryPoolCreateInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + queryPoolCreateInfo.queryCount = MAX_QUERIES_COUNT + 1; + + 
VkCommandPoolCreateInfo commandPoolCreateInfo; + commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + commandPoolCreateInfo.pNext = 0; + commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + + nodes.resize(nodeCount); + nodePayloads.resize(nodeCount); + + VkResult r; + for (uint32_t i = 0; i < nodeCount; ++i) + { + VkPhysicalDeviceProperties properties = { 0 }; + (*vulkanFunctions.vkGetPhysicalDeviceProperties)(physicalDevices[i], &properties); + GPUProfiler::InitNode(properties.deviceName, i); + + NodePayload* nodePayload = Memory::New(); + nodePayloads[i] = nodePayload; + nodePayload->vulkanFunctions = &vulkanFunctions; + nodePayload->device = devices[i]; + nodePayload->physicalDevice = physicalDevices[i]; + nodePayload->queue = cmdQueues[i]; + + r = (VkResult)(*vulkanFunctions.vkCreateQueryPool)(devices[i], &queryPoolCreateInfo, 0, &nodePayload->queryPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + (void)r; + + commandPoolCreateInfo.queueFamilyIndex = cmdQueuesFamily[i]; + r = (VkResult)(*vulkanFunctions.vkCreateCommandPool)(nodePayload->device, &commandPoolCreateInfo, 0, &nodePayload->commandPool); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + (void)r; + + for (uint32_t j = 0; j < nodePayload->frames.size(); ++j) + { + Frame& frame = nodePayload->frames[j]; + + VkCommandBufferAllocateInfo allocInfo; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.pNext = 0; + allocInfo.commandBufferCount = 1; + allocInfo.commandPool = nodePayload->commandPool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + r = (VkResult)(*vulkanFunctions.vkAllocateCommandBuffers)(nodePayload->device, &allocInfo, &frame.commandBuffer); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + (void)r; + + VkFenceCreateInfo fenceCreateInfo; + fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCreateInfo.pNext = 0; + fenceCreateInfo.flags = j == 0 ? 
0 : VK_FENCE_CREATE_SIGNALED_BIT; + r = (VkResult)(*vulkanFunctions.vkCreateFence)(nodePayload->device, &fenceCreateInfo, 0, &frame.fence); + OPTICK_ASSERT(r == VK_SUCCESS, "Failed"); + (void)r; + if (j == 0) + { + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + (*vulkanFunctions.vkBeginCommandBuffer)(frame.commandBuffer, &commandBufferBeginInfo); + (*vulkanFunctions.vkCmdResetQueryPool)(frame.commandBuffer, nodePayload->queryPool, 0, MAX_QUERIES_COUNT); + (*vulkanFunctions.vkEndCommandBuffer)(frame.commandBuffer); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &frame.commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + (*vulkanFunctions.vkQueueSubmit)(nodePayload->queue, 1, &submitInfo, frame.fence); + (*vulkanFunctions.vkWaitForFences)(nodePayload->device, 1, &frame.fence, 1, (uint64_t)-1); + (*vulkanFunctions.vkResetCommandBuffer)(frame.commandBuffer, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + } + } + } + } + + void GPUProfilerVulkan::QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp) + { + if (currentState == STATE_RUNNING) + { + uint32_t index = nodes[currentNode]->QueryTimestamp(outCpuTimestamp); + (*vulkanFunctions.vkCmdWriteTimestamp)(commandBuffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[currentNode]->queryPool, index); + } + } + + void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count) + { + if (count) + { + Node* node = nodes[currentNode]; + + NodePayload* payload = 
nodePayloads[currentNode]; + + OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT)); + (*vulkanFunctions.vkCmdResetQueryPool)(commandBuffer, payload->queryPool, startIndex, count); + + // Convert GPU timestamps => CPU Timestamps + for (uint32_t index = startIndex; index < startIndex + count; ++index) + *node->queryCpuTimestamps[index] = node->clock.GetCPUTimestamp(node->queryGpuTimestamps[index]); + } + } + + void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait) + { + OPTICK_EVENT(); + + int r = VK_SUCCESS; + do + { + NodePayload& payload = *nodePayloads[currentNode]; + r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30); + } while (r != VK_SUCCESS); + } + + void GPUProfilerVulkan::Flip(void* /*swapChain*/) + { + OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug); + + std::lock_guard lock(updateLock); + + if (currentState == STATE_STARTING) + currentState = STATE_RUNNING; + + if (currentState == STATE_RUNNING) + { + Node& node = *nodes[currentNode]; + NodePayload& payload = *nodePayloads[currentNode]; + + uint32_t currentFrameIndex = frameNumber % NUM_FRAMES_DELAY; + uint32_t nextFrameIndex = (frameNumber + 1) % NUM_FRAMES_DELAY; + + QueryFrame& currentFrame = node.queryGpuframes[currentFrameIndex]; + QueryFrame& nextFrame = node.queryGpuframes[nextFrameIndex]; + + VkCommandBuffer commandBuffer = payload.frames[currentFrameIndex].commandBuffer; + VkFence fence = payload.frames[currentFrameIndex].fence; + VkDevice device = payload.device; + VkQueue queue = payload.queue; + + (*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1); + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + 
commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo)); + (*vulkanFunctions.vkResetFences)(device, 1, &fence); + + if (EventData* frameEvent = currentFrame.frameEvent) + QueryTimestamp(commandBuffer, &frameEvent->finish); + + // Generate GPU Frame event for the next frame + EventData& event = AddFrameEvent(); + QueryTimestamp(commandBuffer, &event.start); + QueryTimestamp(commandBuffer, &AddFrameTag().timestamp); + nextFrame.frameEvent = &event; + + OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkEndCommandBuffer)(commandBuffer)); + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkQueueSubmit)(queue, 1, &submitInfo, fence)); + + uint32_t queryBegin = currentFrame.queryIndexStart; + uint32_t queryEnd = node.queryIndex; + + if (queryBegin != (uint32_t)-1) + { + currentFrame.queryIndexCount = queryEnd - queryBegin; + } + + // Preparing Next Frame + // Try resolve timestamps for the current frame + if (nextFrame.queryIndexStart != (uint32_t)-1) + { + uint32_t startIndex = nextFrame.queryIndexStart % MAX_QUERIES_COUNT; + uint32_t finishIndex = (startIndex + nextFrame.queryIndexCount) % MAX_QUERIES_COUNT; + + if (startIndex < finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex); + } + else if (startIndex > finishIndex) + { + ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex); + ResolveTimestamps(commandBuffer, 0, finishIndex); + } + } + + 
nextFrame.queryIndexStart = queryEnd; + nextFrame.queryIndexCount = 0; + } + + ++frameNumber; + } + + GPUProfiler::ClockSynchronization GPUProfilerVulkan::GetClockSynchronization(uint32_t nodeIndex) + { + GPUProfiler::ClockSynchronization clock; + + NodePayload& node = *nodePayloads[nodeIndex]; + Frame& currentFrame = node.frames[frameNumber % NUM_FRAMES_DELAY]; + + VkCommandBufferBeginInfo commandBufferBeginInfo; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.pNext = 0; + commandBufferBeginInfo.pInheritanceInfo = 0; + commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VkCommandBuffer CB = currentFrame.commandBuffer; + VkDevice Device = node.device; + VkFence Fence = currentFrame.fence; + + (*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); + (*vulkanFunctions.vkResetFences)(Device, 1, &Fence); + (*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + (*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo); + (*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1); + (*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0); + (*vulkanFunctions.vkEndCommandBuffer)(CB); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pNext = nullptr; + submitInfo.waitSemaphoreCount = 0; + submitInfo.pWaitSemaphores = nullptr; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &CB; + submitInfo.signalSemaphoreCount = 0; + submitInfo.pSignalSemaphores = nullptr; + (*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence); + (*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1); + + clock.timestampGPU = 0; + (*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, 
VK_QUERY_RESULT_64_BIT); + clock.timestampCPU = GetHighPrecisionTime(); + clock.frequencyCPU = GetHighPrecisionFrequency(); + + VkPhysicalDeviceProperties Properties; + (*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties); + clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod); + + return clock; + } + + GPUProfilerVulkan::NodePayload::~NodePayload() + { + (*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr); + (*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr); + } + + GPUProfilerVulkan::~GPUProfilerVulkan() + { + for (NodePayload* payload : nodePayloads) + { + for (Frame& frame : payload->frames) + { + (*vulkanFunctions.vkDestroyFence)(payload->device, frame.fence, nullptr); + (*vulkanFunctions.vkFreeCommandBuffers)(payload->device, payload->commandPool, 1, &frame.commandBuffer); + } + + Memory::Delete(payload); + } + + nodePayloads.clear(); + } +} +#else +#include "optick_common.h" +namespace Optick +{ + void InitGpuVulkan(VkDevice* /*vkDevices*/, VkPhysicalDevice* /*vkPhysicalDevices*/, VkQueue* /*vkQueues*/, uint32_t* /*cmdQueuesFamily*/, uint32_t /*numQueues*/, const VulkanFunctions* /*functions*/) + { + OPTICK_FAILED("OPTICK_ENABLE_GPU_VULKAN is disabled! 
Can't initialize GPU Profiler!"); + } +} +#endif //OPTICK_ENABLE_GPU_VULKAN +#endif //USE_OPTICK \ No newline at end of file diff --git a/neo/libs/optick/optick_memory.h b/neo/libs/optick/optick_memory.h new file mode 100644 index 00000000..14aadd19 --- /dev/null +++ b/neo/libs/optick/optick_memory.h @@ -0,0 +1,470 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#pragma once
+
+#include "optick_common.h"
+
+#if USE_OPTICK
+
+// NOTE(review): the header names of the eleven #include lines below were missing from the
+// reviewed text; they are restored from what this file uses - confirm against upstream Optick.
+#include <cstring>
+#include <new>
+#include <stdlib.h>
+#include <atomic>
+
+#include <array>
+#include <list>
+#include <string>
+#include <sstream>
+#include <unordered_set>
+#include <unordered_map>
+#include <vector>
+
+namespace Optick
+{
+	// Allocation front-end used by Optick. Every block handed out by Alloc() is preceded by a
+	// Header that stores the total size of the underlying allocation, which lets Free() keep the
+	// running total in memAllocated up to date. The actual allocate/deallocate functions are
+	// installed through SetAllocator().
+	class Memory
+	{
+		struct Header
+		{
+			uint64_t size;
+		};
+
+#if defined(OPTICK_32BIT)
+		static std::atomic<uint32_t> memAllocated;
+#else
+		static std::atomic<uint64_t> memAllocated;
+#endif
+
+		static void* (*allocate)(size_t);
+		static void  (*deallocate)(void*);
+		static void  (*initThread)(void);
+	public:
+		// Allocates `size` usable bytes (plus the hidden Header) and returns a pointer to the
+		// usable part; returns nullptr through OPTICK_VERIFY when the allocator fails.
+		static OPTICK_INLINE void* Alloc(size_t size)
+		{
+			size_t totalSize = size + sizeof(Header);
+			void *ptr = allocate(totalSize);
+			OPTICK_VERIFY(ptr, "Can't allocate memory", return nullptr);
+
+			Header* header = (Header*)ptr;
+			header->size = totalSize;
+			memAllocated += totalSize;
+
+			return (uint8_t*)ptr + sizeof(Header);
+		}
+
+		// Releases a pointer previously returned by Alloc(); nullptr is ignored.
+		static OPTICK_INLINE void Free(void* p)
+		{
+			if (p != nullptr)
+			{
+				// Step back to the Header that Alloc() placed in front of the block.
+				uint8_t* basePtr = (uint8_t*)p - sizeof(Header);
+				Header* header = (Header*)basePtr;
+				memAllocated -= header->size;
+				deallocate(basePtr);
+			}
+		}
+
+		// Total number of bytes (headers included) currently allocated through Alloc().
+		static OPTICK_INLINE size_t GetAllocatedSize()
+		{
+			return (size_t)memAllocated;
+		}
+
+		// Placement-constructs a T in memory obtained from Alloc().
+		template<class T>
+		static T* New()
+		{
+			return new (Memory::Alloc(sizeof(T))) T();
+		}
+
+		template<class T, class P1>
+		static T* New(P1 p1)
+		{
+			return new (Memory::Alloc(sizeof(T))) T(p1);
+		}
+
+		template<class T, class P1, class P2>
+		static T* New(P1 p1, P2 p2)
+		{
+			return new (Memory::Alloc(sizeof(T))) T(p1, p2);
+		}
+
+		// Destroys an object created with New() and releases its memory; nullptr is ignored.
+		template<class T>
+		static void Delete(T* p)
+		{
+			if (p)
+			{
+				p->~T();
+				Free(p);
+			}
+		}
+
+		// Installs the allocation, deallocation and per-thread initialisation callbacks.
+		static void SetAllocator(AllocateFn allocateFn, DeallocateFn deallocateFn, InitThreadCb initThreadCb)
+		{
+			allocate = allocateFn;
+			deallocate = deallocateFn;
+			initThread = initThreadCb;
+		}
+
+		// Invokes the per-thread initialisation callback when one was installed.
+		static void InitThread()
+		{
+			if (initThread != nullptr)
+				initThread();
+		}
+
+		// Standard-library compatible allocator that routes through Memory::Alloc / Memory::Free.
+		template<typename T>
+		struct Allocator : public std::allocator<T>
+		{
+			Allocator() {}
+			template<class U>
+			Allocator(const Allocator<U>&) {}
+			template<typename U> struct rebind { typedef Allocator<U> other; };
+
+			typename std::allocator<T>::value_type* allocate(typename std::allocator<T>::size_type n)
+			{
+				return reinterpret_cast<typename std::allocator<T>::value_type*>(Memory::Alloc(n * sizeof(T)));
+			}
+
+			typename std::allocator<T>::value_type* allocate(typename std::allocator<T>::size_type n, const typename std::allocator<T>::value_type*)
+			{
+				return reinterpret_cast<typename std::allocator<T>::value_type*>(Memory::Alloc(n * sizeof(T)));
+			}
+
+			void deallocate(typename std::allocator<T>::value_type* p, typename std::allocator<T>::size_type)
+			{
+				Memory::Free(p);
+			}
+		};
+	};
+
+	// std::* section
+	// Containers and strings whose storage goes through Memory::Allocator.
+	template <typename T, size_t _Size> class array : public std::array<T, _Size>{};
+	template <typename T> class vector : public std::vector<T, Memory::Allocator<T>>{};
+	template <typename T> class list : public std::list<T, Memory::Allocator<T>>{};
+	template <typename T> class unordered_set : public std::unordered_set<T, std::hash<T>, std::equal_to<T>, Memory::Allocator<T>>{};
+	template <typename T, typename V> class unordered_map : public std::unordered_map<T, V, std::hash<T>, std::equal_to<T>, Memory::Allocator<std::pair<const T, V>>>{};
+
+	using string = std::basic_string<char, std::char_traits<char>, Memory::Allocator<char>>;
+	using wstring = std::basic_string<wchar_t, std::char_traits<wchar_t>, Memory::Allocator<wchar_t>>;
+
+	using istringstream = std::basic_istringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
+	using ostringstream = std::basic_ostringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
+	using stringstream = std::basic_stringstream<char, std::char_traits<char>, Memory::Allocator<char>>;
+
+	using fstream = std::basic_fstream<char, std::char_traits<char>>;
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// One node of the doubly linked chunk list used by MemoryPool; holds SIZE elements.
+	template<class T, uint32 SIZE>
+	struct MemoryChunk
+	{
+		T data[SIZE];
+		MemoryChunk* next;
+		MemoryChunk* prev;
+
+		MemoryChunk() : next(0), prev(0) {}
+
+		// Destroys every chunk that follows this one (walking back from the tail so each
+		// deleted chunk is already the last of the list) and unlinks this chunk from its
+		// predecessor.
+		~MemoryChunk()
+		{
+			MemoryChunk* chunk = this;
+			while (chunk->next)
+				chunk = chunk->next;
+
+			while (chunk != this)
+			{
+				MemoryChunk* toDelete = chunk;
+				chunk = toDelete->prev;
+				Memory::Delete(toDelete);
+			}
+
+			if (prev != nullptr)
+			{
+				prev->next = nullptr;
+				prev = nullptr;
+			}
+		}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Append-only pool built from a list of fixed-size chunks.
+	// `root` is the first chunk, `chunk` the one currently written to and `index` the next free
+	// slot inside it (index == SIZE means the current chunk is exhausted or the pool is empty).
+	// Chunks after `chunk` may exist after Clear(true); they are reused by AddChunk().
+	template<class T, uint32 SIZE = 16>
+	class MemoryPool
+	{
+		typedef MemoryChunk<T, SIZE> Chunk;
+		Chunk* root;
+		Chunk* chunk;
+		uint32 index;
+
+		// Makes the next chunk current, allocating it when no preserved chunk is available.
+		OPTICK_INLINE void AddChunk()
+		{
+			index = 0;
+			if (!chunk || !chunk->next)
+			{
+				Chunk* newChunk = Memory::New<Chunk>();
+				if (chunk)
+				{
+					chunk->next = newChunk;
+					newChunk->prev = chunk;
+					chunk = newChunk;
+				}
+				else
+				{
+					root = chunk = newChunk;
+				}
+			}
+			else
+			{
+				chunk = chunk->next;
+			}
+		}
+	public:
+		MemoryPool() : root(nullptr), chunk(nullptr), index(SIZE) {}
+
+		// Reserves one slot and returns a reference to it.
+		OPTICK_INLINE T& Add()
+		{
+			if (index >= SIZE)
+				AddChunk();
+
+			return chunk->data[index++];
+		}
+
+		// Appends a copy of `item` and returns a reference to the stored element.
+		OPTICK_INLINE T& Add(const T& item)
+		{
+			return Add() = item;
+		}
+
+		// Copies `count` elements into the pool with memcpy and returns a pointer to the first
+		// stored element. With allowOverlap == false the range is kept inside a single chunk
+		// (nullptr when count > SIZE); otherwise it may continue in following chunks.
+		// Returns nullptr when count == 0.
+		OPTICK_INLINE T* AddRange(const T* items, size_t count, bool allowOverlap = true)
+		{
+			if (count == 0 || (count > SIZE && !allowOverlap))
+				return nullptr;
+
+			// Move to a fresh chunk when the current one is exhausted (this also covers the
+			// empty pool, where chunk is still nullptr) so the returned pointer addresses the
+			// first copied element, or when a contiguous range does not fit in what is left.
+			if (index >= SIZE || (count >= (SIZE - index) && !allowOverlap))
+			{
+				AddChunk();
+			}
+
+			T* result = &chunk->data[index];
+
+			while (count)
+			{
+				size_t numLeft = SIZE - index;
+				size_t numCopy = numLeft < count ? numLeft : count;
+				std::memcpy(&chunk->data[index], items, sizeof(T) * numCopy);
+
+				count -= numCopy;
+				items += numCopy;
+				index += (uint32_t)numCopy;
+
+				if (count)
+					AddChunk();
+			}
+
+			return result;
+		}
+
+
+		// Reserves `count` slots in the current chunk without allocating; returns nullptr when
+		// there is no current chunk, the count is negative or the slots do not fit.
+		OPTICK_INLINE T* TryAdd(int count)
+		{
+			if (chunk != nullptr && count >= 0 && index + count <= SIZE)
+			{
+				T* res = &chunk->data[index];
+				index += count;
+				return res;
+			}
+
+			return nullptr;
+		}
+
+		// Last stored element, or nullptr when there is none.
+		OPTICK_INLINE T* Back()
+		{
+			if (chunk && index > 0)
+				return &chunk->data[index - 1];
+
+			if (chunk && chunk->prev != nullptr)
+				return &chunk->prev->data[SIZE - 1];
+
+			return nullptr;
+		}
+
+		// First stored element, or nullptr when the pool is empty.
+		OPTICK_INLINE T* Front()
+		{
+			return !IsEmpty() ? &root->data[0] : nullptr;
+		}
+
+		// Number of stored elements.
+		OPTICK_INLINE size_t Size() const
+		{
+			if (root == nullptr)
+				return 0;
+
+			size_t count = 0;
+
+			for (const Chunk* it = root; it != chunk; it = it->next)
+				count += SIZE;
+
+			return count + index;
+		}
+
+		OPTICK_INLINE bool IsEmpty() const
+		{
+			return (chunk == nullptr) || (chunk == root && index == 0);
+		}
+
+		// Removes all elements. With preserveMemory == true the chunks are kept for reuse,
+		// otherwise the whole chunk list is released.
+		OPTICK_INLINE void Clear(bool preserveMemory = true)
+		{
+			if (!preserveMemory)
+			{
+				if (root)
+				{
+					Memory::Delete(root);
+					root = nullptr;
+					chunk = nullptr;
+					index = SIZE;
+				}
+			}
+			else if (root)
+			{
+				index = 0;
+				chunk = root;
+			}
+		}
+
+		// Forward iterator over the stored elements, addressed as (chunk, slot).
+		class const_iterator
+		{
+			// Moves to the next slot, stepping to slot 0 of the next chunk after the last slot.
+			void advance()
+			{
+				if (chunkIndex < SIZE - 1)
+				{
+					++chunkIndex;
+				}
+				else
+				{
+					chunkPtr = chunkPtr->next;
+					chunkIndex = 0;
+				}
+			}
+		public:
+			typedef const_iterator self_type;
+			typedef T value_type;
+			typedef T& reference;
+			typedef T* pointer;
+			typedef int difference_type;
+			const_iterator(const Chunk* ptr, size_t index) : chunkPtr(ptr), chunkIndex(index) { }
+			// Pre-increment: advance, then yield the advanced iterator.
+			self_type& operator++()
+			{
+				advance();
+				return *this;
+			}
+			// Post-increment: advance, but yield the position held before the call.
+			self_type operator++(int /*junk*/)
+			{
+				self_type previous = *this;
+				advance();
+				return previous;
+			}
+			reference operator*() { return (reference)chunkPtr->data[chunkIndex]; }
+			pointer operator->() { return &chunkPtr->data[chunkIndex]; }
+			bool operator==(const self_type& rhs) const { return (chunkPtr == rhs.chunkPtr) && (chunkIndex == rhs.chunkIndex); }
+			bool operator!=(const self_type& rhs) const { return (chunkPtr != rhs.chunkPtr) || (chunkIndex != rhs.chunkIndex); }
+		private:
+			const Chunk* chunkPtr;
+			size_t chunkIndex;
+		};
+
+		const_iterator begin() const
+		{
+			return const_iterator(root, root ? 0 : SIZE);
+		}
+
+		const_iterator end() const
+		{
+			// advance() leaves a completely filled chunk by moving to (chunk->next, 0), so that
+			// is the position to report when the current chunk is full; (chunk, SIZE) would
+			// never be reached and iteration would run past the stored elements.
+			if (chunk != nullptr && index >= SIZE)
+				return const_iterator(chunk->next, 0);
+
+			return const_iterator(chunk, index);
+		}
+
+		// Calls func(element) for every stored element, in insertion order.
+		template<class Func>
+		void ForEach(Func func) const
+		{
+			for (const Chunk* it = root; it != chunk; it = it->next)
+				for (uint32 i = 0; i < SIZE; ++i)
+					func(it->data[i]);
+
+			if (chunk)
+				for (uint32 i = 0; i < index; ++i)
+					func(chunk->data[i]);
+		}
+
+		template<class Func>
+		void ForEach(Func func)
+		{
+			for (Chunk* it = root; it != chunk; it = it->next)
+				for (uint32 i = 0; i < SIZE; ++i)
+					func(it->data[i]);
+
+			if (chunk)
+				for (uint32 i = 0; i < index; ++i)
+					func(chunk->data[i]);
+		}
+
+		// Calls func(data, count) once per chunk that holds stored elements.
+		template<class Func>
+		void ForEachChunk(Func func) const
+		{
+			for (const Chunk* it = root; it != chunk; it = it->next)
+				func(it->data, SIZE);
+
+			if (chunk)
+				func(chunk->data, index);
+		}
+
+		// Copies all stored elements to `destination`, which must have room for Size() elements.
+		void ToArray(T* destination) const
+		{
+			uint32 curIndex = 0;
+
+			for (const Chunk* it = root; it != chunk; it = it->next)
+			{
+				memcpy(&destination[curIndex], it->data, sizeof(T) * SIZE);
+				curIndex += SIZE;
+			}
+
+			if (chunk && index > 0)
+			{
+				memcpy(&destination[curIndex], chunk->data, sizeof(T) * index);
+			}
+		}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	// Byte-oriented pool: stores raw copies of arbitrary objects in uint8 chunks.
+	template<uint32 CHUNK_SIZE>
+	class MemoryBuffer : private MemoryPool<uint8, CHUNK_SIZE>
+	{
+	public:
+		// Copies `size` bytes starting at `data` into the buffer and returns the stored copy.
+		template<class U>
+		U* Add(U* data, size_t size, bool allowOverlap = true)
+		{
+			return (U*)(MemoryPool<uint8, CHUNK_SIZE>::AddRange((uint8*)data, size, allowOverlap));
+		}
+
+		// NOTE(review): the cast target below was missing from the reviewed text and is
+		// restored as T* - confirm against upstream Optick.
+		template<class T>
+		T* Add(const T& val, bool allowOverlap = true)
+		{
+			return static_cast<T*>(Add(&val, sizeof(T), allowOverlap));
+		}
+
+		void Clear(bool preserveMemory)
+		{
+			MemoryPool<uint8, CHUNK_SIZE>::Clear(preserveMemory);
+		}
+	};
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_message.cpp b/neo/libs/optick/optick_message.cpp
new file mode 100644
index 00000000..1f33ef44
--- /dev/null
+++
b/neo/libs/optick/optick_message.cpp @@ -0,0 +1,195 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#include "optick_message.h"
+
+#if USE_OPTICK
+#include "optick_common.h"
+#include "optick_core.h"
+#include "optick_server.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Fixed-size prefix of every incoming message: a marker value followed by the payload length.
+struct MessageHeader
+{
+	uint32 mark;
+	uint32 length;
+
+	static const uint32 MESSAGE_MARK = 0xB50FB50F;
+
+	bool IsValid() const { return mark == MESSAGE_MARK; }
+
+	MessageHeader() : mark(0), length(0) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Singleton that maps an IMessage::Type value to the Create function of the matching message.
+class MessageFactory
+{
+	typedef IMessage* (*MessageCreateFunction)(InputDataStream& str);
+	MessageCreateFunction factory[IMessage::COUNT];
+
+	template<class T>
+	void RegisterMessage()
+	{
+		factory[T::GetMessageType()] = T::Create;
+	}
+
+	MessageFactory()
+	{
+		// Clear the whole table (not just its first slot) so that the registration check
+		// below and the lookup in Create() never read an indeterminate pointer.
+		memset(factory, 0, sizeof(factory));
+
+		RegisterMessage<StartMessage>();
+		RegisterMessage<StopMessage>();
+		RegisterMessage<CancelMessage>();
+		RegisterMessage<TurnSamplingMessage>();
+
+		for (uint32 msg = 0; msg < IMessage::COUNT; ++msg)
+		{
+			OPTICK_ASSERT(factory[msg] != nullptr, "Message is not registered to factory");
+		}
+	}
+public:
+	static MessageFactory& Get()
+	{
+		static MessageFactory instance;
+		return instance;
+	}
+
+	// Reads one message (header, application id, type, payload) from `str`.
+	// Returns nullptr when the type is unknown or when the number of bytes consumed does not
+	// match the length announced in the header.
+	IMessage* Create(InputDataStream& str)
+	{
+		MessageHeader header;
+		str.Read(header);
+
+		// Bytes available right after the header; used below to measure what was consumed.
+		size_t length = str.Length();
+
+		uint16 applicationID = 0;
+		uint16 messageType = IMessage::COUNT;
+
+		str >> applicationID;
+		str >> messageType;
+
+		OPTICK_VERIFY( messageType < IMessage::COUNT && factory[messageType] != nullptr, "Unknown message type!", return nullptr )
+
+		IMessage* result = factory[messageType](str);
+
+		if (header.length + str.Length() != length)
+		{
+			OPTICK_FAILED("Message Stream is corrupted! Invalid Protocol?")
+			// The message was already constructed; release it instead of leaking it.
+			Memory::Delete(result);
+			return nullptr;
+		}
+
+		return result;
+	}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream& operator<<(OutputDataStream& os, const DataResponse& val)
+{
+	return os << val.version << (uint32)val.type;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Scans `str` for the next valid header and builds the message that follows it.
+// Returns nullptr when no complete message is available yet.
+IMessage* IMessage::Create(InputDataStream& str)
+{
+	MessageHeader header;
+
+	while (str.Peek(header))
+	{
+		if (header.IsValid())
+		{
+			if (str.Length() < header.length + sizeof(MessageHeader))
+				break; // Not enough data yet
+
+			return MessageFactory::Get().Create(str);
+		}
+		else
+		{
+			// Some garbage in the stream?
+			str.Skip(1);
+		}
+	}
+
+	return nullptr;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void StartMessage::Apply()
+{
+	Core& core = Core::Get();
+	core.SetSettings(settings);
+	core.StartCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Deserialises the capture settings in the order the fields are streamed below.
+IMessage* StartMessage::Create(InputDataStream& stream)
+{
+	StartMessage* msg = Memory::New<StartMessage>();
+	CaptureSettings& settings = msg->settings;
+	stream >> settings.mode
+		>> settings.categoryMask
+		>> settings.samplingFrequency
+		>> settings.frameLimit
+		>> settings.timeLimitUs
+		>> settings.spikeLimitUs
+		>> settings.memoryLimitMb
+		>> settings.password;
+
+	if (!settings.password.empty())
+		settings.password = base64_decode(settings.password);
+
+	return msg;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void StopMessage::Apply()
+{
+	Core::Get().DumpCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Stop carries no payload; only the message object itself is created.
+IMessage* StopMessage::Create(InputDataStream&)
+{
+	return Memory::New<StopMessage>();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void CancelMessage::Apply()
+{
+	Core::Get().CancelCapture();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Cancel carries no payload; only the message object itself is created.
+IMessage* CancelMessage::Create(InputDataStream&)
+{
+	return Memory::New<CancelMessage>();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Reads the two payload fields in the order they are streamed: index, then isSampling.
+IMessage* TurnSamplingMessage::Create( InputDataStream& stream )
+{
+	TurnSamplingMessage* msg = Memory::New<TurnSamplingMessage>();
+	stream >> msg->index;
+	stream >> msg->isSampling;
+	return msg;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void TurnSamplingMessage::Apply()
+{
+	// Backward compatibility
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_message.h b/neo/libs/optick/optick_message.h
new file mode 100644
index 00000000..32cdd089
--- /dev/null
+++ b/neo/libs/optick/optick_message.h
@@ -0,0 +1,153 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+//
copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#pragma once
+#include "optick.config.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+#include "optick_serialization.h"
+
+namespace Optick
+{
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+static const uint32 NETWORK_PROTOCOL_VERSION = 26;
+static const uint16 NETWORK_APPLICATION_ID = 0xB50F;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Descriptor of an outgoing data packet; `type` selects how the payload is interpreted.
+struct DataResponse
+{
+	enum Type : uint16
+	{
+		FrameDescriptionBoard = 0,		// DescriptionBoard for Instrumental Frames
+		EventFrame = 1,					// Instrumental Data
+		SamplingFrame = 2,				// Sampling Data
+		NullFrame = 3,					// Last Frame Mark
+		ReportProgress = 4,				// Report Current Progress
+		Handshake = 5,					// Handshake Response
+		Reserved_0 = 6,
+		SynchronizationData = 7,		// Synchronization Data for the thread
+		TagsPack = 8,					// Pack of tags
+		CallstackDescriptionBoard = 9,	// DescriptionBoard with resolved function addresses
+		CallstackPack = 10,				// Pack of CallStacks
+		Reserved_1 = 11,
+		Reserved_2 = 12,
+		Reserved_3 = 13,
+		Reserved_4 = 14,
+		//...
+		Reserved_255 = 255,
+
+		FiberSynchronizationData = 1 << 8, // Synchronization Data for the Fibers
+		SyscallPack,
+		SummaryPack,
+		FramesPack,
+	};
+
+	uint32 version;
+	uint32 size;
+	Type type;
+	uint16 application;
+
+	DataResponse(Type t, uint32 s) : version(NETWORK_PROTOCOL_VERSION), size(s), type(t), application(NETWORK_APPLICATION_ID){}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+OutputDataStream& operator << (OutputDataStream& os, const DataResponse& val);
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Base class of the incoming control messages.
+class IMessage
+{
+public:
+	// Message identifiers; COUNT is the number of message types, not a message itself.
+	enum Type : uint16
+	{
+		Start,
+		Stop,
+		Cancel,
+		TurnSampling,
+		COUNT,
+	};
+
+	virtual void Apply() = 0;
+	virtual ~IMessage() {}
+
+	static IMessage* Create( InputDataStream& str );
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Binds a concrete message class to its IMessage::Type value.
+template<IMessage::Type MESSAGE_TYPE>
+class Message : public IMessage
+{
+	enum { id = MESSAGE_TYPE };
+public:
+	static uint32 GetMessageType() { return id; }
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CaptureSettings
+{
+	// Capture Mode
+	uint32 mode;
+	// Category Filter
+	uint32 categoryMask;
+	// Tracer: Sampling Frequency
+	uint32 samplingFrequency;
+	// Max Duration for a capture (frames)
+	uint32 frameLimit;
+	// Max Duration for a capture (us)
+	uint32 timeLimitUs;
+	// Spike limit (us)
+	// NOTE(review): the original comment repeated the timeLimitUs text; confirm the exact meaning.
+	uint32 spikeLimitUs;
+	// Max Memory for a capture (MB)
+	uint64 memoryLimitMb;
+	// Tracer: Root Password for the Device
+	string password;
+
+	CaptureSettings() : mode(0), categoryMask(0), samplingFrequency(0), frameLimit(0), timeLimitUs(0), spikeLimitUs(0), memoryLimitMb(0) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct StartMessage : public Message<IMessage::Start>
+{
+	CaptureSettings settings;
+	static IMessage* Create(InputDataStream&);
+	virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct StopMessage : public Message<IMessage::Stop>
+{
+	static IMessage* Create(InputDataStream&);
+	virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct CancelMessage : public Message<IMessage::Cancel>
+{
+	static IMessage* Create(InputDataStream&);
+	virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct TurnSamplingMessage : public Message<IMessage::TurnSampling>
+{
+	int32 index;
+	byte isSampling;
+
+	// Gives both fields a defined value even when the object is default-initialised.
+	TurnSamplingMessage() : index(0), isSampling(0) {}
+
+	static IMessage* Create(InputDataStream& stream);
+	virtual void Apply() override;
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_miniz.cpp b/neo/libs/optick/optick_miniz.cpp
new file mode 100644
index 00000000..d8e9922d
--- /dev/null
+++ b/neo/libs/optick/optick_miniz.cpp
@@ -0,0 +1,2953 @@
+/**************************************************************************
+ *
+ * Copyright 2013-2014 RAD Game Tools and Valve Software
+ * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + +#include "optick_miniz.h" + +// RB: this library is already included through TinyEXR +#if 0 //OPTICK_ENABLE_COMPRESSION + + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- zlib-style API's */ + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); + size_t block_len = buf_len % 5552; + if (!ptr) + return MZ_ADLER32_INIT; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) + s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + return (s2 << 16) + s1; +} + +/* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ */ +#if 0 + mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) + { + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) + return MZ_CRC32_INIT; + crcu32 = ~crcu32; + while (buf_len--) + { + mz_uint8 b = *ptr++; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; + } + return ~crcu32; + } +#else +/* Faster, but larger CPU cache footprint. 
+ */ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, + 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, + 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, + 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, + 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, + 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, + 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, + 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, + 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, + 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, + 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, + 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, + 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, + 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, + 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 
0xFED41B76, 0x89D32BE0, + 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, + 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, + 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, + 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, + 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, + 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, + 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, + 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, + 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, + 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + mz_uint32 crc32 = (mz_uint32)crc ^ 0xFFFFFFFF; + const mz_uint8 *pByte_buf = (const mz_uint8 *)ptr; + + while (buf_len >= 4) + { + crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; + crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[1]) & 0xFF]; + crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[2]) & 0xFF]; + crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[3]) & 0xFF]; + pByte_buf += 4; + buf_len -= 4; + } + + while (buf_len) + { + crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; + ++pByte_buf; + --buf_len; + } + + return ~crc32; +} +#endif + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +void *miniz_def_alloc_func(void *opaque, size_t items, size_t size) 
+{ + (void)opaque, (void)items, (void)size; + return MZ_MALLOC(items * size); +} +void miniz_def_free_func(void *opaque, void *address) +{ + (void)opaque, (void)address; + MZ_FREE(address); +} +void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size) +{ + (void)opaque, (void)address, (void)items, (void)size; + return MZ_REALLOC(address, items * size); +} + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +#ifndef MINIZ_NO_ZLIB_APIS + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) + return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) + pStream->zalloc = miniz_def_alloc_func; + if (!pStream->zfree) + pStream->zfree = miniz_def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) + return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor 
*)pStream->state, NULL, NULL, ((tdefl_compressor *)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) + return MZ_STREAM_ERROR; + if (!pStream->avail_out) + return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) + flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor *)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; + orig_total_out = pStream->total_out; + for (;;) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor *)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; /* Can't make forward progress without some input. 
+ */ + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + /* This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) */ + return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + /* In case mz_ulong is 64-bits (argh I hate longs). */ + if ((source_len | *pDest_len) > 0xFFFFFFFFU) + return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) + return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? 
MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; + int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) + return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) + pStream->zalloc = miniz_def_alloc_func; + if (!pStream->zfree) + pStream->zfree = miniz_def_free_func; + + pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflateReset(mz_streamp pStream) +{ + inflate_state *pDecomp; + if (!pStream) + return MZ_STREAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + + pDecomp = 
(inflate_state *)pStream->state; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + /* pDecomp->m_window_bits = window_bits */; + + return MZ_OK; +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state *pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) + return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) + flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + + pState = (inflate_state *)pStream->state; + if (pState->m_window_bits > 0) + decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; + pState->m_first_call = 0; + if (pState->m_last_status < 0) + return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + /* MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
*/ + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + /* flush != MZ_FINISH then we must assume there's more input. */ + if (flush != MZ_FINISH) + decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for (;;) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; /* Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). */ + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. */ + else if (flush == MZ_FINISH) + { + /* The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. */ + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. */ + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + /* In case mz_ulong is 64-bits (argh I hate longs). */ + if ((source_len | *pDest_len) > 0xFFFFFFFFU) + return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct + { + int m_err; + const char *m_pDesc; + } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; + for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) + if (s_error_descs[i].m_err == err) + return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif /*MINIZ_NO_ZLIB_APIS */ + +#ifdef __cplusplus +} +#endif + +/* + This is free and unencumbered software released into the public domain. 
+ + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <http://unlicense.org/> +*/ +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + + + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- Low-level Compression (independent from all decompression API's) */ + +/* Purposely making these tables static for faster init and thread safety. */ +static const mz_uint16 s_tdefl_len_sym[256] = + { + 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, + 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 284, 284, 
284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 + }; + +static const mz_uint8 s_tdefl_len_extra[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 + }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = + { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 + }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = + { + 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 + }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + +/* Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. 
*/ +typedef struct +{ + mz_uint16 m_key, m_sym_index; /* m_key is the 16-bit sort key; m_sym_index records which symbol the entry belongs to */ +} tdefl_sym_freq; +static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq *pSyms0, tdefl_sym_freq *pSyms1) /* pSyms0 holds the num_syms input entries and pSyms1 is scratch space for the same number of entries; the returned pointer is whichever of the two ends up holding the sorted result */ +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; /* hist[0..255] counts low key bytes, hist[256..511] counts high key bytes */ + tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) + { + mz_uint freq = pSyms0[i].m_key; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) /* every key has a zero high byte, so the second pass would not reorder anything */ + total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) /* least significant byte first */ + { + const mz_uint32 *pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) + { + offsets[i] = cur_ofs; /* running total: first output slot for keys whose current byte equals i */ + cur_ofs += pHist[i]; + } + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { + tdefl_sym_freq *t = pCur_syms; /* swap the source and destination buffers for the next pass */ + pCur_syms = pNew_syms; + pNew_syms = t; + } + } + return pCur_syms; +} + +/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
*/ +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) /* In-place code length calculation over A[0..n-1]. On entry each m_key is a symbol frequency (the caller passes the array returned by tdefl_radix_sort_syms); on return each m_key is the code length in bits assigned to that entry. */ +{ + int root, leaf, next, avbl, used, dpth; + if (n == 0) + return; + else if (n == 1) + { + A[0].m_key = 1; /* a lone symbol still gets a 1-bit code */ + return; + } + A[0].m_key += A[1].m_key; /* phase 1: repeatedly combine the two smallest remaining weights (unmerged leaves start at index leaf, internal nodes at index root); A[next] receives the combined weight and each consumed internal node is overwritten with the index of its parent */ + root = 0; + leaf = 2; + for (next = 1; next < n - 1; next++) + { + if (leaf >= n || A[root].m_key < A[leaf].m_key) + { + A[next].m_key = A[root].m_key; + A[root++].m_key = (mz_uint16)next; + } + else + A[next].m_key = A[leaf++].m_key; + if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) + { + A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); + A[root++].m_key = (mz_uint16)next; + } + else + A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); + } + A[n - 2].m_key = 0; /* phase 2: A[n - 2] is the root at depth 0; walking backwards, every other internal node is one level below its parent */ + for (next = n - 3; next >= 0; next--) + A[next].m_key = A[A[next].m_key].m_key + 1; + avbl = 1; /* phase 3: write leaf code lengths from the end of the array; avbl is the number of nodes available at depth dpth, used the number of those that are internal */ + used = dpth = 0; + root = n - 2; + next = n - 1; + while (avbl > 0) + { + while (root >= 0 && (int)A[root].m_key == dpth) + { + used++; + root--; + } + while (avbl > used) /* the nodes at this depth that are not internal are leaves */ + { + A[next--].m_key = (mz_uint16)(dpth); + avbl--; + } + avbl = 2 * used; /* each internal node contributes two nodes to the next depth */ + dpth++; + used = 0; + } +} + +/* Limits canonical Huffman code table's max code size. 
*/ +enum +{ + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 +}; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; + mz_uint32 total = 0; + if (code_list_len <= 1) + return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) + pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) + total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) + if (pNum_codes[i]) + { + pNum_codes[i]--; + pNum_codes[i + 1] += 2; + break; + } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; + mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; + MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) + num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) + if (pSym_count[i]) + { + syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; + syms0[num_used_syms++].m_sym_index = (mz_uint16)i; + } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); + tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) + num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); + MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) + d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + 
next_code[1] = 0; + for (j = 0, i = 2; i <= code_size_limit; i++) + next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; + if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) + continue; + code = next_code[code_size]++; + for (l = code_size; l > 0; l--, code >>= 1) + rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) \ + do \ + { \ + mz_uint bits = b; \ + mz_uint len = l; \ + MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); \ + d->m_bits_in += len; \ + while (d->m_bits_in >= 8) \ + { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ + } \ + MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() \ + { \ + if (rle_repeat_count) \ + { \ + if (rle_repeat_count < 3) \ + { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) \ + packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } \ + else \ + { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 16; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ + } \ + rle_repeat_count = 0; \ + } \ + } + +#define TDEFL_RLE_ZERO_CODE_SIZE() \ + { \ + if (rle_z_count) \ + { \ + if (rle_z_count < 3) \ + { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ + while (rle_z_count--) \ + packed_code_sizes[num_packed_code_sizes++] = 0; \ + } \ + else if (rle_z_count <= 10) \ + { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 17; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } \ + else \ + { \ + 
d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 18; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ + } \ + rle_z_count = 0; \ + } \ + } + /* Order in which the code sizes of the code length alphabet are written to a dynamic block header (the sequence given in RFC 1951, section 3.2.7). Only ever read, so it is const like the other lookup tables in this file. */ +static const mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + /* Starts a dynamic Huffman block: builds the literal/length and distance code tables from the gathered symbol counts, run-length packs their code sizes, and writes the block header with TDEFL_PUT_BITS. */ +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; + mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; /* give symbol 256 a non-zero count so that it is assigned a code */ + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) /* drop trailing unused literal/length codes, keeping at least 257 */ + if (d->m_huff_code_sizes[0][num_lit_codes - 1]) + break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) /* drop trailing unused distance codes, keeping at least 1 */ + if (d->m_huff_code_sizes[1][num_dist_codes - 1]) + break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; + num_packed_code_sizes = 0; + rle_z_count = 0; + rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) /* flush: 138 is the longest zero run a single packed entry encodes */ + { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = 
(mz_uint16)(d->m_huff_count[2][code_size] + 1); + packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) + if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) + break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); + TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) + TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; + MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) + TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) + *p++ = 8; + for (; i <= 255; ++i) + *p++ = 9; + for (; i <= 279; ++i) + *p++ = 7; + for (; i <= 287; ++i) + *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && 
MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) /* Fast variant for little-endian builds with 64-bit registers: Huffman-codes the buffered LZ literals and matches into the output buffer, followed by symbol 256. Returns MZ_FALSE when the output buffer is full. The match distance is assembled from bytes and the bit buffer is stored with memcpy so that no misaligned mz_uint16 or mz_uint64 pointer is dereferenced. */ +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) \ + { \ + bit_buffer |= (((mz_uint64)(b)) << bits_in); \ + bits_in += (l); \ + } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); /* byte-wise little-endian read: the code buffer is byte-packed, so a mz_uint16 load at this address could be misaligned */ + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + /* This sequence coaxes MSVC into using cmov's vs. jmp's. */ + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + memcpy(pOutput_buf, &bit_buffer, sizeof(mz_uint64)); /* writes the same 8 bytes as a direct 64-bit store, without dereferencing a possibly misaligned mz_uint64 pointer */ + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) /* hand the leftover bits, at most 16 at a time, back to the regular bit writer */ + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) /* Portable variant that emits every code through TDEFL_PUT_BITS. */ +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; + num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; + num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS */ + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); + TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; + saved_bit_buf = d->m_bit_buffer; + saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + /* If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. */ + if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) + { + mz_uint i; + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + /* Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
*/ + else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) + { + mz_uint i, a = d->m_adler32; + for (i = 0; i < 4; i++) + { + TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); + a <<= 8; + } + } + } + else + { + mz_uint i, z = 0; + TDEFL_PUT_BITS(0, 3); + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, z ^= 0xFFFF) + { + TDEFL_PUT_BITS(z & 0xFFFF, 16); + } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; + d->m_total_lz_bytes = 0; + d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#ifdef 
MINIZ_UNALIGNED_USE_MEMCPY +static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +#else +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p) +#define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p) +#endif +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) + return; + for (;;) + { + for (;;) + { + if (--num_probes_left == 0) + return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) + break; + q = (const mz_uint16 *)(d->m_dict + probe_pos); + if (TDEFL_READ_UNALIGNED_WORD2(q) != s01) + continue; + p = s; + probe_len = 32; + do + { + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && 
(--probe_len > 0)); + if (!probe_len) + { + *pMatch_dist = dist; + *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); + break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q)) > match_len) + { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) + break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) + return; + for (;;) + { + for (;;) + { + if (--num_probes_left == 0) + return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) + break; + p = s; + q = d->m_dict + probe_pos; + for (probe_len = 0; probe_len < max_match_len; probe_len++) + if (*p++ != *q++) + break; + if (probe_len > match_len) + { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = probe_len) == max_match_len) + return; + c0 = d->m_dict[pos + match_len]; + c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */ + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && 
MINIZ_LITTLE_ENDIAN +#ifdef MINIZ_UNALIGNED_USE_MEMCPY +static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p) +{ + mz_uint32 ret; + memcpy(&ret, p, sizeof(mz_uint32)); + return ret; +} +#else +#define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32 *)(p) +#endif +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + /* Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. */ + mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) + break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = 
TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do + { + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? 
TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); +#ifdef MINIZ_UNALIGNED_USE_MEMCPY + memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist)); +#else + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; +#endif + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) + { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) + { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); + if (--d->m_num_flags_left == 0) + { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); + d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); + if (--d->m_num_flags_left == 0) + { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? 
s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) + d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; + size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + /* Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. */ + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + 
mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + /* Simple lazy/greedy parsing state machine. */ + len_to_move = 1; + cur_match_dist = 0; + cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); + cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; + while (cur_match_len < d->m_lookahead_size) + { + if (d->m_dict[cur_pos + cur_match_len] != c) + break; + cur_match_len++; + } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) + cur_match_len = 0; + else + cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; + d->m_saved_match_dist = cur_match_dist; + 
d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; + d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + /* Move the lookahead forward by len_to_move bytes. */ + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + /* Check if it's time to flush the current LZ codes to the internal output buffer. */ + if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) + { + int n; + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) + *pIn_buf_size = 0; + if (pOut_buf_size) + *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; + d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; + d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); + d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) + { + if (pIn_buf_size) + *pIn_buf_size = 0; + if (pOut_buf_size) + *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) + { + MZ_CLEAR_OBJ(d->m_hash); + MZ_CLEAR_OBJ(d->m_next); + d->m_dict_size = 0; + } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); + return tdefl_compress(d, 
pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; + d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); + d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; + d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) + MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; + d->m_pOutput_buf_end = d->m_output_buf; + d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; + d->m_adler32 = 1; + d->m_pIn_buf = NULL; + d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; + d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; + d->m_pSrc = NULL; + d->m_src_buf_left = 0; + d->m_out_buf_ofs = 0; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) + MZ_CLEAR_OBJ(d->m_dict); + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; + 
mz_bool succeeded; + if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) + return MZ_FALSE; + pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + if (!pComp) + return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); + return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; + mz_uint8 *pNew_buf; + if (!p->m_expandable) + return MZ_FALSE; + do + { + new_capacity = MZ_MAX(128U, new_capacity << 1U); + } while (new_size > new_capacity); + pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); + if (!pNew_buf) + return MZ_FALSE; + p->m_pBuf = pNew_buf; + p->m_capacity = new_capacity; + } + memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); + p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) + return MZ_FALSE; + else + *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return NULL; + *pOut_len = out_buf.m_size; + return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) + return 0; + out_buf.m_pBuf = (mz_uint8 *)pOut_buf; + out_buf.m_capacity = out_buf_len; + if 
(!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return 0; + return out_buf.m_size; +} + +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +/* level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). */ +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) + comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) + comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) + comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) + comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) + comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) + comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) */ +#endif + +/* Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at + http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. + This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. */ +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
*/ + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + tdefl_output_buffer out_buf; + int i, bpl = w * num_chans, y, z; + mz_uint32 c; + *pLen_out = 0; + if (!pComp) + return NULL; + MZ_CLEAR_OBJ(out_buf); + out_buf.m_expandable = MZ_TRUE; + out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); + if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) + { + MZ_FREE(pComp); + return NULL; + } + /* write dummy header */ + for (z = 41; z; --z) + tdefl_output_buffer_putter(&z, 1, &out_buf); + /* compress image data */ + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) + { + tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); + tdefl_compress_buffer(pComp, (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); + } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) + { + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + /* write real header */ + *pLen_out = out_buf.m_size - 41; + { + static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 }; + mz_uint8 pnghdr[41] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, + 0x0a, 0x1a, 0x0a, 0x00, 0x00, + 0x00, 0x0d, 0x49, 0x48, 0x44, + 0x52, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x49, 0x44, 0x41, + 0x54 }; + pnghdr[18] = (mz_uint8)(w >> 8); + pnghdr[19] = (mz_uint8)w; + pnghdr[22] = (mz_uint8)(h >> 8); + pnghdr[23] = (mz_uint8)h; + pnghdr[25] = chans[num_chans]; + pnghdr[33] = (mz_uint8)(*pLen_out >> 24); + pnghdr[34] = (mz_uint8)(*pLen_out >> 16); + pnghdr[35] = (mz_uint8)(*pLen_out >> 8); + pnghdr[36] = (mz_uint8)*pLen_out; + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); + for (i = 0; i < 4; ++i, c <<= 8) + 
((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + /* write footer (IDAT CRC-32, followed by IEND chunk) */ + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) + { + *pLen_out = 0; + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); + for (i = 0; i < 4; ++i, c <<= 8) + (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); + /* compute final size of file, grab compressed data buffer and return */ + *pLen_out += 57; + MZ_FREE(pComp); + return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) */ + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that */ +/* non-C language bindings to tdefL_ and tinfl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tdefl_compressor *tdefl_compressor_alloc() +{ + return (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); +} + +void tdefl_compressor_free(tdefl_compressor *pComp) +{ + MZ_FREE(pComp); +} +#endif + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef __cplusplus +} +#endif +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- Low-level Decompression (completely independent from all compression API's) */ + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN \ + switch (r->m_state) \ + { \ + case 0: +#define TINFL_CR_RETURN(state_index, result) \ + do \ + { \ + status = result; \ + r->m_state = state_index; \ + goto common_exit; \ + case state_index:; \ + } \ + MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) \ + do \ + { \ + for (;;) \ + { \ + TINFL_CR_RETURN(state_index, result); \ + } \ + } \ + MZ_MACRO_END +#define TINFL_CR_FINISH } + +#define TINFL_GET_BYTE(state_index, c) \ + do \ + { \ + while (pIn_buf_cur >= pIn_buf_end) \ + { \ + TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \ + } \ + c = *pIn_buf_cur++; \ + } \ + MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) \ + do \ + { \ + mz_uint c; \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) \ + do \ + { \ + if (num_bits < (mz_uint)(n)) \ + { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) \ + do \ + { \ + if (num_bits < (mz_uint)(n)) \ + { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + b = bit_buf & ((1 << (n)) - 1); \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END + +/* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. */ +/* It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). 
It works by trying to fully decode a */ +/* Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the */ +/* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do \ + { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) \ + { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } \ + else if (num_bits > TINFL_FAST_LOOKUP_BITS) \ + { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do \ + { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); \ + if (temp >= 0) \ + break; \ + } \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < 15); + +/* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read */ +/* beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully */ +/* decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. */ +/* The slow path is only executed at the very end of the input buffer. */ +/* v1.16: The original macro handled the case at the very end of the passed-in input buffer, but we also need to handle the case where the user passes in 1+zillion bytes */ +/* following the deflate data and our non-conservative read-ahead path won't kick in here on this code. This is much trickier. 
*/ +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ + do \ + { \ + int temp; \ + mz_uint code_len, c; \ + if (num_bits < 15) \ + { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) \ + { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } \ + else \ + { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ + pIn_buf_cur += 2; \ + num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else \ + { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do \ + { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while (temp < 0); \ + } \ + sym = temp; \ + bit_buf >>= code_len; \ + num_bits -= code_len; \ + } \ + MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 }; + static const int s_length_extra[31] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 }; + static const int s_dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 }; + static const int s_dist_extra[32] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; + static const mz_uint8 s_length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; + mz_uint32 num_bits, dist, counter, num_extra; + tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const 
pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + /* Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). */ + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) + { + *pIn_buf_size = *pOut_buf_size = 0; + return TINFL_STATUS_BAD_PARAM; + } + + num_bits = r->m_num_bits; + bit_buf = r->m_bit_buf; + dist = r->m_dist; + counter = r->m_counter; + num_extra = r->m_num_extra; + dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; + r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); + TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(mz_uint32)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) + { + TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); + } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); + r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) + { + if (num_bits) + TINFL_GET_BITS(6, r->m_raw_header[counter], 8); + else + TINFL_GET_BYTE(7, r->m_raw_header[counter]); + } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) + { + TINFL_CR_RETURN_FOREVER(39, 
TINFL_STATUS_FAILED); + } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); + } + while (pIn_buf_cur >= pIn_buf_end) + { + TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); + pIn_buf_cur += n; + pOut_buf_cur += n; + counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; + mz_uint i; + r->m_table_sizes[0] = 288; + r->m_table_sizes[1] = 32; + TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for (i = 0; i <= 143; ++i) + *p++ = 8; + for (; i <= 255; ++i) + *p++ = 9; + for (; i <= 279; ++i) + *p++ = 7; + for (; i <= 287; ++i) + *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) + { + TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); + r->m_table_sizes[counter] += s_min_table_sizes[counter]; + } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); + for (counter = 0; counter < r->m_table_sizes[2]; counter++) + { + mz_uint s; + TINFL_GET_BITS(14, s, 3); + r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; + } + r->m_table_sizes[2] = 19; + } + for (; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; + tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; + pTable = &r->m_tables[r->m_type]; + MZ_CLEAR_OBJ(total_syms); + MZ_CLEAR_OBJ(pTable->m_look_up); + MZ_CLEAR_OBJ(pTable->m_tree); + 
for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) + total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; + next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) + { + used_syms += total_syms[i]; + next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); + } + if ((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; + if (!code_size) + continue; + cur_code = next_code[code_size]++; + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) + { + mz_int16 k = (mz_int16)((code_size << 9) | sym_index); + while (rev_code < TINFL_FAST_LOOKUP_SIZE) + { + pTable->m_look_up[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) + { + pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) + { + pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + else + tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); + pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) + { + mz_uint s; + TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); + if (dist < 16) + { + r->m_len_codes[counter++] = (mz_uint8)dist; + continue; + } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = 
"\02\03\07"[dist - 16]; + TINFL_GET_BITS(18, s, num_extra); + s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); + counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); + TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for (;;) + { + mz_uint8 *pSrc; + for (;;) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; + mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 4; + num_bits += 32; + } +#else + if (num_bits < 15) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; + do + { + sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + counter = sym2; + bit_buf >>= code_len; + num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; + do + { + sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> 
code_len++) & 1)]; + } while (sym2 < 0); + } + bit_buf >>= code_len; + num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) + break; + + num_extra = s_length_extra[counter - 257]; + counter = s_length_base[counter - 257]; + if (num_extra) + { + mz_uint extra_bits; + TINFL_GET_BITS(25, extra_bits, num_extra); + counter += extra_bits; + } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; + dist = s_dist_base[dist]; + if (num_extra) + { + mz_uint extra_bits; + TINFL_GET_BITS(27, extra_bits, num_extra); + dist += extra_bits; + } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { +#ifdef MINIZ_UNALIGNED_USE_MEMCPY + memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32)*2); +#else + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; +#endif + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + while(counter>2) + 
{ + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; + pSrc += 3; + counter -= 3; + } + if (counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + + /* Ensure byte alignment and put back any bytes from the bitbuf if we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ + /* I'm being super conservative here. A number of simplifications can be made to the byte alignment part, and the Adler32 check shouldn't ever need to worry about reading from the bitbuf now. */ + TINFL_SKIP_BITS(32, num_bits & 7); + while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) + { + --pIn_buf_cur; + num_bits -= 8; + } + bit_buf &= (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); + MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end of non-deflate/zlib streams with following data (such as gzip streams). */ + + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + for (counter = 0; counter < 4; ++counter) + { + mz_uint s; + if (num_bits) + TINFL_GET_BITS(41, s, 8); + else + TINFL_GET_BYTE(42, s); + r->m_z_adler32 = (r->m_z_adler32 << 8) | s; + } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + + TINFL_CR_FINISH + +common_exit: + /* As long as we aren't telling the caller that we NEED more input to make forward progress: */ + /* Put back any bytes from the bitbuf in case we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ + /* We need to be very careful here to NOT push back any bytes we definitely know we need to make forward progress, though, or we'll lock the caller up into an inf loop. 
*/ + if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) + { + while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) + { + --pIn_buf_cur; + num_bits -= 8; + } + } + r->m_num_bits = num_bits; + r->m_bit_buf = bit_buf & (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); + r->m_dist = dist; + r->m_counter = counter; + r->m_num_extra = num_extra; + r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; + *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; + size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; + size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) + s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) + status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +/* Higher level helper functions. 
*/ +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; + void *pBuf = NULL, *pNew_buf; + size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for (;;) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) + break; + new_out_buf_capacity = out_buf_capacity * 2; + if (new_out_buf_capacity < 128) + new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + pBuf = pNew_buf; + out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; + tinfl_status status; + tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); + size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for (;;) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +#ifndef MINIZ_NO_MALLOC +tinfl_decompressor *tinfl_decompressor_alloc() +{ + tinfl_decompressor *pDecomp = (tinfl_decompressor *)MZ_MALLOC(sizeof(tinfl_decompressor)); + if (pDecomp) + tinfl_init(pDecomp); + return pDecomp; +} + +void tinfl_decompressor_free(tinfl_decompressor *pDecomp) +{ + MZ_FREE(pDecomp); +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/neo/libs/optick/optick_miniz.h b/neo/libs/optick/optick_miniz.h new file mode 100644 index 00000000..8730509d --- /dev/null +++ b/neo/libs/optick/optick_miniz.h @@ -0,0 +1,940 @@ +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + +/* miniz.c 2.1.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. 
It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateReset/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. 
+ + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. 
+ + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. 
To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ +#pragma once + +/////////////////////////// OPTICK MODIFICATIONS /////////////////////////////// +#include "optick.config.h" // + // +#define OPTICK_ENABLE_COMPRESSION (1) // + // +#define MINIZ_NO_MALLOC (1) // +#define MINIZ_NO_STDIO (1) // +#define MINIZ_NO_ARCHIVE_APIS (1) // +#define MINIZ_NO_TIME (1) // +//////////////////////////////////////////////////////////////////////////////// + + +#if OPTICK_ENABLE_COMPRESSION + +/* Defines to completely disable specific portions of miniz.c: + If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ + +/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ +/*#define MINIZ_NO_STDIO */ + +/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ +/* get/set file times, and the C run-time funcs that get/set times won't be called. */ +/* The current downside is the times written to your archives will be from 1979. */ +/*#define MINIZ_NO_TIME */ + +/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ +/*#define MINIZ_NO_ARCHIVE_APIS */ + +/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. 
*/ +/*#define MINIZ_NO_ARCHIVE_WRITING_APIS */ + +/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ +/*#define MINIZ_NO_ZLIB_APIS */ + +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ +/*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ + +/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. + Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc + callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user + functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */ +/*#define MINIZ_NO_MALLOC */ + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ +#define MINIZ_NO_TIME +#endif + +#include + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ +#define MINIZ_X86_OR_X64_CPU 1 +#else +#define MINIZ_X86_OR_X64_CPU 0 +#endif + +#if MINIZ_X86_OR_X64_CPU || (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ +#define MINIZ_LITTLE_ENDIAN 1 +#else +#define MINIZ_LITTLE_ENDIAN 0 +#endif + +/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */ +#if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES) +#if MINIZ_X86_OR_X64_CPU +/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. 
*/ +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#define MINIZ_UNALIGNED_USE_MEMCPY +#else +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 +#endif +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ +#define MINIZ_HAS_64BIT_REGISTERS 1 +#else +#define MINIZ_HAS_64BIT_REGISTERS 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- zlib-style API Definitions. */ + +/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */ +typedef unsigned long mz_ulong; + +/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */ +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +/* Compression strategies. */ +enum +{ + MZ_DEFAULT_STRATEGY = 0, + MZ_FILTERED = 1, + MZ_HUFFMAN_ONLY = 2, + MZ_RLE = 3, + MZ_FIXED = 4 +}; + +/* Method */ +#define MZ_DEFLATED 8 + +/* Heap allocation callbacks. +Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
*/ +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ +enum +{ + MZ_NO_COMPRESSION = 0, + MZ_BEST_SPEED = 1, + MZ_BEST_COMPRESSION = 9, + MZ_UBER_COMPRESSION = 10, + MZ_DEFAULT_LEVEL = 6, + MZ_DEFAULT_COMPRESSION = -1 +}; + +#define MZ_VERSION "10.1.0" +#define MZ_VERNUM 0xA100 +#define MZ_VER_MAJOR 10 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 0 +#define MZ_VER_SUBREVISION 0 + +#ifndef MINIZ_NO_ZLIB_APIS + +/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */ +enum +{ + MZ_NO_FLUSH = 0, + MZ_PARTIAL_FLUSH = 1, + MZ_SYNC_FLUSH = 2, + MZ_FULL_FLUSH = 3, + MZ_FINISH = 4, + MZ_BLOCK = 5 +}; + +/* Return status codes. MZ_PARAM_ERROR is non-standard. */ +enum +{ + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +/* Window bits */ +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +/* Compression/decompression stream struct. 
*/ +typedef struct mz_stream_s +{ + const unsigned char *next_in; /* pointer to next byte to read */ + unsigned int avail_in; /* number of bytes available at next_in */ + mz_ulong total_in; /* total number of bytes consumed so far */ + + unsigned char *next_out; /* pointer to next byte to write */ + unsigned int avail_out; /* number of bytes that can be written to next_out */ + mz_ulong total_out; /* total number of bytes produced so far */ + + char *msg; /* error msg (unused) */ + struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ + + mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ + mz_free_func zfree; /* optional heap free function (defaults to free) */ + void *opaque; /* heap alloc function user pointer */ + + int data_type; /* data_type (unused) */ + mz_ulong adler; /* adler32 of the source or uncompressed data */ + mz_ulong reserved; /* not used */ +} mz_stream; + +typedef mz_stream *mz_streamp; + +/* Returns the version string of miniz.c. */ +const char *mz_version(void); + +/* mz_deflateInit() initializes a compressor with default options: */ +/* Parameters: */ +/* pStream must point to an initialized mz_stream struct. */ +/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ +/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */ +/* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_PARAM_ERROR if the input parameters are bogus. */ +/* MZ_MEM_ERROR on out of memory. 
*/ +int mz_deflateInit(mz_streamp pStream, int level); + +/* mz_deflateInit2() is like mz_deflateInit(), except with more control: */ +/* Additional parameters: */ +/* method must be MZ_DEFLATED */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ +/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ +int mz_deflateReset(mz_streamp pStream); + +/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */ +/* Parameters: */ +/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ +/* Return values: */ +/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ +/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */ +int mz_deflate(mz_streamp pStream, int flush); + +/* mz_deflateEnd() deinitializes a compressor: */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus.
*/ +int mz_deflateEnd(mz_streamp pStream); + +/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +/* Single-call compression functions mz_compress() and mz_compress2(): */ +/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */ +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ +mz_ulong mz_compressBound(mz_ulong source_len); + +/* Initializes a decompressor. */ +int mz_inflateInit(mz_streamp pStream); + +/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +/* Quickly resets a decompressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */ +int mz_inflateReset(mz_streamp pStream); + +/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */ +/* Parameters: */ +/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
*/ +/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ +/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ +/* Return values: */ +/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */ +/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_DATA_ERROR if the deflate stream is invalid. */ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ +/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ +int mz_inflate(mz_streamp pStream, int flush); + +/* Deinitializes a decompressor. */ +int mz_inflateEnd(mz_streamp pStream); + +/* Single-call decompression. */ +/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +/* Returns a string description of the specified error code, or NULL if the error code is invalid. */ +const char *mz_error(int err); + +/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. 
*/ +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */ +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef mz_ulong uLong; +typedef Byte Bytef; +typedef uInt uIntf; +typedef char charf; +typedef int intf; +typedef void *voidpf; +typedef uLong uLongf; +typedef void *voidp; +typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflateReset mz_inflateReset +#define inflate mz_inflate 
+#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 +#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ + +#endif /* MINIZ_NO_ZLIB_APIS */ + +#ifdef __cplusplus +} +#endif +#pragma once +/* assert() backs MZ_ASSERT, <stdint.h> supplies int64_t/uint64_t for mz_int64/mz_uint64, <stdlib.h> supplies malloc/free/realloc for the MZ_MALLOC family. */ +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +/* ------------------- Types and macros */ +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef int64_t mz_int64; +typedef uint64_t mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */ +#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) +#else +#define MZ_MACRO_END while (0) +#endif + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include <stdio.h> +#define MZ_FILE FILE +#endif /* #ifdef MINIZ_NO_STDIO */ + +#ifdef MINIZ_NO_TIME +typedef struct mz_dummy_time_t_tag +{ + int m_dummy; +} mz_dummy_time_t; +#define MZ_TIME_T mz_dummy_time_t +#else +#define MZ_TIME_T time_t +#endif + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC +/* With MINIZ_NO_MALLOC the allocation macros are stubs: MZ_MALLOC/MZ_REALLOC yield NULL and MZ_FREE merely evaluates its (fully parenthesized) argument and discards it. */ +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) ((void)(x)) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MZ_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else +#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) + +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); +extern void miniz_def_free_func(void *opaque, void *address); +extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); + +#define MZ_UINT16_MAX (0xFFFFU) +#define MZ_UINT32_MAX (0xFFFFFFFFU) + +#ifdef __cplusplus +} +#endif +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif +/* ------------------- Low-level Compression API Definitions */ + +/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ +#define TDEFL_LESS_MEMORY 0 + +/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ +/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). 
*/ +enum +{ + TDEFL_HUFFMAN_ONLY = 0, + TDEFL_DEFAULT_MAX_PROBES = 128, + TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ +/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ +/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ +/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ +/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ +/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ +/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ +/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ +/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). */ +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +/* High level compression functions: */ +/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ +/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. 
*/ +/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must free() the returned block when it's no longer needed. */ +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ +/* Returns 0 on failure. */ +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +/* Compresses an image to a compressed PNG file in memory. */ +/* On entry: */ +/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ +/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ +/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ +/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. */ +/* *pLen_out will be set to the size of the PNG image file. */ +/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */ +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); + +/* tdefl_compress_mem_to_output() compresses a block to an output stream. 
The above helpers use this function internally. */ +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum +{ + TDEFL_MAX_HUFF_TABLES = 3, + TDEFL_MAX_HUFF_SYMBOLS_0 = 288, + TDEFL_MAX_HUFF_SYMBOLS_1 = 32, + TDEFL_MAX_HUFF_SYMBOLS_2 = 19, + TDEFL_LZ_DICT_SIZE = 32768, + TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, + TDEFL_MIN_MATCH_LEN = 3, + TDEFL_MAX_MATCH_LEN = 258 +}; + +/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ +#if TDEFL_LESS_MEMORY +enum +{ + TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 12, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#else +enum +{ + TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 15, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#endif + +/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ +typedef enum { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1 +} tdefl_status; + +/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ +typedef enum { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +/* tdefl's compression state structure. 
*/ +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +/* Initializes the compressor. */ +/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */ +/* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ +/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */ +/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
*/ +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */ +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ +/* tdefl_compress_buffer() always consumes the entire input buffer. */ +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +/* Create tdefl_compress() flags given zlib-style compression parameters. */ +/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ +/* window_bits may be -15 (raw deflate) or 15 (zlib) */ +/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tdefl_compressor structure in C so that */ +/* non-C language bindings to tdefl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tdefl_compressor *tdefl_compressor_alloc(void); +void tdefl_compressor_free(tdefl_compressor *pComp); +#endif + +#ifdef __cplusplus +} +#endif +#pragma once + +/* ------------------- Low-level Decompression API Definitions */ + +#ifdef __cplusplus +extern "C" { +#endif +/* Decompression flags used by tinfl_decompress(). */ +/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). 
Otherwise, the input is a raw deflate stream. */ +/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */ +/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ +/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */ +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +/* High level decompression functions: */ +/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ +/* On return: */ +/* Function returns a pointer to the decompressed data, or NULL on failure. */ +/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must call mz_free() on the returned block when it's no longer needed. */ +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ +/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. 
*/ +/* Returns 1 on success or 0 on failure. */ +typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; +typedef struct tinfl_decompressor_tag tinfl_decompressor; + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tinfl_decompressor structure in C so that */ +/* non-C language bindings to tinfl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tinfl_decompressor *tinfl_decompressor_alloc(void); +void tinfl_decompressor_free(tinfl_decompressor *pDecomp); +#endif + +/* Max size of LZ dictionary. */ +#define TINFL_LZ_DICT_SIZE 32768 + +/* Return status. */ +typedef enum { + /* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ + /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ + /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */ + TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, + + /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ + TINFL_STATUS_BAD_PARAM = -3, + + /* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ + TINFL_STATUS_ADLER32_MISMATCH = -2, + + /* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it'll just keep on returning the same status failure code.
*/ + TINFL_STATUS_FAILED = -1, + + /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */ + + /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ + /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ + TINFL_STATUS_DONE = 0, + + /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ + /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ + /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + + /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */ + /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */ + /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ + /* so I may need to add some code to address this. */ + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +/* Initializes the decompressor to its initial state. */ +#define tinfl_init(r) \ + do \ + { \ + (r)->m_state = 0; \ + } \ + MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +/* Main low-level decompressor coroutine function. 
This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ +/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */ +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +/* Internal/private bits follow. */ +enum +{ + TINFL_MAX_HUFF_TABLES = 3, + TINFL_MAX_HUFF_SYMBOLS_0 = 288, + TINFL_MAX_HUFF_SYMBOLS_1 = 32, + TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, + TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#else +#define TINFL_USE_64BIT_BITBUF 0 +#endif + +#if TINFL_USE_64BIT_BITBUF +typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else +typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/neo/libs/optick/optick_serialization.cpp b/neo/libs/optick/optick_serialization.cpp new file mode 100644 index 00000000..76875e9d --- /dev/null +++ b/neo/libs/optick/optick_serialization.cpp @@ -0,0 
+1,197 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "optick_serialization.h"
+
+#if USE_OPTICK
+
+#include "optick_common.h"
+
+namespace Optick
+{
+	// Flushes the underlying stream and returns a copy of everything written so far.
+	string OutputDataStream::GetData()
+	{
+		flush();
+		return str();
+	}
+
+	// Appends `size` raw bytes taken from `buffer`; returns the stream for chaining.
+	OutputDataStream & OutputDataStream::Write(const char * buffer, size_t size)
+	{
+		write(buffer, size);
+		return *this;
+	}
+
+	// C string: a uint32 character count followed by the characters themselves
+	// (no terminator). A null pointer is written as a zero count.
+	OutputDataStream &operator << ( OutputDataStream &stream, const char* val )
+	{
+		uint32 charCount = 0;
+		if (val != nullptr)
+			charCount = (uint32)strlen(val);
+
+		stream << charCount;
+
+		if (charCount != 0)
+			stream.write( val, charCount );
+
+		return stream;
+	}
+
+	// The scalar overloads below all copy the in-memory bytes of the value
+	// straight into the stream, without any byte-order conversion.
+	OutputDataStream &operator << ( OutputDataStream &stream, int val )
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(int) );
+		return stream;
+	}
+
+	OutputDataStream &operator << ( OutputDataStream &stream, int64 val )
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(int64) );
+		return stream;
+	}
+
+	OutputDataStream &operator << ( OutputDataStream &stream, char val )
+	{
+		const char* rawBytes = &val;
+		stream.write( rawBytes, sizeof(char) );
+		return stream;
+	}
+
+	OutputDataStream &operator << (OutputDataStream &stream, int8 val)
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(val) );
+		return stream;
+	}
+
+	OutputDataStream &operator << ( OutputDataStream &stream, byte val )
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(byte) );
+		return stream;
+	}
+
+	OutputDataStream & operator<<(OutputDataStream &stream, uint64 val)
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(uint64) );
+		return stream;
+	}
+
+	OutputDataStream & operator<<(OutputDataStream &stream, uint32 val)
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(uint32) );
+		return stream;
+	}
+
+	OutputDataStream & operator<<(OutputDataStream &stream, float val)
+	{
+		const char* rawBytes = reinterpret_cast<const char*>(&val);
+		stream.write( rawBytes, sizeof(float) );
+		return stream;
+	}
+
+	// string: a uint32 character count followed by the characters.
+	OutputDataStream & operator<<(OutputDataStream &stream, const string& val)
+	{
+		stream << (uint32)val.length();
+
+		if (val.empty())
+			return stream;
+
+		stream.write(&val[0], sizeof(val[0]) * val.length());
+		return stream;
+	}
+
+	// wstring: a uint32 size in BYTES (length * sizeof(wchar_t)) followed by the raw characters.
+	OutputDataStream & operator<<(OutputDataStream &stream, const wstring& val)
+	{
+		const size_t byteCount = val.length() * sizeof(wchar_t);
+		stream << (uint32)byteCount;
+
+		if (val.empty())
+			return stream;
+
+		stream.write(reinterpret_cast<const char*>(&val[0]), byteCount);
+		return stream;
+	}
+
+	// The scalar readers mirror the writers: the value's bytes are read verbatim.
+	InputDataStream &operator >> (InputDataStream &stream, int16 &val)
+	{
+		char* rawBytes = reinterpret_cast<char*>(&val);
+		stream.read( rawBytes, sizeof(int16) );
+		return stream;
+	}
+
+	InputDataStream &operator >> ( InputDataStream &stream, int32 &val )
+	{
+		char* rawBytes = reinterpret_cast<char*>(&val);
+		stream.read( rawBytes, sizeof(int) );
+		return stream;
+	}
+
+	// Scalar readers: each one copies sizeof(value) bytes from the stream into `val`
+	// verbatim (no byte-order conversion) and returns the stream for chaining.
+	InputDataStream &operator >> ( InputDataStream &stream, int64 &val )
+	{
+		stream.read( (char*)&val, sizeof(int64) );
+		return stream;
+	}
+
+	InputDataStream & operator>>( InputDataStream &stream, byte &val )
+	{
+		stream.read( (char*)&val, sizeof(byte) );
+		return stream;
+	}
+
+	InputDataStream & operator >> (InputDataStream &stream, uint16 &val)
+	{
+		stream.read((char*)&val, sizeof(uint16));
+		return stream;
+	}
+
+	InputDataStream & operator>>( InputDataStream &stream, uint32 &val )
+	{
+		stream.read( (char*)&val, sizeof(uint32) );
+		return stream;
+	}
+
+	InputDataStream & operator>>( InputDataStream &stream, uint64 &val )
+	{
+		stream.read( (char*)&val, sizeof(uint64) );
+		return stream;
+	}
+
+	// Reads a length-prefixed string: an int32 character count followed by that many bytes.
+	// The result holds exactly `length` characters. The previous code resized to
+	// length + 1, which left an embedded '\0' counted in size() and broke comparisons.
+	// A zero or negative count (only possible with a corrupted stream) yields an empty string.
+	InputDataStream & operator >> ( InputDataStream &stream, string &val)
+	{
+		int32 length = 0;
+		stream >> length;
+
+		if (length <= 0)
+		{
+			val.clear();
+			return stream;
+		}
+
+		val.resize((size_t)length);
+		stream.read( &val[0], length);
+		return stream;
+	}
+
+	// The stream is opened for both reading and writing: Append() writes at the put
+	// position while the operators above consume from the get position.
+	InputDataStream::InputDataStream() :
+		stringstream( ios_base::in | ios_base::out )
+	{
+	}
+
+	// Appends `length` received bytes to the end of the buffered data.
+	void InputDataStream::Append(const char *buffer, size_t length)
+	{
+		write( buffer, length );
+	}
+
+	// Number of bytes written but not yet consumed (put position minus get position).
+	size_t InputDataStream::Length()
+	{
+		return (size_t)(tellp() - tellg());
+	}
+
+	// Advances the read position by `length` bytes.
+	// Returns true when that many unread bytes were available and have been skipped.
+	// Returns false, leaving the stream untouched, when fewer bytes are buffered.
+	// The previous comparison was inverted, and it also seeked past the buffered data.
+	bool InputDataStream::Skip(size_t length)
+	{
+		if (Length() < length)
+			return false;
+
+		seekg(length, ios_base::cur);
+		return true;
+	}
+
+
+
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_serialization.h b/neo/libs/optick/optick_serialization.h
new file mode 100644
index 00000000..f6021f50
--- /dev/null
+++ b/neo/libs/optick/optick_serialization.h
@@ -0,0 +1,141 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include "optick_common.h" + +#if USE_OPTICK +#include "optick_memory.h" + +#if defined(OPTICK_MSVC) +#pragma warning( push ) + +//C4250. inherits 'std::basic_ostream' +#pragma warning( disable : 4250 ) + +//C4127. Conditional expression is constant +#pragma warning( disable : 4127 ) +#endif + +namespace Optick +{ + class OutputDataStream : private ostringstream + { + public: + // Move constructor rocks! 
+ // Beware of one copy here(do not use it in performance critical parts) + string GetData(); + + // It is important to make private inheritance in order to avoid collision with default operator implementation + friend OutputDataStream &operator << ( OutputDataStream &stream, const char* val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, uint64 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, uint32 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int64 val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, char val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, byte val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, int8 val); + friend OutputDataStream &operator << ( OutputDataStream &stream, float val); + friend OutputDataStream &operator << ( OutputDataStream &stream, const string& val ); + friend OutputDataStream &operator << ( OutputDataStream &stream, const wstring& val ); + + OutputDataStream& Write(const char* buffer, size_t size); + }; + + template + OutputDataStream& operator<<(OutputDataStream &stream, const vector& val) + { + stream << (uint32)val.size(); + + for(auto it = val.begin(); it != val.end(); ++it) + { + const T& element = *it; + stream << element; + } + + return stream; + } + + template + OutputDataStream& operator<<(OutputDataStream &stream, const MemoryPool& val) + { + stream << (uint32)val.Size(); + + val.ForEach([&](const T& data) + { + stream << data; + }); + + return stream; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + class InputDataStream : private stringstream { + public: + bool CanRead() { return !eof(); } + + InputDataStream(); + + void Append(const char *buffer, size_t length); + bool Skip(size_t length); + size_t Length(); + 
+ template + bool Peek(T& data) + { + if (Length() < sizeof(T)) + return false; + + pos_type currentPos = tellg(); + read((char*)&data, sizeof(T)); + seekg(currentPos); + return true; + } + + template + bool Read(T& data) + { + if (Length() < sizeof(T)) + return false; + + read((char*)&data, sizeof(T)); + return true; + } + + friend InputDataStream &operator >> (InputDataStream &stream, byte &val ); + friend InputDataStream &operator >> (InputDataStream &stream, int16 &val); + friend InputDataStream &operator >> (InputDataStream &stream, uint16 &val); + friend InputDataStream &operator >> (InputDataStream &stream, int32 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, uint32 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, int64 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, uint64 &val ); + friend InputDataStream &operator >> (InputDataStream &stream, string &val); + }; + + +} + +#if defined(OPTICK_MSVC) +#pragma warning( pop ) +#endif + +#endif //USE_OPTICK \ No newline at end of file diff --git a/neo/libs/optick/optick_server.cpp b/neo/libs/optick/optick_server.cpp new file mode 100644 index 00000000..cf93d5aa --- /dev/null +++ b/neo/libs/optick/optick_server.cpp @@ -0,0 +1,502 @@ +// The MIT License(MIT) +// +// Copyright(c) 2019 Vadim Slyusarev +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "optick_server.h" + +#if USE_OPTICK +#include "optick_common.h" +#include "optick_miniz.h" + +#if defined(OPTICK_MSVC) +#define USE_WINDOWS_SOCKETS (1) +#else +#define USE_BERKELEY_SOCKETS (1) +#endif +#define SOCKET_PROTOCOL_TCP (6) +#if defined(USE_BERKELEY_SOCKETS) +#include +#include +#include +#include +#include +#include +typedef int TcpSocket; +#elif defined(USE_WINDOWS_SOCKETS) +#include +#include +typedef UINT_PTR TcpSocket; +#else +#error Platform not supported +#endif + + +#if defined(OPTICK_MSVC) +#pragma comment( lib, "ws2_32.lib" ) +#endif + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static const short DEFAULT_PORT = 31318; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(USE_WINDOWS_SOCKETS) +class Wsa +{ + bool isInitialized; + WSADATA data; + + Wsa() + { + isInitialized = WSAStartup(0x0202, &data) == ERROR_SUCCESS; + OPTICK_ASSERT(isInitialized, "Can't initialize WSA"); + } + + ~Wsa() + { + if (isInitialized) + { + WSACleanup(); + } + } +public: + static bool Init() + { + static Wsa wsa; + return wsa.isInitialized; + } +}; +#endif + + +inline bool IsValidSocket(TcpSocket socket) +{ +#if defined(USE_WINDOWS_SOCKETS) + if (socket == INVALID_SOCKET) + { + return false; + } +#else + if (socket < 0) + { + return false; + } +#endif + return true; +} + +inline 
void CloseSocket(TcpSocket& socket)
+{
+	// Releases the platform socket handle and resets the caller's variable to the
+	// platform's invalid value, so IsValidSocket() reports false afterwards.
+#if defined(USE_WINDOWS_SOCKETS)
+	closesocket(socket);
+	socket = INVALID_SOCKET;
+#else
+	close(socket);
+	socket = -1;
+#endif
+}
+
+// Switches a socket between blocking (isBlocking == true) and non-blocking mode.
+// Returns true when the underlying system call reports success.
+inline bool SetSocketBlockingMode(TcpSocket socket, bool isBlocking)
+{
+#if defined(USE_WINDOWS_SOCKETS)
+	unsigned long mode = isBlocking ? 0 : 1;
+	return (ioctlsocket(socket, FIONBIO, &mode) == 0) ? true : false;
+#else
+#if defined(OPTICK_OSX) || defined(OPTICK_LINUX)
+	int flags = fcntl(socket, F_GETFL, 0);
+	if (flags < 0) return false;
+	flags = isBlocking ? (flags & ~O_NONBLOCK) : (flags | O_NONBLOCK);
+	return (fcntl(socket, F_SETFL, flags) == 0) ? true : false;
+#else
+	// NOTE(review): 0x1200 is presumably a platform-specific "non-blocking" socket option - confirm against the target SDK.
+	int nonblocking = isBlocking ? 0 : 1;
+	return setsockopt((int)socket, SOL_SOCKET, 0x1200, (char*)&nonblocking, sizeof(nonblocking)) == 0;
+#endif
+#endif
+}
+
+
+// Thin wrapper around one listening TCP socket and at most one accepted connection.
+// Access to the accepted connection is guarded by socketLock.
+class Socket
+{
+	TcpSocket acceptSocket;
+	TcpSocket listenSocket;
+	sockaddr_in address;
+
+	fd_set recieveSet;
+
+	std::recursive_mutex socketLock;
+	wstring errorMessage;
+
+	// Closes the listening socket if it is open.
+	// (The check used to be inverted, so the handle was never released.)
+	void Close()
+	{
+		if (IsValidSocket(listenSocket))
+		{
+			CloseSocket(listenSocket);
+		}
+	}
+
+	// Binds the listening socket to `port` on all local interfaces.
+	bool Bind(short port)
+	{
+		address.sin_family = AF_INET;
+		address.sin_addr.s_addr = INADDR_ANY;
+		address.sin_port = htons(port);
+
+		if (::bind(listenSocket, (sockaddr *)&address, sizeof(address)) == 0)
+		{
+			return true;
+		}
+
+		return false;
+	}
+
+	// Closes the accepted connection if there is one.
+	// (The check used to be inverted, so the handle was never released.)
+	void Disconnect()
+	{
+		std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+		if (IsValidSocket(acceptSocket))
+		{
+			CloseSocket(acceptSocket);
+		}
+	}
+public:
+	// Creates the listening socket and puts it into non-blocking mode, so that
+	// Accept() can be polled without stalling the caller.
+	Socket() : acceptSocket((TcpSocket)-1), listenSocket((TcpSocket)-1)
+	{
+#if defined(USE_WINDOWS_SOCKETS)
+		Wsa::Init();
+#endif
+		listenSocket = ::socket(AF_INET, SOCK_STREAM, SOCKET_PROTOCOL_TCP);
+		OPTICK_ASSERT(IsValidSocket(listenSocket), "Can't create socket");
+
+		SetSocketBlockingMode(listenSocket, false);
+	}
+
+	~Socket()
+	{
+		Disconnect();
+		Close();
+	}
+
+	// Tries to bind to the first free port in [startPort, startPort + portRange).
+	bool Bind(short startPort, short portRange)
+	{
+		for (short port = startPort; port < startPort + portRange; ++port)
+			if (Bind(port))
+				return true;
+
+		return false;
+	}
+
+	void Listen()
+	{
+		int result = ::listen(listenSocket, 8);
+		if (result != 0)
+		{
+			OPTICK_FAILED("Can't start listening");
+		}
+	}
+
+	// Polls for a new incoming connection and adopts it if one is pending.
+	// Returns true when a connection (new or previously accepted) is available.
+	bool Accept()
+	{
+		TcpSocket incomingSocket = ::accept(listenSocket, nullptr, nullptr);
+
+		if (IsValidSocket(incomingSocket))
+		{
+			std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+			// Release the previous connection before replacing it, otherwise its handle leaks.
+			if (IsValidSocket(acceptSocket))
+			{
+				CloseSocket(acceptSocket);
+			}
+
+			acceptSocket = incomingSocket;
+			SetSocketBlockingMode(acceptSocket, true);
+		}
+
+		return IsValidSocket(acceptSocket);
+	}
+
+	// Sends `len` bytes over the accepted connection.
+	// Returns false when there is no connection or the send fails; on failure the
+	// connection is dropped. (The old test `>= 0` treated every successful send as an error.)
+	bool Send(const char *buf, size_t len)
+	{
+		std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+		if (!IsValidSocket(acceptSocket))
+			return false;
+
+		// send() reports failure with a negative result.
+		if (::send(acceptSocket, buf, (int)len, 0) < 0)
+		{
+			Disconnect();
+			return false;
+		}
+
+		return true;
+	}
+
+	// Non-blocking receive: returns the recv() result when data is ready,
+	// or 0 when there is no connection or nothing to read right now.
+	int Receive(char *buf, int len)
+	{
+		std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+		if (!IsValidSocket(acceptSocket))
+			return 0;
+
+		FD_ZERO(&recieveSet);
+		FD_SET(acceptSocket, &recieveSet);
+
+		// Zero timeout turns select() into a poll. A fresh value is used on every call
+		// because select() is allowed to modify the timeout it is given.
+		timeval lim = { 0, 0 };
+
+#if defined(USE_BERKELEY_SOCKETS)
+		if (::select(acceptSocket + 1, &recieveSet, nullptr, nullptr, &lim) == 1)
+#elif defined(USE_WINDOWS_SOCKETS)
+		if (::select(0, &recieveSet, nullptr, nullptr, &lim) == 1)
+#else
+#error Platform not supported
+#endif
+		{
+			return ::recv(acceptSocket, buf, len, 0);
+		}
+
+		return 0;
+	}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Fixed-size header written at the start of a saved capture (see Server::SendStart).
+struct OptickHeader
+{
+	uint32_t magic;
+	uint16_t version;
+	uint16_t flags;
+
+	static const uint32_t OPTICK_MAGIC = 0xB50FB50Fu;
+	static const uint16_t OPTICK_VERSION = 0;
+	enum Flags : uint16_t
+	{
+		IsZip = 1 << 0,
+		IsMiniz = 1 << 1,
+	};
+
+	OptickHeader() : magic(OPTICK_MAGIC), version(OPTICK_VERSION), flags(0) {}
+};
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Binds to `port` (or one of the next three ports) and starts listening.
+Server::Server(short port) : socket(Memory::New<Socket>()), saveCb(nullptr)
+{
+	if (!socket->Bind(port, 4))
+	{
+		// "TCP/IP": the former "TCP\IP" contained the invalid escape sequence '\I'.
+		OPTICK_FAILED("Failed to bind a socket! Most probably the port is blocked by anti-virus! Change the port and verify that your game has enough permissions to communicate over the TCP/IP.");
+	}
+	else
+	{
+		socket->Listen();
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Pumps the connection: drains pending bytes into networkStream, then decodes and
+// applies every complete message found in it.
+void Server::Update()
+{
+	std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+	if (!InitConnection())
+		return;
+
+	int length = -1;
+	while ( (length = socket->Receive( buffer, BIFFER_SIZE ) ) > 0 )
+	{
+		networkStream.Append(buffer, length);
+	}
+
+	while (IMessage *message = IMessage::Create(networkStream))
+	{
+		message->Apply();
+		Memory::Delete(message);
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// While a save callback is set, outgoing data goes to it instead of the socket (see Server::Send).
+void Server::SetSaveCallback(CaptureSaveChunkCb cb)
+{
+	saveCb = cb;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#if OPTICK_ENABLE_COMPRESSION
+// Streaming deflate wrapper: input is compressed into a fixed 1 MB scratch buffer,
+// which is handed to a callback whenever it holds output.
+struct ZLibCompressor
+{
+	static const int BUFFER_SIZE = 1024 << 10; // 1Mb
+	static const int COMPRESSION_LEVEL = Z_BEST_SPEED;
+
+	z_stream stream;
+	vector<uint8> buffer;
+
+	// (Re)initialises the deflate stream and routes its allocations through Optick's allocator.
+	void Init()
+	{
+		buffer.resize(BUFFER_SIZE);
+
+		memset(&stream, 0, sizeof(stream));
+		stream.next_in = nullptr;
+		stream.avail_in = 0;
+		stream.next_out = &buffer[0];
+		stream.avail_out = (uint32)buffer.size();
+
+		stream.zalloc = [](void* /*opaque*/, size_t items, size_t size) -> void* { return Memory::Alloc(items * size); };
+		stream.zfree = [](void* /*opaque*/, void *address) { Memory::Free(address); };
+
+		if (deflateInit(&stream, COMPRESSION_LEVEL) != Z_OK)
+		{
+			OPTICK_FAILED("deflateInit failed!");
+		}
+	}
+
+	typedef void(*CompressCb)(const char* data, size_t size);
+
+	// Feeds `size` bytes to deflate and passes any produced output to `cb`.
+	// With finish == true the stream is flushed until deflate reports Z_STREAM_END.
+	void Compress(const char* data, size_t size, CompressCb cb, bool finish = false)
+	{
+		stream.next_in = (const unsigned char*)data;
+		stream.avail_in = (uint32)size;
+
+		while (stream.avail_in || finish)
+		{
+			int status = deflate(&stream, finish ? MZ_FINISH : MZ_NO_FLUSH);
+
+			// Hand over whatever has been produced, then rewind the scratch buffer.
+			if ((status == Z_STREAM_END) || (stream.avail_out != buffer.size()))
+			{
+				uint32 compressedSize = (uint32)(buffer.size() - stream.avail_out);
+
+				cb((const char*)&buffer[0], compressedSize);
+
+				stream.next_out = &buffer[0];
+				stream.avail_out = (uint32)buffer.size();
+			}
+
+			if (status == Z_STREAM_END)
+				break;
+
+			if (status != Z_OK)
+			{
+				OPTICK_FAILED("Copmression failed!");
+				break;
+			}
+		}
+	}
+
+	// Flushes the remaining output, tears down the deflate stream and frees the scratch buffer.
+	void Finish(CompressCb cb)
+	{
+		Compress(nullptr, 0, cb, true);
+
+		int status = deflateEnd(&stream);
+		if (status != Z_OK)
+		{
+			OPTICK_FAILED("deflateEnd failed!");
+		}
+		buffer.clear();
+		buffer.shrink_to_fit();
+	}
+
+	static ZLibCompressor& Get()
+	{
+		static ZLibCompressor compressor;
+		return compressor;
+	}
+};
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// When saving to a callback, emits the capture header first.
+// The header itself is passed to the callback directly, without compression.
+void Server::SendStart()
+{
+	if (saveCb != nullptr)
+	{
+		OptickHeader header;
+#if OPTICK_ENABLE_COMPRESSION
+		ZLibCompressor::Get().Init();
+		header.flags |= OptickHeader::IsMiniz;
+#endif
+		saveCb((const char*)&header, sizeof(header));
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Routes raw bytes either to the save callback (compressed when enabled) or to the socket.
+void Server::Send(const char* data, size_t size)
+{
+	if (saveCb)
+	{
+#if OPTICK_ENABLE_COMPRESSION
+		ZLibCompressor::Get().Compress(data, size, saveCb);
+#else
+		saveCb(data, size);
+#endif
+	}
+	else
+	{
+		socket->Send(data, size);
+	}
+}
+
+// Sends one response: a DataResponse header carrying the payload size, then the payload.
+// The lock keeps header and payload together when several threads send.
+void Server::Send(DataResponse::Type type, OutputDataStream& stream)
+{
+	std::lock_guard<std::recursive_mutex> lock(socketLock);
+
+	string data = stream.GetData();
+
+	DataResponse response(type, (uint32)data.size());
+
+	Send((char*)&response, sizeof(response));
+	Send(data.c_str(), data.size());
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Terminates the current capture: sends an empty NullFrame response and, when saving
+// through a callback, flushes the compressor, signals end-of-data with (nullptr, 0)
+// and clears the callback.
+void Server::SendFinish()
+{
+	OutputDataStream empty;
+	Send(DataResponse::NullFrame, empty);
+
+	if (saveCb != nullptr)
+	{
+#if OPTICK_ENABLE_COMPRESSION
+		ZLibCompressor::Get().Finish(saveCb);
+#endif
+		saveCb(nullptr, 0);
+		saveCb = nullptr;
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns true when the socket has an accepted connection.
+bool Server::InitConnection()
+{
+	return socket->Accept();
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the machine's host name, or an empty string on platforms covered by neither branch.
+string Server::GetHostName() const
+{
+	const uint32 HOST_NAME_LENGTH = 256;
+	char hostname[HOST_NAME_LENGTH] = { 0 };
+
+#if defined(USE_BERKELEY_SOCKETS)
+#if defined(OPTICK_LINUX) || defined(OPTICK_OSX)
+	// gethostname() need not null-terminate a truncated name. Passing one byte less
+	// than the zero-initialised buffer keeps the final byte as the terminator.
+	gethostname(hostname, HOST_NAME_LENGTH - 1);
+#endif
+#elif OPTICK_PC
+	DWORD length = HOST_NAME_LENGTH;
+	GetComputerNameA(hostname, &length);
+#endif
+
+	return hostname;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+Server::~Server()
+{
+	if (socket)
+	{
+		Memory::Delete(socket);
+		socket = nullptr;
+	}
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Lazily constructed process-wide instance, listening from DEFAULT_PORT.
+Server & Server::Get()
+{
+	static Server instance(DEFAULT_PORT);
+	return instance;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}
+
+#endif //USE_OPTICK
\ No newline at end of file
diff --git a/neo/libs/optick/optick_server.h b/neo/libs/optick/optick_server.h
new file mode 100644
index 00000000..49cccac5
--- /dev/null
+++ b/neo/libs/optick/optick_server.h
@@ -0,0 +1,73 @@
+// The MIT License(MIT)
+//
+// Copyright(c) 2019 Vadim Slyusarev
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+ +#pragma once +#include "optick.config.h" + +#if USE_OPTICK +#include "optick_message.h" + +#include +#include + +namespace Optick +{ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class Socket; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class Server +{ + InputDataStream networkStream; + + static const int BIFFER_SIZE = 1024; + char buffer[BIFFER_SIZE]; + + Socket* socket; + + std::recursive_mutex socketLock; + + CaptureSaveChunkCb saveCb; + + Server( short port ); + ~Server(); + + bool InitConnection(); + + void Send(const char* data, size_t size); + +public: + void SetSaveCallback(CaptureSaveChunkCb cb); + + void SendStart(); + void Send(DataResponse::Type type, OutputDataStream& stream); + void SendFinish(); + + void Update(); + + string GetHostName() const; + + static Server &Get(); +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +} + +#endif //USE_OPTICK \ No newline at end of file diff --git a/neo/sys/common/savegame.cpp b/neo/sys/common/savegame.cpp index 7f27eb98..65de9bb8 100644 --- a/neo/sys/common/savegame.cpp +++ b/neo/sys/common/savegame.cpp @@ -746,6 +746,8 @@ idSaveGameThread::Run */ int idSaveGameThread::Run() { + OPTICK_THREAD( "idSaveGameThread" ); + int ret = ERROR_SUCCESS; try diff --git a/neo/sys/win32/win_main.cpp b/neo/sys/win32/win_main.cpp index 743e230d..427b447e 100644 --- a/neo/sys/win32/win_main.cpp +++ b/neo/sys/win32/win_main.cpp @@ -1948,6 +1948,19 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin // DG: tell Windows 8+ we're high dpi aware, otherwise display scaling screws up the game Sys_SetDPIAwareness(); + // Setting memory allocators + OPTICK_SET_MEMORY_ALLOCATOR( + []( size_t size ) -> void* { return operator new( size ); }, + []( void* p 
) + { + operator delete( p ); + }, + []() + { + /* Do some TLS initialization here if needed */ + } + ); + #if 0 DWORD handler = ( DWORD )_except_handler; __asm @@ -2021,6 +2034,7 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin // main game loop while( 1 ) { + OPTICK_FRAME( "MainThread" ); Win_Frame(); @@ -2035,6 +2049,8 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin common->Frame(); } + OPTICK_SHUTDOWN(); + // never gets here return 0; }